o
    iv/                     @   s   d dl T d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlm
  mZ ddlmZ ddlmZmZmZ dd	lmZ G d
d de
jZG dd de
jZG dd de
jZG dd de
jZG dd de
jZdS )    )*)NumberN)Tensor   )DinoVisionTransformer)wrap_dinov2_attention_with_sdpa'wrap_module_with_gradient_checkpointing)unwrap_module_with_gradient_checkpointing   )normalized_view_plane_uvc                       sb   e Zd Z							ddeded	ed
ededed ded ded f fddZdd Z  ZS )ResidualConvBlockN   	replicaterelu
layer_norm
group_normin_channelsout_channelshidden_channelskernel_sizepadding_mode
activationr   
leaky_relusilueluin_norm)r   r   instance_normnonehidden_norm)r   r   r   c	           
         sr  t t|   |d u r|}|d u r|}|dkrtj}	n$|dkr(tjtjdd}	n|dkr0tj}	n|dkr8tj	}	nt
d| t|dkrMt|d	 |n|d
krWtd|n|dkr`t|nt |	 tj||||d |d|dkr|t|d	 |n|d
krtd|n|dkrt|nt |	 tj||||d |d| _||krtj||ddd| _d S t | _d S )Nr   r   g?)negative_sloper   r   z!Unsupported activation function: r       r   r   r   r
   )r   paddingr   r   )r   r"   )superr   __init__nnReLU	functoolspartial	LeakyReLUSiLUELU
ValueError
Sequential	GroupNormInstanceNorm2dIdentityConv2dlayersskip_connection)
selfr   r   r   r   r   r   r   r   Zactivation_cls	__class__ -/data/cameron/moge_repo/moge/model/modules.pyr$      s:   .zResidualConvBlock.__init__c                 C   s    |  |}| |}|| }|S N)r3   r2   )r4   xskipr7   r7   r8   forward@   s   

zResidualConvBlock.forward)NNr   r   r   r   r   )	__name__
__module____qualname__intstrLiteralr$   r<   __classcell__r7   r7   r5   r8   r      s4    	-r   c                       s   e Zd ZU dZeed< ejed< ejed< eed< de	de
eee f def fdd	Zed
d ZejdefddZdd Zdd Zdd Zddejde
eejf de
eejf dedeejejf f
ddZ  ZS )DINOv2Encoderz]Wrapped DINOv2 encoder supporting gradient checkpointing. Input is RGB image in range [0, 1].backbone
image_mean	image_stddim_featuresintermediate_layersdim_outc              	      s   t t  |_ttdt|_|_	jdd_
j
jd jjj_t|tr/|nt|_t fddtjD _dtg dd	d
d	d	 dtg dd	d
d	d	 d S )Nz.dinov2.hub.backbonesFZ
pretrainedr   c              	      s"   g | ]}t jj d d ddqS )r   r   )r   r   r   strider"   )r%   r1   rH   .0_rJ   r4   r7   r8   
<listcomp>[   s    z*DINOv2Encoder.__init__.<locals>.<listcomp>rF   )g
ףp=
?gv/?gCl?r   r   rG   )gZd;O?gy&1?g?)r#   rD   r$   rI   getattr	importlibimport_module__package__
hub_loaderZbackbone_namerE   blocksattnqkvin_featuresrH   
isinstancer@   lennum_featuresr%   
ModuleListrangeoutput_projectionsregister_buffertorchtensorview)r4   rE   rI   rJ   deprecated_kwargsr5   rP   r8   r$   N   s   
"&zDINOv2Encoder.__init__c                 C   s   t | ddS )N_onnx_compatible_modeF)rR   )r4   r7   r7   r8   onnx_compatible_modec   s   z"DINOv2Encoder.onnx_compatible_modevaluec                 C   s   || _ || j_d S r9   )rf   rE   rg   )r4   rh   r7   r7   r8   rg   g   s   c                 C   s    | j dd }| j| d S )NTrK   )rV   
state_dictrE   load_state_dict)r4   Zpretrained_backbone_state_dictr7   r7   r8   init_weightsl   s   zDINOv2Encoder.init_weightsc                 C   s*   t t| jjD ]
}t| jj|  qd S r9   )r_   r\   rE   rW   r   r4   ir7   r7   r8   enable_gradient_checkpointingp   s   z+DINOv2Encoder.enable_gradient_checkpointingc                 C   s,   t t| jjD ]}t| jj| j qd S r9   )r_   r\   rE   rW   r   rX   rl   r7   r7   r8   enable_pytorch_native_sdpat   s   z(DINOv2Encoder.enable_pytorch_native_sdpaFimage
token_rows
token_colsreturn_class_tokenreturnc                    s   t j|d  d fdd| j d}|| j | j }| jj|| jdd}tj	 fddt
| j|D d	d
jd	d
}|rE||d d	 fS |S )N   bilinearF)modealign_corners	antialiasT)nrs   c                    s6   g | ]\}\}}|| d ddd f qS )r   r
   r   )permute	unflatten
contiguous)rN   projZfeatZclstokenrr   rq   r7   r8   rQ      s    
 z)DINOv2Encoder.forward.<locals>.<listcomp>r   )dim)Finterpolaterg   rF   rG   rE   Zget_intermediate_layersrI   rb   stackzipr`   sum)r4   rp   rq   rr   rs   Zimage_14featuresr:   r7   r   r8   r<   x   s   $
zDINOv2Encoder.forward)F)r=   r>   r?   __doc__r   __annotations__rb   r   r@   rA   UnionListr$   propertyrg   setterboolrk   rn   ro   
LongTensorTupler<   rC   r7   r7   r5   r8   rD   G   s   
 

&
JrD   c                	   @   s.   e Zd Z	d
dededed defddZd	S )	Resamplerr
   r   r   type_pixel_shufflenearestrv   conv_transposepixel_unshuffleavg_poolmax_poolscale_factorc                 C   s:  |dkrbt j| t j|||d  dddddt |t j||ddddd td|d D ]2}| d jjdd |d  | d jj|d |d < | d jjdd |d  | d jj|d |d < q-d S |dv rt j| t j	|||d	krtd
nd dt j||ddddd d S |dkrt j| t j
||||dt j||ddddd | d jjd d d d d dd df | d jjd d < d S |dkrt j| t |t j||d  |ddddd d S |dkrt j| t j||dddddt j||d d S |dkrt j| t j||dddddt j||d d S td| )Nr   r
   r   r   r   )r   rL   r"   r   r   )r   rv   rv   F)r   rw   rx   r   )r   rL   r   r   r   zUnsupported resampler type: )r%   r-   r$   r1   PixelShuffler_   weightdatabiasUpsampleConvTranspose2dPixelUnshuffle	AvgPool2d	MaxPool2dr,   )r4   r   r   r   r   rm   r7   r7   r8   r$      sJ   02<
zResampler.__init__N)r
   )r=   r>   r?   r@   rB   r$   r7   r7   r7   r8   r      s    r   c                   @   s   e Zd Zdee fddZdS )MLPdimsc              	   C   sR   t jj| gtjdd t|d d |dd D  t |d |d R   d S )Nc                 S   s(   g | ]\}}t ||t jd dfqS )T)inplace)r%   Linearr&   )rN   dim_inrJ   r7   r7   r8   rQ      s    z MLP.__init__.<locals>.<listcomp>r   r   )r%   r-   r$   	itertoolschainr   r   )r4   r   r7   r7   r8   r$      s   

zMLP.__init__N)r=   r>   r?   Sequencer@   r$   r7   r7   r7   r8   r      s    r   c                       s   e Zd Z					ddeee  dee deee  deed	 ef d
ededed ded ded f fddZdd Z	dee
j fddZ  ZS )	ConvStackr   r   r   r   r   dim_res_blocksrJ   
resamplersr   dim_times_res_block_hiddennum_res_blocksres_block_in_norm)r   r   r   r   res_block_hidden_normr   r   c
           
   
      s   t    tdd tt|tr|nt||D | _	tdd t
t|d d |dd  t|tr5|nt|D | _t fddt
|D | _tdd tt|tr`|nt||D | _d S )Nc              	   S   s4   g | ]\}}|d urt j||ddddnt  qS Nr   r   )r   rL   r"   r%   r1   r0   )rN   Zdim_in_dim_res_block_r7   r7   r8   rQ          "z&ConvStack.__init__.<locals>.<listcomp>c                 S   s&   g | ]\}\}}}t ||d |dqS )r
   )r   r   )r   )rN   rm   Zdim_prevZdim_succZ	resamplerr7   r7   r8   rQ      s    r   r   c                    sF   g | ]\} t j fd dtttr| nD  qS )c              	   3   s(    | ]}t   d V  qdS ))r   r   r   N)r   rM   )r   r   r   r   r   r7   r8   	<genexpr>   s    

z0ConvStack.__init__.<locals>.<listcomp>.<genexpr>)r%   r-   r_   r[   list)rN   rm   r   r   r   r   r   )r   r8   rQ      s    c              	   S   s4   g | ]\}}|d urt j||ddddnt  qS r   r   )rN   Zdim_out_r   r7   r7   r8   rQ      r   )r#   r$   r%   r^   r   r[   r   r   repeatinput_blocks	enumerater   
res_blocksoutput_blocks)
r4   r   r   rJ   r   r   r   r   r   r   r5   r   r8   r$      s"   









zConvStack.__init__c                 C   st   t t| jD ]}t| j| | j|< qt t| jD ]}t t| j| D ]}t| j| | | j| |< q&qd S r9   )r_   r\   r   r   r   )r4   rm   jr7   r7   r8   rn      s   z'ConvStack.enable_gradient_checkpointingrZ   c                 C   s   g }t t| jD ];}| j| || }|dkr|}n|d ur#|| }| j| |}|| j| | |t| jd k rD| j| |}q	|S )Nr   r   )r_   r\   r   r   appendr   r   )r4   rZ   out_featuresrm   featurer:   r7   r7   r8   r<      s   zConvStack.forward)r   r   r   r   r   )r=   r>   r?   r   Optionalr@   r   rB   r$   rn   rb   r   r<   rC   r7   r7   r5   r8   r      s6    

	'r   )typingnumbersr   rS   r   r'   sysrb   r   torch.nnr%   torch.nn.functional
functionalr   Z dinov2.models.vision_transformerr   utilsr   r   r	   utils.geometry_torchr   Moduler   rD   r-   r   r   r   r7   r7   r7   r8   <module>   s$    5D-