o
    {i                     @   s   d Z ddlZddlmZmZmZ ddlZddlm  m	Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ edZG dd dejZG dd dejZdd ZdS )z
Backbone modules.
    N)ListOptionalUnion)nn   )NestedTensor   )build_position_encodingLayerNorm2D)WindowsWrapperdinov3c                       sZ   e Zd Z			ddejdedeee  de	e
ef def
 fd	d
ZdefddZ  ZS )DINOBackboneNr   Tbackbone_modeltrain_backboneblocks_to_trainlayers_to_useuse_layernormc           
         s  t    || _|| _| jj| _|| _t| j D ]$\}\}| jr/tfdd| jD nd}|r9dv s9|s>|	d q| jjg| _
| jj}	t|trUt|	| |	n| t| jd| jjg| jj  fddt|	D | jrtd	d D | _tg| _|| _d S )
Nc                 3   s     | ]}d | d  v V  qdS ).N ).0b)namer   Q/data/cameron/keygrip/volume_dino_tracks/dinov3/eval/detection/models/backbone.py	<genexpr>5   s    z(DINOBackbone.__init__.<locals>.<genexpr>T
mask_tokenF
embed_dimsc                    s   g | ]
}| v r| qS r   r   )r   i)blocks_to_taker   r   r   
<listcomp>C       z)DINOBackbone.__init__.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r
   )r   	embed_dimr   r   r   r   F   s    )super__init__backboner   
patch_sizer   	enumeratenamed_parametersanyrequires_grad_stridesn_blocks
isinstanceintrangegetattrr!   r   
ModuleListlayer_normssumnum_channelsr   )
selfr   r   r   r   r   _	parameterZtrain_conditionZn_all_layers	__class__)r   r   r   r   r#   &   s(   

"

zDINOBackbone.__init__tensor_listc                 C   s   | j j|j| jdd}| jrdd t| j|D }tj|ddg}g }|D ])}|j	}|d us0J t
j|d   |jdd  dtjd	 }|t|| q%|S )
NT)nreshapec                 S   s   g | ]
\}}||  qS r   )
contiguous)r   lnxr   r   r   r   N   r    z(DINOBackbone.forward.<locals>.<listcomp>r   )axis)sizer   )r$   Zget_intermediate_layerstensorsr   r   zipr1   torchcatmaskFinterpolatefloatshapetoboolappendr   )r4   r9   xsoutr>   mrF   r   r   r   forwardK   s   ,zDINOBackbone.forward)Nr   T)__name__
__module____qualname__r   ModulerL   r   r   strr   r-   r#   r   rQ   __classcell__r   r   r7   r   r   %   s     

%r   c                       s*   e Zd Z fddZdefddZ  ZS )BackboneWithPositionEncodingc                    s"   t  || |j| _|j| _d S )N)r"   r#   r*   r3   )r4   r$   position_embeddingr7   r   r   r#   \   s   z%BackboneWithPositionEncoding.__init__r9   c                    s.   t  d |} fddt|D }||fS )Nr   c                    s*   g | ]\}} d  | | |jjqS )r   )rK   rB   dtype)r   idxr>   r4   r   r   r   c   s   * z8BackboneWithPositionEncoding.forward.<locals>.<listcomp>)listr&   )r4   r9   rO   posr   r\   r   rQ   a   s   z$BackboneWithPositionEncoding.forward)rR   rS   rT   r#   r   rQ   rW   r   r   r7   r   rX   [   s    rX   c                 C   st   t |}d}t| ||j|j|j}|jdkr0td|j d|j d t||j|j|j	d}ntd t
||S )NFr   zWrapping with z x z windows)Zn_windows_wZn_windows_hr%   zNot wrapping with windows)r	   r   r   r   backbone_use_layernormn_windows_sqrtloggerinfor   r%   rX   )r   argsrY   r   r$   r   r   r   build_backboneg   s   


rd   )__doc__loggingtypingr   r   r   rD   torch.nn.functionalr   
functionalrG   	util.miscr   position_encodingr	   utilsr   windowsr   	getLoggerra   rU   r   
SequentialrX   rd   r   r   r   r   <module>   s   
6