o
    {i                     @   s   d dl Z d dlmZ d dlmZmZ d dlmZmZ d dl	m
Z
mZ e dZG dd deZejfd	ejd
eee ef fddZG dd deZG dd dejZdS )    N)Enum)SequenceUnion)CenterPaddingStretchToMultiple)Tensornndinov3c                   @      e Zd ZdZdZdZdS )BackboneLayersSetLAST	FOUR_LASTFOUR_EVEN_INTERVALSN)__name__
__module____qualname__r   r   r    r   r   R/data/cameron/keygrip/volume_dino_tracks/dinov3/eval/dense/depth/models/encoder.pyr      s    r   modelbackbone_out_layersc                    s   t | dd t|tr|}|tjkr d g}n,|tjkr*dd t d  D }n|tjkrD dkr8g d}n fddtdd	D }t fd
d|D sQJ |S )a  
    Get indices for output layers of the ViT backbone. For now there are 3 options available:
    BackboneLayersSet.LAST : only extract the last layer, used in segmentation tasks with a bn head.
    BackboneLayersSet.FOUR_LAST : extract the last 4 layers, used in segmentation (multiscale setting)
    BackboneLayersSet.FOUR_EVEN_INTERVALS : extract outputs every 1/4 of the total number of blocks
    Reference outputs in 'FOUR_EVEN_INTERVALS' mode :
    ViT/S (12 blocks): [2, 5, 8, 11]
    ViT/B (12 blocks): [2, 5, 8, 11]
    ViT/L (24 blocks): [5, 11, 17, 23] (correct), [4, 11, 17, 23] (incorrect)
    ViT/g (40 blocks): [9, 19, 29, 39]
    n_blocks   c                 S   s   g | ]}|qS r   r   .0ir   r   r   
<listcomp>-   s    z-_get_backbone_out_indices.<locals>.<listcomp>      )r            c                    s   g | ]
}| d   d qS )r   r   r   r   r   r   r   r   3   s       c                    s   g | ]}| k qS r   r   )r   	out_indexr!   r   r   r   4       )	getattr
isinstancelistr   r   r   ranger   all)r   r   Zout_indicesr   r!   r   _get_backbone_out_indices   s   




r*   c                   @   r
   )PatchSizeAdaptationStrategyZcenter_paddingstretchneverN)r   r   r   CENTER_PADDINGSTRETCHNO_ADAPTATIONr   r   r   r   r+   8   s    r+   c                	       sf   e Zd ZdZdejfdejdee	e
e f dedef fddZd	ed
e
eeef  fddZ  ZS )DinoVisionTransformerWrapperzVision Transformer.Fbackbone_modelr   use_backbone_normadapt_to_patch_sizec           	         s>  t    || _|| _t| jt|tr|nt|d| _z| jj	 W n" t
yD   | jj}| jj}td|g| d |g|  Y nw  fdd| jD | _	z| jj}W n t
yn   | jj}td|d |}Y nw |tju rzt|| _n|tju rt|| _n|tju rt | _ntd|| jd d S )	N)r   zBBackbone does not define embed_dims, using [embed_dim] * n_blocks=z insteadc                    s   g | ]} | qS r   r   )r   idx
embed_dimsr   r   r   [   r$   z9DinoVisionTransformerWrapper.__init__.<locals>.<listcomp>z:Backbone does not define input_pad_size, using patch_size=z"Unknown value adapt_to_patch_size=F)super__init__
final_normbackboner*   r&   r'   r   backbone_out_indicesr7   AttributeError	embed_dimr   loggerwarninginput_pad_size
patch_sizer+   r.   r   patch_size_adapterr/   r   r0   r   Identity
ValueErrorrequires_grad_)	selfr2   r   r3   r4   r>   r   rA   rB   	__class__r6   r   r9   A   s>   



z%DinoVisionTransformerWrapper.__init__xreturnc                 C   s(   |  |}| jj|| jdd| jd}|S )NT)nreshapereturn_class_tokennorm)rC   r;   get_intermediate_layersr<   r:   )rG   rJ   outputsr   r   r   forwardp   s   
z$DinoVisionTransformerWrapper.forward)r   r   r   __doc__r+   r.   r   Moduler   strr'   intboolr9   r   tuplerR   __classcell__r   r   rH   r   r1   >   s$    /r1   )loggingenumr   typingr   r   Z$dinov3.eval.dense.depth.models.embedr   r   torchr   r   	getLoggerr?   r   r   rT   r'   rV   r*   r+   r1   r   r   r   r   <module>   s   
	
!