o
    ei
                     @   sn   d Z ddlZddlmZ ddlZddlmZ dZdZdZ	dZ
dd	ed
efddZdd ZG dd dejZdS )zmLoad DINOv3 from keygrip; optional freeze for teacher. Uses backbones only to avoid PyTorch 2.4-only imports.    N)Path   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?z0dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pthTkeygrip_rootfreezec                 C   s   t |  } | d }|d t }| std| t|tjvr+tjdt| ddl	m
} |dt|dd}|rJ| D ]}d|_q@|  |S )	zkLoad DINOv3 vits16plus from keygrip via dinov3.hub.backbones (avoids hubconf/segmentors for PyTorch < 2.4).Zdinov3weightszDINO weights not found: r   )dinov3_vits16plusTF)
pretrainedr   
check_hash)r   resolveWEIGHTS_FILENAMEexistsFileNotFoundErrorstrsyspathinsertZdinov3.hub.backbonesr   
parametersrequires_gradeval)r   r   Zdinov3_repoZweights_pathr   dinop r   2/data/cameron/vidgen/dino_vid_model/dino_loader.py	load_dino   s   r   c                 C   s   |j d }| |\}\}}| jD ]}| jr| j||dnd}	|||	}q| jr9| |dd| jd df }
n| |dd| jd df }
|
|||ddddd}|S )zCx: (B, 3, H, W) normalized with mean/std. Returns (B, D, H_p, W_p).r   )HWN         )	shapeZprepare_tokens_with_masksblocksZ
rope_embedZuntie_cls_and_patch_normsnormZn_storage_tokensreshapepermute)r   xmeanstdBZx_tokensZH_pZW_pZblkZrope_sincosZx_norm_patchesZpatch_featuresr   r   r   extract_patch_features$   s   

" r)   c                       s0   e Zd ZdZddef fddZdd Z  ZS )	DinoNormalizezhResize to dino_size (256 or 512), map to [0,1] if needed, then ImageNet normalize. 512 -> 32x32 patches.   	dino_sizec              	      sP   t    || _| dttdddd | dttdddd d S )Nr&   r   r   r'   )	super__init__r,   register_buffertorchtensorIMAGENET_MEANviewIMAGENET_STD)selfr,   	__class__r   r   r.   6   s   
"zDinoNormalize.__init__c                 C   sf   |j d | jks|j d | jkrtjjj|| j| jfddd}| dk r+|d d }|| j | j S )	Nr   bilinearF)sizemodealign_cornersr   g      ?g       @)	r    r,   r0   nn
functionalinterpolateminr&   r'   )r5   r%   r   r   r   __call__<   s
    zDinoNormalize.__call__)r+   )__name__
__module____qualname____doc__intr.   rA   __classcell__r   r   r6   r   r*   3   s    r*   )T)rE   r   pathlibr   r0   torch.nnr=   ZDINO_PATCH_SIZEr2   r4   r   boolr   r)   Moduler*   r   r   r   r   <module>   s    