o
    \iB	                     @   s\   d Z ddlZddlmZ ddlZddlmZ dZdZdZ	defdd	Z
G d
d dejZdS )zQLoad DINOv3 from keygrip and extract 16x16 patch features from 256x256 RGB image.    N)Path   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?keygrip_rootc                 C   s\   t |  } t }t|  tjjddddd}t| | D ]}d|_	q"|
  |S )z[Load DINOv3 model from keygrip repo. keygrip_root must contain dinov3/ and dinov3/weights/.Zdinov3Zdinov3_vits16pluslocalz?dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth)sourceweightsF)r   resolveosgetcwdchdirtorchhubload
parametersrequires_gradeval)r   cwddinop r   5/data/cameron/vidgen/our_vid_model/dino_featurizer.pyload_dino_from_keygrip   s   

r   c                       s6   e Zd ZdZdef fddZdd Zdd Z  ZS )	DINOFeaturizerzNExtract patch features (B, D, H_p, W_p) from RGB (B, 3, 256, 256). H_p=W_p=16.r   c              	      s^   t    t|| _| jj| _| dtt	dddd | dtt
	dddd d S )Nmean      std)super__init__r   r   	embed_dimregister_bufferr   tensorIMAGENET_MEANviewIMAGENET_STD)selfr   	__class__r   r   r   #   s
   


"zDINOFeaturizer.__init__c                 C   s   || j  | j S )N)r   r   )r%   xr   r   r   	normalize*   s   zDINOFeaturizer.normalizec                 C   s  |j d dks|j d dkrtjjj|dddd}| dk r%|d	 d
 }| |}|j d }| j|\}\}}| jj	D ]}| jj
rK| jj
||dnd}|||}q=| jjrj| j|dd| jjd df }n| j|dd| jjd df }|}	|	|||| jdddd}
|
S )z=x: (B, 3, H, W) in [0,1] or [-1,1]. Returns (B, D, H_p, W_p).   )r+   r+   bilinearF)sizemodealign_cornersr   g      ?g       @)HWNr   r      )shaper   nn
functionalinterpolateminr)   r   Zprepare_tokens_with_masksblocksZ
rope_embedZuntie_cls_and_patch_normsnormZn_storage_tokensreshaper   permute)r%   r(   BZx_tokensZH_pZW_pZblkZrope_sincosZx_norm_patchesZpatch_tokensZpatch_featuresr   r   r   forward-   s    

&$zDINOFeaturizer.forward)	__name__
__module____qualname____doc__r   r   r)   r>   __classcell__r   r   r&   r   r       s
    r   )rB   r	   pathlibr   r   torch.nnr5   ZDINO_PATCH_SIZEr"   r$   r   Moduler   r   r   r   r   <module>   s    