o
    >i                     @   s2  d Z ddlZddlmZ ddlm  mZ dZdZdZ	dZ
dZdZd	Zd
Zdd ZG dd dejZedkrddlmZmZmZ eej rIdndZedeeeddZeeZdeZZeedddeZeeeded Ze  eeeZ W d   n1 sw   Y  e!de j" dS dS )zTPoint-track heatmap predictor: DINO + query-conditioned 64x64 heatmaps per timestep.    NZdinov3z?dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?       @   c                 C   s   t j| |dddS )N      )padding)nnConv2d)Zc_inZc_out r   6/data/cameron/keygrip/point_track_pretraining/model.py_conv3x3   s   r   c                       sH   e Zd ZdZdeeedf fdd	Z fddZdd	 Z	d
d Z
  ZS )PointTrackHeatmapPredictorzuPredicts 64x64 heatmap logits per (query, timestep). Conditioning: concat DINO feature at query start to every patch.  Fc                    s&  t    || _|| _|| _ | _t| _td t	j
jtddtd| _|r:| j D ]}d|_q*| j  td ntd | jj| _td| j  | jd	 } fd
d| _tt|dtjddtddtjddtddtjddtd|d| _td  d  d| d  d  d d S )NzLoading DINOv2 model...Zdinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainableu   ✓ DINO embedding dim:    c                    s   t j|   fdddS )NbilinearF)sizemodealign_corners)Finterpolate)xheatmap_sizer   r   <lambda>6   s    z5PointTrackHeatmapPredictor.__init__.<locals>.<lambda>   T)inplace   r   u)   ✓ Point-track decoder: 2*D -> bilinear r   z -> 3 convs -> 1x1 -> (B, z, ))super__init__target_sizen_windown_queryr   DINO_PATCH_SIZEZ
patch_sizeprinttorchhubloadDINO_REPO_DIRDINO_WEIGHTS_PATHdino
parametersrequires_gradeval	embed_dimupsampler	   
Sequentialr   ReLUr
   decoder)selfr$   r%   r&   r   freeze_backboneparamZ
dim_concat	__class__r   r   r#      sB   







,	z#PointTrackHeatmapPredictor.__init__c                    s(   t  | t| dr| j|| _| S )Nr.   )r"   tohasattrr.   )r7   devicer:   r   r   r<   B   s   
zPointTrackHeatmapPredictor.toc                 C   s  |j d }| j|\}\}}| jjD ]}| jjr!| jj||dnd}|||}q| jjr[| j|ddd| jjd f }| j|dd| jjd df }	t	j
||	gdd}n| j|}|dddf }
|dd| jjd df }||||| j}|dddd }||
fS )z:Returns patch_features (B, D, H_p, W_p), cls_token (B, D).r   )HWNr   dimr   r   )shaper.   Zprepare_tokens_with_masksblocksZ
rope_embedZuntie_cls_and_patch_normsZcls_normZn_storage_tokensnormr)   catreshaper2   permute
contiguous)r7   r   BZx_tokensH_pW_pZblkZrope_sincosZ
x_norm_clsZx_norm_patchesZ	cls_tokenZpatch_tokenspatch_featuresr   r   r   _extract_dino_featuresH   s   
$$z1PointTrackHeatmapPredictor._extract_dino_featuresc                 C   s<  |j d }| |\}}|j \}}}}|j}	|d | | j  d|d }
|d | | j  d|d }tj||	d|d	|| j
}||dd||
f }|d	|| j
|||}|dd	|| j
|||}tj||gdd	}||| j
 d| ||}| |}| |}||| j
| j| j| j}|S )
z
        Args:
            x: (B, 3, H, W) first frame
            start_keypoint_2d: (B, n_query, 2) in pixel coords (448 space)

        Returns:
            heatmap_logits: (B, n_query, n_window, heatmap_size, heatmap_size)
        r   ).r   r   ).r   )r>   Nr   rA   )rC   rN   r>   r$   longclampr)   arangeviewexpandr&   	unsqueezerF   r3   r6   r%   r   )r7   r   Zstart_keypoint_2drJ   rM   _DrK   rL   devZstart_patch_xZstart_patch_yZ	batch_idxZ
query_featZpatch_expandZquery_expandconcatupoutr   r   r   forward\   s    
	""  

z"PointTrackHeatmapPredictor.forward)__name__
__module____qualname____doc__N_WINDOW_POINT_TRACKN_QUERY_POINTSHEATMAP_SIZEr#   r<   rN   r\   __classcell__r   r   r:   r   r      s    (r   __main__)rb   ra   rc   cudacpur   T)r$   r%   r&   r   r8   r   r   Zheatmap_logits)#r`   r)   torch.nnr	   torch.nn.functional
functionalr   r,   r-   r'   ZIMAGENET_MEANZIMAGENET_STDra   rb   rc   r   Moduler   r]   datar>   rf   is_availablemodelr<   rJ   Qrandnr   randZstart_2dno_gradlogitsr(   rC   r   r   r   r   <module>   sB    g


