
    {i*              	       f   S r SSKrSSKJr  SSKJs  Jr  SrSrSr	Sr
SrSrS	rS	rS
rSrSrSr " S S\R&                  5      r\S:X  a  \R,                  " \R.                  R0                  R3                  5       (       a  SOS5      r\" S\SS9r\R7                  \5      r\R8                  " SSSS5      R7                  \5      r\R<                  " 5          \" \S\R>                  " SS/5      S9u  r r!SSS5        \"" S\ RF                  5        \"" S\!RF                  5        gg! , (       d  f       N6= f)ae  Model for trajectory volume prediction using DINOv2.

Predicts a pixel-aligned volume: N_WINDOW x N_HEIGHT_BINS logits per pixel (cross-entropy).
Gripper is per-pixel (N_WINDOW x N_GRIPPER_BINS per pixel): supervised at GT pixel during training,
decoded at predicted pixel during inference (teacher forcing in train, argmax at pred pixel in val/inference).
    Nz;/Users/cameronsmith/Projects/robotics_testing/random/dinov3zt/Users/cameronsmith/Projects/robotics_testing/random/dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?   gi1?gɿg?    c                   R   ^  \ rS rSrSrS\S4U 4S jjrU 4S jrS rS
S jr	S	r
U =r$ )TrajectoryHeatmapPredictor   zPredicts pixel-aligned volume (N_WINDOW x N_HEIGHT_BINS per pixel) and per-pixel gripper (N_WINDOW x N_GRIPPER_BINS per pixel).  Fc                 8  > [         TU ]  5         Xl        X l        [        U l        [        S5        [        R                  R                  [        SS[        S9U l        U(       aN  U R                  R                  5        H
  nSUl        M     U R                  R                  5         [        S5        O[        S5        U R                  R                   U l        [        SU R                    35        ["        R$                  " U R                   U R                  [&        -  S	S
9U l        [        SU R                   S[&         SU R                   S[&         S3	5        ["        R*                  " [        R,                  " U R                   5      S-  5      U l        [        SU R                    S35        ["        R$                  " U R                   U R                  [0        -  S	S
9U l        [        SU R                   S[0         SU R                   S[0         S3	5        g )NzLoading DINOv2 model...dinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainableu   ✓ DINO embedding dim:    )kernel_sizeu   ✓ Volume head: (B, *z, H_p, W_p) -> upsample to (B, z, z, H, W)g{Gz?u,   ✓ Learnable start keypoint embedding (dim=)u"   ✓ Gripper head (per-pixel): (B, )super__init__target_sizen_windowDINO_PATCH_SIZE
patch_sizeprinttorchhubloadDINO_REPO_DIRDINO_WEIGHTS_PATHdino
parametersrequires_gradeval	embed_dimnnConv2dN_HEIGHT_BINSvolume_head	Parameterrandnstart_keypoint_embeddingN_GRIPPER_BINSgripper_head)selfr   r   freeze_backboneparam	__class__s        S/Users/cameronsmith/Projects/robotics_testing/3dkeygrip/volume_dino_tracks/model.pyr   #TrajectoryHeatmapPredictor.__init__   s   & )'(IINN%	 # 
	 --/&+# 0IINN./45,,((89: 99NNMMM)

 	%dmm_Am_Dcdhdqdqcrrt  vC  uD  DK  L  	M(*U[[5PSW5W(X%<T^^<LANO IINNMMN*

 	24==/>BRRqrvrr  rA  AC  DR  CS  SZ  [  	\    c                    > [         TU ]  U5        [        U S5      (       a   U R                  R                  U5      U l        U $ )Nr   )r   tohasattrr   )r-   devicer0   s     r1   r5   TrajectoryHeatmapPredictor.toI   s5    
64  		V,DIr3   c                 t   UR                   S   nU R                  R                  U5      u  nu  pEU R                  R                   HA  nU R                  R                  (       a  U R                  R	                  XES9OSnU" X75      nMC     U R                  R
                  (       a  U R                  R                  USS2SU R                  R                  S-   24   5      nU R                  R                  USS2U R                  R                  S-   S24   5      n	[        R                  " X/SS9nOU R                  R                  U5      nUSS2S4   n
USS2U R                  R                  S-   S24   nUR                  X$XPR                  5      nUR                  SSSS5      R                  5       nX4$ )zjExtract patch features and CLS token.
Returns:
    patch_features: (B, D, H_p, W_p)
    cls_token: (B, D)
r   )HWNr   )dim      )shaper   prepare_tokens_with_masksblocks
rope_embeduntie_cls_and_patch_normscls_normn_storage_tokensnormr   catreshaper#   permute
contiguous)r-   xBx_tokensH_pW_pblkrope_sincos
x_norm_clsx_norm_patches	cls_tokenpatch_tokenspatch_featuress                r1   _extract_dino_features1TrajectoryHeatmapPredictor._extract_dino_featuresO   ss    GGAJ#yyBB1E*399##C@D		@T@T$))...<Z^K81H $ 99..++HQ8X$)):T:TWX:X8X5X,YZJ!YY^^HQ		8R8RUV8V8X5X,YZNyy*!=1EHyy~~h/HQTN	499#=#=#A#C CD%--ac>>J'//1a;FFH((r3   c                    UR                   S   nU R                  U5      u  pUR                   u  ppUR                  5       S:X  a!  UR                  S5      R	                  US5      nUSS2S4   U-  U R
                  -  R                  5       R                  SUS-
  5      nUSS2S4   U-  U R
                  -  R                  5       R                  SUS-
  5      n[        R                  " XxR                  S9nUUSS2X4==   U R                  R                  S5      -  ss'   U R                  U5      nUR                  XpR                  [        X5      n[         R"                  " UR                  XpR                  [        -  X5      U R
                  U R
                  4SSS9nUR                  XpR                  [        U R
                  U R
                  5      nU R%                  U5      n[         R"                  " UU R
                  U R
                  4SSS9nUR                  XpR                  [&        U R
                  U R
                  5      nUU4$ )	aN  
Args:
    x: (B, 3, H, W)
    start_keypoint_2d: (B, 2) or (2,) optional
    current_height, current_gripper: ignored (kept for API compatibility)

Returns:
    volume_logits: (B, N_WINDOW, N_HEIGHT_BINS, H, W)
    gripper_logits: (B, N_WINDOW, N_GRIPPER_BINS, H, W)  # per-pixel; index at GT pixel (train) or pred pixel (inference)
r   r   N)r7   bilinearF)sizemodealign_corners)r?   rW   r<   	unsqueezeexpandr   longclampr   aranger7   r*   r'   viewr   r&   Finterpolater,   r+   )r-   rK   gt_target_heatmaptrainingstart_keypoint_2dcurrent_heightcurrent_gripperrL   rV   rT   _DrN   rO   start_patch_xstart_patch_ybatch_indicesvolvolume_logitsgripgripper_logitss                        r1   forward"TrajectoryHeatmapPredictor.forwardg   s    GGAJ$($?$?$B!'--c  "a'=N=X=XYZ=[=b=bcdfh=i):*1a40369I9IIOOQWWXY[^ab[bc*1a40369I9IIOOQWWXY[^ab[bcQ/D/DE}aEF$JgJgJqJqrsJttF ~.hhq--AHHQ5s@""D$4$45	
 &**1mm]DL\L\^b^n^no   0}}""D$4$45	
 1mm^TEUEUW[WgWghn,,r3   )r   r#   r,   r   r   r*   r   r'   )NFNNN)__name__
__module____qualname____firstlineno____doc__N_WINDOWr   r5   rW   ru   __static_attributes____classcell__)r0   s   @r1   r   r      s+     J#&5 )\V)0*- *-r3   r   __main__mpscpur	   T)r   r   r.   r>   r=   Fg      l@)rh   ri   rr   rt   )$r{   r   torch.nnr$   torch.nn.functional
functionalre   r   r   r   IMAGENET_MEANIMAGENET_STDr|   
MIN_HEIGHT
MAX_HEIGHTMIN_GRIPPERMAX_GRIPPERr&   r+   Moduler   rw   r7   backendsr   is_availablemodelr5   r)   rK   no_gradtensorrq   rs   r   r?    r3   r1   <module>r      s4       N K %$

v- v-r z\\5>>#5#5#B#B#D#D%%PF&3[_`EHHVEAq#s#&&v.A	!eu||UTYN?[\	T 
	/399%	
DJJ' 
 
s   D""
D0