
    ,>i                        S r SSKrSSKJr  SSKJs  Jr  SrSrSr	Sr
SrSrS	rS	rS
rSrSrSrSr " S S\R(                  5      r\S:X  Ga  \R.                  " \R0                  R2                  R5                  5       (       a  SOS5      r\" S\SS9r\R9                  \5      r\R:                  " SSSS5      R9                  \5      r\R:                  " SS5      R9                  \5      S-  r\R@                  " S5      R9                  \5      r!\RD                  " 5          \" \\\!S9u  r#r$SSS5        \%" S\#RL                  5        \%" S\$RL                  5        gg! , (       d  f       N6= f)a4  ACT / vanilla regression baseline: image + current robot state -> N_WINDOW future (3D + gripper) in global robot frame.

Conditioned on current 3D position (3) and gripper state (1) concatenated to CLS before regression.
No heatmaps; direct regression of trajectory_3d (N_WINDOW, 3) and gripper (N_WINDOW,).
    Nz;/Users/cameronsmith/Projects/robotics_testing/random/dinov3zt/Users/cameronsmith/Projects/robotics_testing/random/dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?   gi1?gɿg?       c                   X   ^  \ rS rSrSrS\SS4U 4S jjrU 4S jrS r  SS	 jr	S
r
U =r$ )ACTTrajectoryPredictor   z}Vanilla regression: image + current (3d, gripper) -> (N_WINDOW, 3) trajectory_3d + (N_WINDOW,) gripper in global robot frame.  Fi   c                 `  > [         TU ]  5         Xl        X l        [        U l        [        S5        [        R                  R                  [        SS[        S9U l        U(       aN  U R                  R                  5        H
  nSUl        M     U R                  R                  5         [        S5        O[        S5        U R                  R                   U l        ["        R$                  " ["        R&                  " U R                   [(        -   U5      ["        R*                  " 5       ["        R,                  " S5      ["        R&                  " XD5      ["        R*                  " 5       ["        R,                  " S5      ["        R&                  " XBS	-  U-   5      5      U l        [        S
U SU S35        g )NzLoading DINOv2 model...dinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainable皙?   u?   ✓ ACT head: [CLS, current_3d, current_gripper] -> MLP -> (B, z*3 + ))super__init__target_sizen_windowDINO_PATCH_SIZE
patch_sizeprinttorchhubloadDINO_REPO_DIRDINO_WEIGHTS_PATHdino
parametersrequires_gradeval	embed_dimnn
SequentialLinearCURRENT_STATE_DIMGELUDropoutmlp)selfr   r   freeze_backbone
hidden_dimparam	__class__s         `/Users/cameronsmith/Projects/robotics_testing/3dkeygrip/volume_dino_tracks_act_baseline/model.pyr   ACTTrajectoryPredictor.__init__    s3   & )'(IINN%	 # 
	 --/&+# 0IINN./45,,==IIdnn'88*EGGIJJsOIIj-GGIJJsOIIjQ,"9:
 	OPXzY^_g^hhijk    c                    > [         TU ]  U5        [        U S5      (       a   U R                  R                  U5      U l        U $ )Nr   )r   tohasattrr   )r+   devicer/   s     r0   r4   ACTTrajectoryPredictor.toB   s5    
64  		V,DIr2   c                    UR                   S   nU R                  R                  U5      u  nu  pEU R                  R                   HA  nU R                  R                  (       a  U R                  R	                  XES9OS nU" X75      nMC     U R                  R
                  (       a  U R                  R                  US S 2S U R                  R                  S-   24   5      nU R                  R                  US S 2U R                  R                  S-   S 24   5      n	[        R                  " X/SS9nOU R                  R                  U5      nUS S 2S4   n
U
$ )Nr   )HW   dim)shaper   prepare_tokens_with_masksblocks
rope_embeduntie_cls_and_patch_normscls_normn_storage_tokensnormr   cat)r+   xBx_tokensH_pW_pblkrope_sincos
x_norm_clsx_norm_patches	cls_tokens              r0   _extract_cls#ACTTrajectoryPredictor._extract_clsH   s   GGAJ#yyBB1E*399##C@D		@T@T$))...<Z^K81H $ 99..++HQ8X$)):T:TWX:X8X5X,YZJ!YY^^HQ		8R8RUV8V8X5X,YZNyy*!=1EHyy~~h/HQTN	r2   c	                 J   U R                  U5      n	U	R                  S   n
U	R                  nUc   [        R                  " U
SXR
                  S9nUc  [        R                  " XU	R
                  S9nUR                  5       S:X  a  UR                  S5      n[        R                  " XU/SS9nU R                  U5      nUSS2SU R                  S-  24   R                  XR                  S5      nUSS2U R                  S-  S24   R                  XR                  5      nX4$ )a  
Args:
    x: (B, 3, H, W)
    current_3d: (B, 3) current gripper keypoint 3D position in world frame. If None, zeros.
    current_gripper_state: (B,) or (B, 1) current gripper value. If None, zeros.
    start_keypoint_2d, current_height: ignored (API compatibility)

Returns:
    trajectory_3d: (B, N_WINDOW, 3) in global robot frame
    gripper: (B, N_WINDOW) gripper joint value per timestep
r   Nr   )r6   dtyper;   r<   )rQ   r>   r6   r   zerosrT   r=   	unsqueezerF   r*   r   view)r+   rG   gt_target_heatmaptrainingstart_keypoint_2dcurrent_heightcurrent_gripper
current_3dcurrent_gripper_stateclsrH   r6   condouttrajectory_3dgrippers                   r0   forwardACTTrajectoryPredictor.forwardW   s    "IIaLQ&		JJ ($)KK		$R! $$&!+$9$C$CA$F!yy#+@AqIhhtnA2!222388MM1Ma*,,-221mmD%%r2   )r   r#   r*   r   r   r   )NFNNNNN)__name__
__module____qualname____firstlineno____doc__N_WINDOWr   r4   rQ   rd   __static_attributes____classcell__)r/   s   @r0   r   r      s8     H#&5]`  lD C7;& &r2   r   __main__mpscpur
   T)r   r   r,      r   r   )r]   r^   rb   rc   )'rj   r   torch.nnr$   torch.nn.functional
functionalFr   r   r   IMAGENET_MEANIMAGENET_STDrk   
MIN_HEIGHT
MAX_HEIGHTMIN_GRIPPERMAX_GRIPPERN_HEIGHT_BINSN_GRIPPER_BINSr'   Moduler   rf   r6   backendsro   is_availablemodelr4   randnrG   cur_3drandcur_gripno_gradtrajgripr   r>    r2   r0   <module>r      sb  
    M K %$

  T&RYY T&n z\\5>>#5#5#B#B#D#D%%PF"sXW[\EHHVEAq#s#&&v.A[[A!!&)C/Fzz!}'H	1xP
d 
	/4::&	)TZZ   
s   E
E'