
    i                     j   S r SSKrSSKJr  SSKJs  Jr  SrSrSr	Sr
SrSrS	rS	rS
rSrSrSr " S S\R&                  5      r\S:X  a  \R,                  " \R.                  R0                  R3                  5       (       a  SOS5      r\" S\SS9r\R7                  \5      r\R8                  " SSSS5      R7                  \5      r\R<                  " 5          \" \5      u  rr r!SSS5        \"" S\RF                  5        \"" S\ RF                  5        \"" S\!RF                  5        gg! , (       d  f       NI= f)aY  Motion tracks baseline: image -> 2D location + height + gripper per timestep (camera frame).

Factorized as 2d (N_WINDOW, 2) + height (N_WINDOW,) + gripper (N_WINDOW,). Same lifting as volume:
recover_3d_from_direct_keypoint_and_height(2d, height, camera_pose, cam_K) -> 3D for eval/live.
Camera frame for 2d/height; lift to 3D in world for IK.
    Nz;/Users/cameronsmith/Projects/robotics_testing/random/dinov3zt/Users/cameronsmith/Projects/robotics_testing/random/dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?   gi1?gɿg?    c                   T   ^  \ rS rSrSrS\SS4U 4S jjrU 4S jrS rSS	 jr	S
r
U =r$ )MotionTracksTrajectoryPredictor   zvPredict 2D (camera/image) + height + gripper per timestep. Lift to 3D with recover_3d_from_direct_keypoint_and_height.  Fi   c                   > [         TU ]  5         Xl        X l        [        U l        [        S5        [        R                  R                  [        SS[        S9U l        U(       aN  U R                  R                  5        H
  nSUl        M     U R                  R                  5         [        S5        O[        S5        U R                  R                   U l        US-  U-   U-   U l        [$        R&                  " [$        R(                  " U R                   U5      [$        R*                  " 5       [$        R,                  " S	5      [$        R(                  " XD5      [$        R*                  " 5       [$        R,                  " S	5      [$        R(                  " X@R"                  5      5      U l        [        S
U R"                   S35        g )NzLoading DINOv2 model...dinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainable   g?uB   ✓ MotionTracks head: CLS -> MLP -> (B, 2d+height+gripper) = (B, ))super__init__target_sizen_windowDINO_PATCH_SIZE
patch_sizeprinttorchhubloadDINO_REPO_DIRDINO_WEIGHTS_PATHdino
parametersrequires_gradeval	embed_dimout_dimnn
SequentialLinearGELUDropoutmlp)selfr   r   freeze_backbone
hidden_dimparam	__class__s         j/Users/cameronsmith/Projects/robotics_testing/3dkeygrip/volume_dino_tracks_motion_tracks_baseline/model.pyr   (MotionTracksTrajectoryPredictor.__init__   s;   & )'(IINN%	 # 
	 --/&+# 0IINN./45,,!|h.9==IIdnnj1GGIJJsOIIj-GGIJJsOIIj,,/
 	RSWS_S_R``abc    c                    > [         TU ]  U5        [        U S5      (       a   U R                  R                  U5      U l        U $ )Nr   )r   tohasattrr   )r)   devicer-   s     r.   r2   "MotionTracksTrajectoryPredictor.to@   s5    
64  		V,DIr0   c                    UR                   S   nU R                  R                  U5      u  nu  pEU R                  R                   HA  nU R                  R                  (       a  U R                  R	                  XES9OS nU" X75      nMC     U R                  R
                  (       a  U R                  R                  US S 2S U R                  R                  S-   24   5      nU R                  R                  US S 2U R                  R                  S-   S 24   5      n	[        R                  " X/SS9nOU R                  R                  U5      nUS S 2S4   n
U
$ )Nr   )HW   )dim)shaper   prepare_tokens_with_masksblocks
rope_embeduntie_cls_and_patch_normscls_normn_storage_tokensnormr   cat)r)   xBx_tokensH_pW_pblkrope_sincos
x_norm_clsx_norm_patches	cls_tokens              r.   _extract_cls,MotionTracksTrajectoryPredictor._extract_clsF   s   GGAJ#yyBB1E*399##C@D		@T@T$))...<Z^K81H $ 99..++HQ8X$)):T:TWX:X8X5X,YZJ!YY^^HQ		8R8RUV8V8X5X,YZNyy*!=1EHyy~~h/HQTN	r0   c                    U R                  U5      nU R                  U5      nUR                  S   n	USS2SU R                  S-  24   R	                  XR                  S5      n
USS2U R                  S-  U R                  S-  24   R	                  XR                  5      nUSS2U R                  S-  S24   R	                  XR                  5      nXU4$ )a  
Args:
    x: (B, 3, H, W)
    start_keypoint_2d, etc.: ignored (API compatibility)

Returns:
    trajectory_2d: (B, N_WINDOW, 2) in image/camera pixel coords
    trajectory_height: (B, N_WINDOW) height (z in world) per timestep
    gripper: (B, N_WINDOW) gripper value per timestep
r   Nr      )rN   r(   r;   r   view)r)   rD   gt_target_heatmaptrainingstart_keypoint_2dcurrent_heightcurrent_gripperclsoutrE   trajectory_2dtrajectory_heightgrippers                r.   forward'MotionTracksTrajectoryPredictor.forwardU   s     "hhsmIIaLA2!222388MM1M4==1#4t}}q7H#H HINNqR_R_`a*,,-221mmD88r0   )r   r!   r(   r   r"   r   r   )NFNNN)__name__
__module____qualname____firstlineno____doc__N_WINDOWr   r2   rN   r]   __static_attributes____classcell__)r-   s   @r.   r   r      s.     A#&5]` !dF9 9r0   r   __main__mpscpur	   T)r   r   r*   r   rQ   rZ   r[   r\   )$rc   r   torch.nnr#   torch.nn.functional
functionalFr   r   r   IMAGENET_MEANIMAGENET_STDrd   
MIN_HEIGHT
MAX_HEIGHTMIN_GRIPPERMAX_GRIPPERN_HEIGHT_BINSN_GRIPPER_BINSModuler   r_   r4   backendsrh   is_availablemodelr2   randnrD   no_gradt2dthgripr   r;    r0   r.   <module>r      s.      M K %$

L9bii L9^ z\\5>>#5#5#B#B#D#D%%PF+h`deEHHVEAq#s#&&v.A	aR 
	/399%	
rxx(	)TZZ  
 
s   D$$
D2