o
    ӘiI                     @   s@  d Z ddlZddlmZ ddlm  mZ dZdZdZ	dZ
dZdZd	Zd	Zd
ZdZdZdZdZG dd dejZedkreejj rFdndZededdZeeZeddddeZedded Ze deZ!e"  eeee!d\Z#Z$W d   n1 sw   Y  e%de#j& e%de$j& dS dS )a4  ACT / vanilla regression baseline: image + current robot state -> N_WINDOW future (3D + gripper) in global robot frame.

Conditioned on current 3D position (3) and gripper state (1) concatenated to CLS before regression.
No heatmaps; direct regression of trajectory_3d (N_WINDOW, 3) and gripper (N_WINDOW,).
    Ndinov3z?dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?   gi1?gɿg?       c                       sL   e Zd ZdZdeddf fdd	Z fddZd	d
 Z		dddZ  Z	S )ACTTrajectoryPredictorz}Vanilla regression: image + current (3d, gripper) -> (N_WINDOW, 3) trajectory_3d + (N_WINDOW,) gripper in global robot frame.  Fi   c                    s   t    || _|| _t| _td tjj	t
ddtd| _|r4| j D ]}d|_q$| j  td ntd | jj| _tt| jt |t tdt||t tdt||d	 | | _td
| d| d d S )NzLoading DINOv2 model...dinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainable皙?   u?   ✓ ACT head: [CLS, current_3d, current_gripper] -> MLP -> (B, z*3 + ))super__init__target_sizen_windowDINO_PATCH_SIZE
patch_sizeprinttorchhubloadDINO_REPO_DIRDINO_WEIGHTS_PATHdino
parametersrequires_gradeval	embed_dimnn
SequentialLinearCURRENT_STATE_DIMGELUDropoutmlp)selfr   r   freeze_backbone
hidden_dimparam	__class__ >/data/cameron/keygrip/volume_dino_tracks_act_baseline/model.pyr       s8   




	zACTTrajectoryPredictor.__init__c                    s(   t  | t| dr| j|| _| S )Nr   )r   tohasattrr   )r(   devicer,   r.   r/   r0   B   s   
zACTTrajectoryPredictor.toc                 C   s   |j d }| j|\}\}}| jjD ]}| jjr!| jj||dnd }|||}q| jjr[| j|d d d | jjd f }| j|d d | jjd d f }	t	j
||	gdd}n| j|}|d d df }
|
S )Nr   )HW   dim)shaper   prepare_tokens_with_masksblocks
rope_embeduntie_cls_and_patch_normscls_normn_storage_tokensnormr   cat)r(   xBx_tokensH_pW_pblkrope_sincos
x_norm_clsx_norm_patches	cls_tokenr.   r.   r/   _extract_clsH   s   
$$z#ACTTrajectoryPredictor._extract_clsNc	                 C   s   |  |}	|	jd }
|	j}|du rtj|
d||	jd}|du r(tj|
||	jd}| dkr3|d}tj|	||gdd}| 	|}|ddd| j
d f |
| j
d}|dd| j
d df |
| j
}||fS )a  
        Args:
            x: (B, 3, H, W)
            current_3d: (B, 3) current gripper keypoint 3D position in world frame. If None, zeros.
            current_gripper_state: (B,) or (B, 1) current gripper value. If None, zeros.
            start_keypoint_2d, current_height: ignored (API compatibility)

        Returns:
            trajectory_3d: (B, N_WINDOW, 3) in global robot frame
            gripper: (B, N_WINDOW) gripper joint value per timestep
        r   Nr   )r2   dtyper5   r6   )rK   r8   r2   r   zerosrL   r7   	unsqueezer@   r'   r   view)r(   rA   gt_target_heatmaptrainingstart_keypoint_2dcurrent_heightcurrent_gripper
current_3dcurrent_gripper_stateclsrB   r2   condouttrajectory_3dgripperr.   r.   r/   forwardW   s   



&$zACTTrajectoryPredictor.forward)NFNNNNN)
__name__
__module____qualname____doc__N_WINDOWr   r0   rK   r\   __classcell__r.   r.   r,   r/   r      s    "r   __main__mpscpur   T)r   r   r)      r   r   )rU   rV   rZ   r[   )'r`   r   torch.nnr!   torch.nn.functional
functionalFr   r   r   IMAGENET_MEANIMAGENET_STDra   
MIN_HEIGHT
MAX_HEIGHTMIN_GRIPPERMAX_GRIPPERN_HEIGHT_BINSN_GRIPPER_BINSr$   Moduler   r]   r2   backendsrd   is_availablemodelr0   randnrA   cur_3drandcur_gripno_gradtrajgripr   r8   r.   r.   r.   r/   <module>   s>    W

