o
    ¾Ó˜iq  ã                   @   sH  d Z ddlZddlmZ ddlm  mZ dZdZdZ	dZ
dZdZd	Zd
Zd
ZdZdZdZdZed ZG dd„ dejƒZedkr¢e ejj ¡ rJdnd¡ZededdZe e¡Ze dddd¡ e¡Ze de¡ e¡d Z e !d¡ e¡Z"e #¡  eee e"d\Z$Z%W d  ƒ n1 sw   Y  e&de$j'ƒ e&de%j'ƒ dS dS )u  ACT joints baseline: image + current robot state -> N_WINDOW future (6D joints + gripper).

Conditioned on current 6 joint angles and gripper state (7) concatenated to CLS before regression.
Direct regression of trajectory_joints (N_WINDOW, 6) and gripper (N_WINDOW,) â€” no IK.
é    NÚdinov3z?dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pthé   )g
×£p=
ß?gÉv¾Ÿ/Ý?g–C‹lçûÙ?)gZd;ßOÍ?gyé&1¬Ì?gÍÌÌÌÌÌÌ?é   é   gœiÂö“1¦?gš™™™™™É¿gš™™™™™é?é    é   c                       sL   e Zd ZdZdeddf‡ fdd„	Z‡ fdd„Zd	d
„ Z		ddd„Z‡  Z	S )ÚACTJointsTrajectoryPredictorzgRegress (N_WINDOW, 6) joint angles + (N_WINDOW,) gripper from image + current joints + current gripper.éÀ  Fi   c                    sæ   t ƒ  ¡  || _|| _t| _tdƒ tjj	t
ddtd| _|r4| j ¡ D ]}d|_q$| j ¡  tdƒ ntdƒ | jj| _t t | jt |¡t ¡ t d¡t ||¡t ¡ t d¡t ||t | ¡¡| _td	|› d
|› dƒ d S )NzLoading DINOv2 model...Údinov3_vits16plusÚlocal)ÚsourceÚweightsFu   âœ“ Frozen DINOv2 backboneu    âœ“ DINOv2 backbone is trainableçš™™™™™¹?uM   âœ“ ACT joints head: [CLS, current_joints(6), current_gripper] -> MLP -> (B, z*6 + ú))ÚsuperÚ__init__Útarget_sizeÚn_windowÚDINO_PATCH_SIZEÚ
patch_sizeÚprintÚtorchÚhubÚloadÚDINO_REPO_DIRÚDINO_WEIGHTS_PATHÚdinoÚ
parametersÚrequires_gradÚevalÚ	embed_dimÚnnÚ
SequentialÚLinearÚCURRENT_STATE_DIMÚGELUÚDropoutÚN_JOINTSÚmlp)Úselfr   r   Úfreeze_backboneÚ
hidden_dimÚparam©Ú	__class__© úE/data/cameron/keygrip/volume_dino_tracks_act_baseline_joints/model.pyr       s8   
ü



ù	z%ACTJointsTrajectoryPredictor.__init__c                    s(   t ƒ  |¡ t| dƒr| j |¡| _| S )Nr   )r   ÚtoÚhasattrr   )r)   Údevicer-   r/   r0   r1   B   s   
zACTJointsTrajectoryPredictor.toc                 C   sÖ   |j d }| j |¡\}\}}| jjD ]}| jjr!| jj||dnd }|||ƒ}q| jjr[| j |d d …d | jjd …f ¡}| j |d d …| jjd d …f ¡}	t	j
||	gdd}n| j |¡}|d d …df }
|
S )Nr   )ÚHÚWr   ©Údim)Úshaper   Úprepare_tokens_with_masksÚblocksÚ
rope_embedÚuntie_cls_and_patch_normsÚcls_normÚn_storage_tokensÚnormr   Úcat)r)   ÚxÚBÚx_tokensÚH_pÚW_pÚblkÚrope_sincosÚ
x_norm_clsÚx_norm_patchesÚ	cls_tokenr/   r/   r0   Ú_extract_clsH   s   
$$z)ACTJointsTrajectoryPredictor._extract_clsNc
                 C   sÖ   |   |¡}
|
jd }|
j}|	du rtj|t||
jd}	|du r(tj|||
jd}| ¡ dkr3| d¡}tj	|
|	|gdd}|  
|¡}|dd…d| jt …f  || jt¡}|dd…| jt d…f  || j¡}||fS )aÊ  
        Args:
            x: (B, 3, H, W)
            current_joints: (B, 6) current arm joint angles. If None, zeros.
            current_gripper_state: (B,) or (B, 1) current gripper value. If None, zeros.
            current_3d, start_keypoint_2d, current_height: ignored (API compatibility)

        Returns:
            trajectory_joints: (B, N_WINDOW, 6) joint angles per timestep
            gripper: (B, N_WINDOW) gripper value per timestep
        r   N)r3   Údtyper   r6   )rK   r8   r3   r   Úzerosr'   rL   r7   Ú	unsqueezer@   r(   r   Úview)r)   rA   Úgt_target_heatmapÚtrainingÚstart_keypoint_2dÚcurrent_heightÚcurrent_gripperÚ
current_3dÚcurrent_gripper_stateÚcurrent_jointsÚclsrB   r3   ÚcondÚoutÚtrajectory_jointsÚgripperr/   r/   r0   ÚforwardW   s   



&$z$ACTJointsTrajectoryPredictor.forward)NFNNNNNN)
Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚN_WINDOWr   r1   rK   r]   Ú__classcell__r/   r/   r-   r0   r      s    "ÿr   Ú__main__ÚmpsÚcpur	   T)r   r   r*   é   é   r   )rW   rV   r[   r\   )(ra   r   Útorch.nnr!   Útorch.nn.functionalÚ
functionalÚFr   r   r   ÚIMAGENET_MEANÚIMAGENET_STDrb   r'   Ú
MIN_HEIGHTÚ
MAX_HEIGHTÚMIN_GRIPPERÚMAX_GRIPPERÚN_HEIGHT_BINSÚN_GRIPPER_BINSr$   ÚModuler   r^   r3   Úbackendsre   Úis_availableÚmodelr1   ÚrandnrA   Ú
cur_jointsÚrandÚcur_gripÚno_gradÚtraj_jÚgripr   r8   r/   r/   r/   r0   Ú<module>   s@    W

ÿö