o
    o>i                     @   s  d Z ddlZddlZddlmZ ddlm  mZ ej	ddZ
ej	ddZdZdZd	Zd	Zd
ZG dd dejZedkreej rGdndZededZeeZeddddeZeddddeZeddgeZededeZededeZ e!  eeeeee dZ"W d   n1 sw   Y  e"# D ]\Z$Z%e%dure&e$ de%j'  qdS dS )u  Dual-camera PARA with vanilla DINOv3 backbone.

Same as PARA but processes both agentview and wrist camera through a shared
DINOv3 backbone → bilinear upsample → conv refinement → per-view 1×1 conv heads.
No cross-attention or communication between views.
    NDINO_REPO_DIRz;/Users/cameronsmith/Projects/robotics_testing/random/dinov3DINO_WEIGHTS_PATHzt/Users/cameronsmith/Projects/robotics_testing/random/dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth          @   c                       sh   e Zd ZdZdeedf fdd	Z fddZdd	 Zd
d Z	dddZ
dddZ		dddZ  ZS )DualParaPredictorzEDual-camera PARA: shared DINOv3 backbone, independent per-view heads.  Fc                    s  t    || _|| _|| _t| _d| _td t	j
jtddtd| _|r:| j D ]}d|_q*| j  td ntd | jj| _| j}tt	|d	 | _ttj||d
ddt tj||d
ddt tj||d
ddt | _td|  d}dD ]4}	t| |	 dt||t d t| |	 dt||| d t| |	 dt||d
 t d qtd| dt d| d| d| dt d tdd |  D }
tdd |  D }td|dd|
dd d S ) N	dual_parazLoading DINOv2 model...dinov3_vits16pluslocal)sourceweightsFu   ✓ Frozen DINOv2 backboneu    ✓ DINOv2 backbone is trainableg{Gz?      )paddingu8   ✓ Shared feature convs: 3× Conv2d(3×3) at pred_size=   )agentwrist_volume_head_gripper_head_rotation_headu   ✓ Per-view heads: volume(   ×z), gripper(z), rotation(u   ×3×)c                 s   s    | ]}|  V  qd S N)numel.0p r   >/data/cameron/para_normalized_losses/libero/model_dual_para.py	<genexpr>F   s    z-DualParaPredictor.__init__.<locals>.<genexpr>c                 s   s    | ]
}|j r| V  qd S r   )requires_gradr   r   r   r   r    r!   G   s    u   ✓ DualPara: ,z / z trainable params)super__init__target_size	pred_sizen_windowDINO_PATCH_SIZE
patch_size
model_typeprinttorchhubloadr   r   dino
parametersr"   eval	embed_dimnn	Parameterrandnstart_keypoint_embedding
SequentialConv2dGELUfeature_convssetattrN_HEIGHT_BINS
N_ROT_BINSsum)selfr&   r'   r(   freeze_backbonekwargsparamDZN_GRIPviewn_totaln_trainable	__class__r   r    r%      sD   



  &.zDualParaPredictor.__init__c                    s   t  | | j|| _| S r   )r$   tor0   )r@   devicerH   r   r    rJ   J   s   zDualParaPredictor.toc           
      C   s   |j d }| j|\}\}}| jjD ]}| jjr!| jj||dnd}|||}q| jjr@| j|dd| jjd df }n| j|dd| jjd df }||||| j	}	|	
dddd }	|	S )z,Extract patch features from DINOv3 backbone.r   )HWNr   r   r   )shaper0   prepare_tokens_with_masksblocks
rope_embeduntie_cls_and_patch_normsnormn_storage_tokensreshaper3   permute
contiguous)
r@   xBx_tokensH_pW_pblkrope_sincosx_norm_patchespatch_featuresr   r   r    _extract_featuresO   s   
&$z#DualParaPredictor._extract_featuresc                 C   s(   t j|| j| jfddd}| |}|S )z1Bilinear upsample to pred_size + conv refinement.bilinearF)sizemodealign_corners)Finterpolater'   r;   )r@   r`   featsr   r   r    _upsample_and_refine_   s
   
z&DualParaPredictor._upsample_and_refineNc                 C   s@  |j d }| j}| j }}t| | d|||t||}|dur|d  d|d }	|d  d|d }
tj	||j
d|d||}tj	||j
dd|||}t| | d|||d	||}|||dd|
|	f }t| | d
|||dt||}|||dddd|
|	f }nd }}|||fS )u6   Apply per-view 1×1 conv heads, index at query pixels.r   r   N).r   r   ).r   rK   r   r   r   r   )rN   r(   r'   getattrrE   r=   longclampr-   arangerK   expandr>   )r@   rh   	view_namequery_pixelsrY   NrL   rM   volpxpy	batch_idxtime_idxgrip_mapgripperrot_maprotationr   r   r    _get_view_predictionsf   s   

"  "$
z'DualParaPredictor._get_view_predictionsr   c                 C   s   |  |||\}}}||fS )z6For eval: predict gripper/rotation at specific pixels.)r|   )r@   rh   rq   rp   _griprotr   r   r    predict_at_pixels~   s   z#DualParaPredictor.predict_at_pixelsc                 C   sf  |j d }i }| |}|j \}	}
}}|durj| dkr&|d|d}|dddf | | j  d|d }|dddf | | j  d|d }tj	||j
d}||dd||f  | jd7  < | |}| |d|\}}}||d< ||d< ||d	< ||d
< |dur| |}| |}| |d|\}}}||d< ||d< ||d< ||d< |S )z`
        Returns dict with agent_volume/gripper/rotation/feats and wrist_* equivalents.
        r   Nr   rj   r   agent_volumeagent_gripperagent_rotationagent_featsr   wrist_volumewrist_gripperwrist_rotationwrist_feats)rN   ra   dim	unsqueezero   r&   rl   rm   r-   rn   rK   r7   ri   r|   )r@   Z	agent_imgZ	wrist_imgstart_keypoint_2dagent_query_pixelswrist_query_pixelsrY   resultZagent_patchesr}   rD   r[   r\   ZskxZskybir   avagarZwrist_patchesr   ZwvZwgwrr   r   r    forward   s4   

**$


zDualParaPredictor.forwardr   )r   )NNNN)__name__
__module____qualname____doc__	PRED_SIZEN_WINDOWr%   rJ   ra   ri   r|   r   r   __classcell__r   r   rH   r    r      s    0

r   __main__cudacpur	   )r&   r(   r   r   g      l@)r   r   r   z: )(r   osr-   torch.nnr4   torch.nn.functional
functionalrf   environgetr   r   r)   r   r=   r>   r   Moduler   r   rK   r   is_availablemodelrJ   r6   awtensorkpzerosaqwqno_gradoutitemskvr,   rN   r   r   r   r    <module>   s@     

