o
    ŒÝÑiOÉ  ã                   @   sN  d Z ddlZddlZddlZddlZddlmZ ddlZddlZddl	Z
ddlZddlm  mZ ddlmZ ej dej e¡¡ ddlZddlmZmZmZmZmZ ddlmZ dd„ Zd	d
„ Zddl m!Z"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z* e
j+g d¢e
j,dZ-e
j+g d¢e
j,dZ.dZ/e/fdd„Z0e/fdd„Z1e/fdd„Z2e/ddfdd„Z3e/dfdd„Z4e/dfdd„Z5e/dfdd „Z6d!d"„ Z7d#d$„ Z8e9d%kr¥ej:d&d'Z;e;j<d(e=d)g d*¢d+d, e;j<d-e=d.d/d0 e;j<d1e=d2d3d4 e;j<d5e=d6d7d0 e;j<d8e>dd9 e;j<d:e=d;d9 e;j<d<e>d=d>d0 e;j<d?e>d@dAd0 e;j<dBe>dd9 e;j<dCe=dDd9 e;j<dEdFdGdH e;j<dIdFdJdH e;j<dKe>dLdMd0 e;j<dNe=dOdPd0 e;j<dQdFdRdH e;j<dSdFdTdH e;j<dUe?dVdWd0 e;j<dXe?dVdYd0 e;j<dZe?dVd[d0 e;j<d\e?dVd]d0 e;j<d^dFd_dH e;j<d`dFdadH e;j<dbdFdcdH e; @¡ ZAe8eAƒ dS dS )du¬  eval.py â€” Evaluate a trained PARA checkpoint in the LIBERO simulation environment.

PARA predicts the next N_WINDOW absolute EEF 3D positions from a single RGB image.
This script runs closed-loop rollouts: at each env step, re-run the model on the
current observation, decode the first predicted position into a delta OSC_POSE action,
and step the sim. Success is the LIBERO binary predicate check (env returns done=True).

Usage:
    python libero/eval.py         --checkpoint libero/checkpoints/para_libero_spatial_t0/best.pth         --benchmark libero_spatial         --task_id 0         --n_episodes 20

Action format: 7D OSC_POSE [delta_pos (3), delta_rot (3)=0, gripper (1)]
é    N)ÚPath)Útqdm)ÚTrajectoryHeatmapPredictorÚN_HEIGHT_BINSÚN_GRIPPER_BINSÚ
N_ROT_BINSÚ	PRED_SIZE)Ú*recover_3d_from_direct_keypoint_and_heightc                 C   s.  ddl m} | j |¡}| jj|  ¡ }| jj|  dd¡}|dd…df  }d}	|	|d  |d d  }
||
|  }t	j
 || ¡}|| | }t	 g d¢¡}tt	 ||¡ƒd	krat	 g d
¢¡}t	 ||¡}|t	j
 |¡ }t	 ||¡}t	 |¡}t	 |¡}t	 |¡t	 |¡ | t	 |¡t	 |¡ |  t	 |¡|  }|||  }|| }|t	j
 |¡d  }| }t	 g d¢¡}tt	 ||¡ƒd	krÍt	 g d¢¡}t	 ||¡}|t	j
 |¡d  }t	 ||¡}t	j|||gdd}| |¡ ¡ }t	 |d |d |d |d g¡}|| jj|< || jj|< |  ¡  dS )zÍReposition camera on a spherical cap around its default position.

    Matches the viewpoint generation script's camera positioning logic.
    theta=0, phi=0 means default camera position (no change).
    r   ©ÚRotationé   Né   çÍÌÌÌÌÌì?ç:Œ0âŽyE>)r   r   ç      ð?g®Gáz®ï?)é   r   ç        gê-™—q=)r   r   r   )r   r   r   éÿÿÿÿ©Úaxisr   )Úscipy.spatial.transformr   ÚmodelÚcamera_name2idÚdataÚcam_xposÚcopyÚcam_xmatÚreshapeÚnpÚlinalgÚnormÚarrayÚabsÚdotÚcrossÚradiansÚsinÚcosÚstackZfrom_matrixZas_quatÚcam_posÚcam_quatÚforward)ÚsimÚcamera_nameZ	theta_degZphi_degÚScipyRÚcam_idZdefault_posr   r+   ZTABLE_ZZt_hitZlook_atÚradiusZdefault_dirÚupÚrightZtrue_upÚthetaÚphiÚoffsetÚnew_posÚfwdZcam_zZup_hintZcam_xZcam_yÚRÚqZnew_quat© r:   ú3/data/cameron/para_normalized_losses/libero/eval.pyÚ_reposition_camera&   sN   

ÿþ"r<   c           
      C   sò   | dkrt S | dkrddlm} |S | dkrddlm} |S | dkr*ddlm} |S | d	kr6dd
lm} |S | dkrBddl	m
} |S | dkrNddlm} |S | dkrZddlm} |S | dkrfddlm} |S | dkrrddlm}	 |	S td| › ƒ‚)NÚparaÚactr   )ÚACTPredictorÚda3)ÚDA3PredictorÚmoge)ÚMoGePredictorÚdino_vla)ÚDinoVLAPredictorÚinternvl)ÚInternVLAPredictorÚinternvl_act)ÚInternVLACTPredictorÚdual_da3)ÚDualDA3PredictorÚ	dual_para)ÚDualParaPredictorÚcost_volume)ÚCostVolumePredictorzUnknown model_type: )r   Z	model_actr?   Z	model_da3rA   Z
model_mogerC   Zmodel_dino_vlarE   Zmodel_vla_internvlrG   Zmodel_vla_internvl_actrI   Zmodel_dual_da3rK   Zmodel_dual_pararM   Zmodel_cost_volumerO   Ú
ValueError)
Ú
model_typer?   rA   rC   rE   rG   rI   rK   rM   rO   r:   r:   r;   Úget_model_class\   s<   rR   )Ú	benchmarkÚget_libero_path)ÚOffScreenRenderEnv)Úget_camera_transform_matrixÚget_camera_extrinsic_matrixÚget_camera_intrinsic_matrixÚ#project_points_from_world_to_camera)g
×£p=
ß?gÉv¾Ÿ/Ý?g–C‹lçûÙ?©Údtype)gZd;ßOÍ?gyé&1¬Ì?gÍÌÌÌÌÌÌ?iÀ  c                 C   sb   |   tj¡d }t |¡ ¡ }tj|||ftjd}|t t	 }t
 | ddd¡¡ ¡  d¡}|S )uà   HxWx3 uint8 â†’ (1, 3, H, W) float tensor, ImageNet-normalized.

    LIBERO obs images are already upright (already flipped vs raw render),
    so we flipud to match training convention (flipud(obs) â†’ training image).
    ç     ào@©Úinterpolationr   r   r   )Úastyper   Úfloat32Úflipudr   Úcv2ÚresizeÚINTER_LINEARÚIMAGENET_MEANÚIMAGENET_STDÚtorchÚ
from_numpyÚ	transposeÚfloatÚ	unsqueeze)Úrgb_obsÚ
image_sizeÚimgÚtensorr:   r:   r;   Úpreprocess_obs   s   rp   c                 C   sx   t | |||ƒ}t| |ƒ}t| |||ƒ}|d  |  < |d  |  < | ¡ }|d  |9  < |d  |9  < |||fS )uª  Return camera matrices needed for projection and 3D recovery.

    - world_to_camera: (4,4) worldâ†’camera transform  â†’ for project_points_from_world_to_camera
    - camera_pose:     (4,4) cameraâ†’world transform   â†’ for recover_3d (ray unprojection)
    - cam_K:           (3,3) intrinsic at image_size  â†’ for recover_3d
    These are two different matrices; using the wrong one for 3D recovery gives bad targets.
    r   r   )rV   rW   rX   r   )r,   r-   rm   Úworld_to_cameraÚcamera_poseZ
cam_K_normÚcam_Kr:   r:   r;   Úget_camera_params   s   

rt   c                 C   sP   t |  dd¡ tj¡|||dd }t|d ƒ}t|d ƒ}tj||gtjdS )uQ   Project current EEF world position â†’ (u, v) pixel in training image convention.r   r   )ÚpointsZworld_to_camera_transformZcamera_heightZcamera_widthr   rZ   )	rY   r   r_   r   Úfloat64rj   rg   ro   r`   )Úeef_posrq   rm   Úpix_rcÚuÚvr:   r:   r;   Úeef_to_start_kp°   s   üûr{   c              
   C   s4  |j d }|| }	|  tj¡d }
t |
¡ ¡ }
tj|
||ftjd}
|d }t	j
| d¡dd |j ¡}|jddd  ¡  ¡ }tj|||ftjd}|| ¡  | ¡ d  }t |
¡}||d< t |
d	 |d
  dd¡}|d  tj¡}| ¡ }|| || }}t|d |	 ƒ}t|d |	 ƒ}t |||fdtjddtj¡ t| dd¡ tj¡|||ƒd }ttt|d ƒƒƒttt|d ƒƒƒ}}d|  krÒ|k rën nd|  krÞ|k rën nt |||fddd¡ d|› }|durü||rùdnd7 }t ||dtjd	ddtj¡ t ||dtjd	ddtj¡ |S )zPRender a single eval step: RGB + heatmap overlay + predicted pixel + GT EEF dot.r   r\   r]   ©r   r   r   ©Údimr   ©.r   çš™™™™™á?çÍÌÌÌÌÌÜ?r   ç      à?©r   éÿ   r   é   r   r   é   ©r„   r„   r„   ústep Nz	  SUCCESSz	  running©é
   é   ©é   r   r   )Úshaper_   r   r`   ra   r   rb   rc   rd   ÚFÚsoftmaxr   ÚmaxÚcpuÚnumpyÚminÚ
zeros_likeÚclipÚuint8ÚargmaxÚintÚ
drawMarkerÚMARKER_CROSSÚLINE_AArY   rv   Úroundrj   ÚcircleÚputTextÚFONT_HERSHEY_SIMPLEX)rl   Úvolume_logitsÚcurrent_eef_posrq   rs   rm   Ústep_idxÚsuccessÚ	pred_sizeÚscaleÚframeÚvol_tÚ	vol_probsÚ
heat_smallÚheatÚheat_rgbÚoverlayÚvisÚflat_idxÚpyÚpxÚpx_fullÚpy_fullrx   ry   rz   Úlabelr:   r:   r;   Úrender_eval_frame½   sD   

þý*0
rµ   c                 C   sV  |j d }|j d }|| }	|  tj¡d }
t |
¡ ¡ }
tj|
||ftjd}
t	| 
dd¡ tj¡|||ƒd }ttt|d ƒƒƒttt|d ƒƒƒ}}g }t|ƒD ]Ï}|d|f }tj| 
d¡dd 
|j ¡}|jddd  ¡  ¡ }tj|||ftjd}|| ¡  | ¡ d  }t |
¡}||d	< t |
d
 |d  dd¡}|d  tj¡}| ¡ }|| || }}t|d |	 ƒ}t|d |	 ƒ}t |||fdtjddtj¡ d|  krá|k rún nd|  krí|k rún nt |||fddd¡ d|› d|› }t ||dtjdddtj¡ t ||dtjdddtj¡ |  |¡ qTtj!|ddS )a
  Render a horizontal strip showing heatmaps for all N_WINDOW predicted timesteps.

    Each tile: RGB + heatmap overlay (red) + predicted pixel (green cross) + GT EEF (white dot).
    Returns a single wide image: (image_size, image_size * n_window, 3) uint8 RGB.
    r   r   r\   r]   r   r   r}   r   r   r€   r   r‚   rƒ   é   r   é   r‡   rˆ   z t+)é   é   gš™™™™™Ù?rŒ   r   )"rŽ   r_   r   r`   ra   r   rb   rc   rd   rY   r   rv   r™   r   rj   Úranger   r   r‘   r’   r“   r”   r•   r–   r—   r˜   rš   r›   rœ   rž   rŸ   r    ÚappendÚconcatenate)rl   r¡   r¢   rq   rs   rm   r£   Ún_windowr¥   r¦   r§   rx   Zeef_uZeef_vÚtilesÚtr¨   r©   rª   r«   r¬   r­   r®   r¯   r°   r±   r²   r³   r´   r:   r:   r;   Úrender_window_stripí   sH   

þý*
0rÀ   çš™™™™™©?c	           3         sØ  ddl m}	 d}
d}| jd }| jd }|| }tjtj}}tjtj}}tj	tj
tjd‰tj	tjtjd‰ g }g }t|ƒD ]:‰| dˆf }|jddd }| d¡ ¡  ¡ }|| }|| }|d	d	…||f  ¡  ¡ }| ||f¡ | |¡ q@tjd
d„ |D ƒtj|jd d¡}tj|tj|jd d¡}t ¡ $ t|dƒr³|jdkr³| |||¡\}‰n| ||¡\}‰W d	  ƒ n1 sÅw   Y  g }g }| ¡ } t|ƒD ]\‰\}}| dˆf }|d | }!|d | }"|d	d	…||f  ¡  ¡ }|ttd dƒ ||  | }#t tj	|!|"gtjd|#||ƒ}$|$d	u r*|r&|d n|  ¡ }$| |$¡ |$|  }%tj! "|%¡}&|&|krD|%|& | }%t #|%|
 dd¡}'tj	tj$tjd}(|	 %|(¡})ˆd	u ritj&dtjd}*nLˆ '¡ dkr‡ˆdˆf  (¡  )¡  *tj¡}+|+ˆ ˆ  ˆ },nt 	‡ ‡‡‡fdd„tdƒD ƒ¡},|)|	 +|,¡ }-|	 %|¡}.|-|. ,¡  }/t #|/ -¡ | dd¡}*t.|dˆf  ¡  ¡ ƒ}0|0dkrÈdnd}1tj&dtjd}2|'|2d	d…< |*|2dd…< |1|2d< | |2¡ qÖ||fS )uá  Decode all N_WINDOW predicted timesteps into OSC_POSE delta actions.

    Gripper/rotation are predicted by indexing features at the argmax pixel of each
    timestep and passing through the model's MLP heads (same as training inference path).

    Args:
        volume_logits:   (1, N_WINDOW, N_HEIGHT_BINS, pred_size, pred_size)
        model:           TrajectoryHeatmapPredictor (for predict_at_pixels)
        feats:           (1, D, pred_size, pred_size) feature map from forward()
        camera_pose:     (4,4) cameraâ†’world extrinsic  â† get_camera_extrinsic_matrix()
        cam_K:           (3,3) intrinsic at image_size
        current_eef_pos: (3,) numpy EEF position at start of window
        max_delta:       max position delta magnitude in metres before OSC normalisation

    Returns:
        actions:         list of N_WINDOW (7,) numpy [delta_pos(3), delta_rot_axisangle(3), gripper(1)]
        pred_3d_targets: list of N_WINDOW (3,) absolute EEF targets (for debug)
    r   r
   rÁ   r‚   r   r   rZ   r}   Nc                 S   s   g | ]\}}||g‘qS r:   r:   )Ú.0r±   r°   r:   r:   r;   Ú
<listcomp>T  s    z)decode_window_actions.<locals>.<listcomp>©r[   ÚdeviceZgripper_mlprN   ç      ð¿r   r   c                    sN   g | ]#}ˆd ˆ|dd…f   ¡  ¡ ttd dƒ ˆ | ˆ|   ˆ|  ‘qS ©r   Nr   ©r˜   Úitemr‘   r   ©rÂ   r   ©Úmax_rÚmin_rZrotation_logitsr¿   r:   r;   rÃ   „  ó    (þÿÿÿé   r†   )/r   r   rŽ   Úmodel_moduleÚ
MIN_HEIGHTÚ
MAX_HEIGHTÚMIN_GRIPPERÚMAX_GRIPPERr   r!   ÚMIN_ROTrv   ÚMAX_ROTrº   r‘   r   r˜   rÉ   r»   rg   ro   r`   rÅ   rk   ÚlongÚno_gradÚhasattrrQ   Úpredict_at_pixelsr   Ú	enumerater   r	   r   r    r–   ÚREF_ROTATION_QUATÚ	from_quatÚzerosr~   r’   r“   r_   Zfrom_rotvecÚinvÚ	as_rotvecr™   )3r¡   r   Úfeatsrr   rs   r¢   Úcurrent_eef_quatrm   Ú	max_deltar.   ÚOSC_POS_SCALEÚOSC_ROT_SCALEr½   r¥   r¦   Úmin_hÚmax_hÚmin_gÚmax_gZpred_px_listZpred_hbin_listr¨   Ú
max_over_hr¯   r°   r±   Úh_binÚpred_pixelsZ
pred_hbinsZgripper_logitsÚactionsÚpred_3d_targetsÚref_posr²   r³   ÚheightÚpred_3dÚ	delta_posr    Ú
delta_normÚref_quatZR_refÚdelta_rot_normZrot_sigmoidZdelta_rotvec_predÚR_predÚ	R_currentÚR_deltaÚg_classÚgripper_cmdÚactionr:   rË   r;   Údecode_window_actions%  s–   

ÿþ
€ü	ÿ




ý
rü   c           E   	      s  ddl m} d}d}| jd }| jd }|| }tjtj}}tjtjtj	d‰tjtj
tj	d‰ g }g }|
 ¡ }t|ƒD ]Ê}| d|f }|d|f }|jddd }| d¡ ¡  ¡ }|| }|| }|d | } |d | }!|d	d	…||f  ¡  ¡ }"|"ttd dƒ ||  | }#ttj| |!gtj	d|#||ƒ}$d
}%|$d	urÜt|$ dd¡|	||ƒd }&t|&d ƒt|&d ƒ}'}(d})|)|'  krÊ||) k rÜn n|)|(  krØ||) k rÜn nd}%|%ré|}*|}+|},d}-|}.n
|}*|}+|},d}-|}.|*jddd }/|/ d¡ ¡  ¡ }0|0| }1|0| }2|2d | }3|1d | }4|*d	d	…|1|2f  ¡  ¡ }5|5ttd dƒ ||  | }6ttj|3|4gtj	d|6|+|,ƒ}7|7d	u rZ|$d	urO|$n
|rV|d n| ¡ }7| |7¡ |7| }8tj |8¡}9|9|krt|8|9 | }8t |8| dd¡}:tj|2|1ggtj|.jd d¡};t ¡  |j|.|;|-d\}<‰W d	  ƒ n	1 s§w   Y  t ‡ ‡‡fdd„tdƒD ƒ¡}=|  d|=¡}>| !|¡}?|>|? "¡  }@t |@ #¡ | dd¡}At$|<d  ¡  ¡ ƒ}B|Bdkrédnd}Ctj%dtjd}D|:|Dd	d…< |A|Ddd…< |C|Dd< | |D¡ q=||fS )a/  Decode dual-camera predictions with agentview-guided wrist selection.

    Strategy: use agentview for coarse 3D prediction, then check if that 3D point
    projects into the wrist camera frustum. If yes, use wrist view's prediction
    (more precise at close range). If no, fall back to agentview.
    r   r
   rÁ   r‚   r   r   rZ   r}   NFr   r   TZwristÚagentrÆ   r   rÄ   )Ú	view_namec                    sN   g | ]#}ˆd d |dd…f   ¡  ¡ ttd dƒ ˆ | ˆ|   ˆ|  ‘qS rÇ   rÈ   rÊ   ©rÌ   rÍ   Z
rot_logitsr:   r;   rÃ   ü  rÎ   z.decode_dual_window_actions.<locals>.<listcomp>Úxyzr|   rÏ   r†   )&r   r   rŽ   rÐ   rÑ   rÒ   r   r!   rÕ   rv   rÖ   r   rº   r‘   r   r˜   rÉ   r   r	   rY   rj   r»   r   r    r–   rg   ro   r`   rÅ   rk   rØ   rÚ   Ú
from_eulerrÝ   rß   rà   r™   rÞ   )EZ	agent_volZ	wrist_volr   Úagent_featsÚwrist_featsÚagent_cam_poseZagent_cam_KÚwrist_cam_poseÚwrist_cam_KÚ	wrist_w2cr¢   râ   rm   rã   r.   rä   rå   r½   r¥   r¦   ræ   rç   rí   rî   rï   r¿   Za_vol_tZw_vol_tZa_max_over_hÚa_flatZa_pyZa_pxZ	a_px_fullZ	a_py_fullZa_h_binZa_heightZagent_3dZ	use_wristZwrist_pix_rcZwuZwvÚmarginr¨   Zcam_posers   rþ   rá   rê   r¯   r°   r±   r²   r³   rë   rð   rñ   rò   r    ró   rì   Zgrip_logitsÚ
euler_predrö   r÷   rø   rõ   rù   rú   rû   r:   rÿ   r;   Údecode_dual_window_actions›  s¬   


ÿÿþ8ÿ
$

 
ÿý
r  c           "      C   sä  ddl m} d}d}tjtjtjd}tjtjtjd}	tjtjtjd}
tjtj	tjd}t
tjƒ}t
tjƒ}| jd }g }| ¡ }t|ƒD ]¨}| d|f  ¡  ¡  tj¡}||	|  | }|d|f  ¡  ¡  tj¡}|||
  |
 }t
|d|f  ¡ ƒ}|||  | }|| }tj |¡}|dkr›|| d }t || dd¡}| d	|¡}| |¡}|| ¡  }t | ¡ | dd¡}t
|d|f  ¡ ƒ}|d
krÐdnd} tjdtjd}!||!dd…< ||!dd…< | |!d< | |!¡ qG|S )ad  Decode ACT normalized [0,1] predictions into OSC_POSE delta actions.

    Model outputs are in [0,1] (sigmoid). We denormalize using dataset min/max,
    then compute deltas for the OSC_POSE controller.

    Args:
        pos_pred:     (1, N_WINDOW, 3) normalized [0,1] positions (tensor)
        rot_pred:     (1, N_WINDOW, 3) normalized [0,1] rotations (tensor)
        gripper_pred: (1, N_WINDOW) normalized [0,1] gripper (tensor)
        current_eef_pos:  (3,) numpy
        current_eef_quat: (4,) numpy

    Returns:
        actions: list of N_WINDOW (7,) numpy [delta_pos(3), delta_rot(3), gripper(1)]
    r   r
   rÁ   r‚   rZ   r   rÆ   r   r   r   rÏ   Nr   r†   )r   r   r   r!   rÐ   ÚMIN_POSrv   ÚMAX_POSrÕ   rÖ   rj   rÓ   rÔ   rŽ   r   rº   r’   r“   r_   r   r    r–   r  rÝ   rß   rà   rÞ   r`   r»   )"Úpos_predÚrot_predÚgripper_predr¢   râ   r.   rä   rå   Úmin_posÚmax_posÚmin_rotÚmax_rotrè   ré   r½   rí   rï   r¿   Zpos_normrñ   Zrot_normr
  Úg_normZgripper_valrò   r    ró   rö   r÷   rø   Z	delta_rotZg_logitrú   rû   r:   r:   r;   Údecode_act_actions  sH   



r  c           o         s¢  t  t j ¡ r	dn	t jj ¡ rdnd¡}td|› ƒ t| jƒ}| 	¡ s+t
d|› ƒ‚t j|dd}t| dtj¡ƒt_t| dtj¡ƒt_t| d	tj¡ƒt_t| d
tj¡ƒt_d|v rh|d t_|d t_d|v rv|d t_|d t_d|v r|d t_tdtjd›dtjd›dƒ tdtjd›dtjd›dƒ tddd„ tjD ƒ› ddd„ tjD ƒ› ƒ tddd„ tjD ƒ› ƒ tddd„ tjD ƒ› ddd„ tjD ƒ› ƒ t| jƒ}| jdkrç|ttd}n| jdv rô|t| jd }n|td!}|j|d" d#d$ | |¡}| ¡  td%| j› d&|› ƒ t  !¡ | j" ƒ }| #| j$¡}|j%}td'| j"› d(|› ƒ t&j' (t)d)ƒ| *| j$¡¡}	t+ ,|	d*¡‰ t-d+d„ ˆ d,  .¡ D ƒƒ}
‡ fd-d„|
D ƒ}W d   ƒ n	1 sgw   Y  t/| j0t1|ƒƒ}td.|› d/t1|ƒ› d0ƒ t&j' (t)d1ƒ|j2|j3¡}t4|tt| j5g| jd2v rd3gng  d4}| 6| j6¡ | 7¡  | j8r|j9j:}d5D ]!}z|j; <|¡}t= >g d6¢¡|j;j?|< W q¶ t@y×   Y q¶w | A¡  g d7¢}tBƒ }|D ]}z| C|j; <|¡¡ W qå t@yþ   Y qåw tD|j;jEƒD ]}|j;jF| |v rd8|j;jG| d9< qtd:ƒ td;ƒ d }| jd<v rbt&j' (| jH| j"d=| j$› d>¡}t&j' 	|¡rRt j||d Id?¡}td@|› ƒ ntdA|› dBƒ t jJdCdD|dE}d }| jdv rz| KdFdG¡g}tdH|d? › ƒ g }g }tLtD|ƒdIdJD ]}| 7¡  | j8rõ|j9j:}d5D ]}zt= >g d6¢¡|j;j?|j; <|¡< W q— t@y¶   Y q—w | A¡  tBƒ }d7D ]}z| C|j; <|¡¡ W qÀ t@yÙ   Y qÀw tD|j;jEƒD ]}|j;jF| |v ród8|j;jG| d9< qà||  M¡ }| jNd?ks| jOd?kr9dKD ]} | dC }!||!  | jN7  < ||!dC   | jO7  < q	dLD ]} | dC }!g dM¢||!|!d9 …< q'| P|¡}"tDdNƒD ]}#| Qt=jJdOt=jRdP¡\}"}#}#}#qB| jSd?ksa| jTd?krltU|j:| j5| jS| jTƒ tV|j:| j5tƒ\}$}%}&d#}'d#}(| jWrg nd })g }*d?}+d?},dQ}-|+| jXk r¥|'s¥t=j>|"dR t=jYdP}.|"| j5› dS }/tZ|.|$tƒ |¡}0t[|/tƒ |¡}1t=j>|"dT t=jYdP}2d }3| jdUv r{t=j>tjt=jYdP}4t=j>tjt=jYdP}5t j\|.|4 |5|4 dV  t jR|dW ]d?dC¡ Id?¡}6t|" dXd?d?g¡d? ƒ}7t j\|7tj tjtj dV  gt jR|dW ]d?dC¡ Id?¡}8i }9|d ur&||9dY< |d ur/||9dZ< t  ^¡  ||1|0f|6|8d[œ|9¤Ž\}:};}<W d   ƒ n	1 sNw   Y  t_|:|;|<|.|2ƒ}=|:d?  `¡  a¡ |5|4  |4 ‰‡fd\d„tDˆjbd? ƒD ƒ}3d }>n?| jd]v rÓ|"d^ }?t[|?tƒ |¡}@tV|j:d3tƒ\}A}B}Ct  ^¡  ||1|@|0d_}DW d   ƒ n	1 s®w   Y  tc|Dd` |Dda ||Ddb |Ddc |%|&|B|C|A|.|2tdd\}=}#|Dd` }>nç| jdekrv|"d^ }?t[|?tƒ |¡}@tV|j:d3tƒ\}E}F}G|& M¡ }H|Hd?  t  < |HdC  t  < |G M¡ }I|Id?  t  < |IdC  t  < t  ^¡ C ||1|@|0t  d|%¡ ¡  Id?¡ |¡t  d|H¡ ¡  Id?¡ |¡t  d|F¡ ¡  Id?¡ |¡t  d|I¡ ¡  Id?¡ |¡df\}>}#}#}JW d   ƒ n	1 sbw   Y  te|>||J|%|&|.|2tdd\}=}3nDi }K|d ur||KdY< |d urŠ||KdZ< t  ^¡  ||1|0fi |K¤Ž\}>}#}#}JW d   ƒ n	1 s§w   Y  te|>||J|%|&|.|2tdd\}=}3tf| dgd#ƒ}L|LrÚ|>d urÚtg|/|>|.|$|&t|+ƒ}M|* h|,|+|Mf¡ |,dC7 },ti|=ƒD ]¸\}N}O| jjrñd8|Od9dh…< |)d ur_|>d ur|) htk|"| j5› dS |>|.|$|&t|+d di¡ nN|"| j5› dS  lt=jR¡dj }Pt= m|P¡ M¡ }Ptnjo|Pttftnjpdk}P|Pdj  lt=jq¡}Qdl|+› }Rtn r|Q|Rdmtnjsdndodptnjt¡ tn r|Q|RdmtnjsdndqdCtnjt¡ |) h|Q¡ | jur|3d ur|3|N  lt=jY¡}S|Od9dh…  M¡ }T|Odh }Ud?}Vdr}Wds}X|V|Wk rét=j>|"dR t=jYdP}Y|S|Y }Zt=jv w|Z¡}[|[|Xk r¡nHt= x|Zdt dQdu¡}\t=jJdOt=jRdP}]|\|]d d9…< | jjsÂ|T|]d9dh…< |-|]dh< | Q|]¡\}"}#}'}#|+dC7 }+|VdC7 }V|'rÝdv}(n|+| jXkrän|V|Wk s‡|'s|+| jXk rt=jJdOt=jRdP}^|U|^dh< | Q|^¡\}"}#}'}#|+dC7 }+|U}-n}| jyrH|O M¡ }_|-|_dh< | Q|_¡\}"}#}'}#|+dC7 }+|'r.dv}( nn|+| jXkr6 nf| Q|O¡\}"}#}'}#|Odh }-|+dC7 }+nC| jzrz| Q|O¡\}"}#}'}#|Odh }-|+dC7 }+|'rddv}( n8|+| jXkrl n0| Q|O¡\}"}#}'}#|+dC7 }+n| Q|O¡\}"}#}'}#|Odh }-|+dC7 }+|'r’dv}( n
|+| jXkrš nqâ|+| jXk r¥|'r”|)r/|>d urÊtk|"| j5› dS |>t=j>|"dR t=jYdP|$|&t|+|(di|)dw< n|(rÏdxndy}`tn r|)dw |`dztnjsd{|(ràd|nd}dptnjt¡ t| j{ƒd~ d=| j$›  }a|aj|dvdvd |ad€|d›dF|(rd‚ndƒ› d„ }btn }t~|bƒtnjd…Ž | j€ttf¡}c|)D ]‰ |c tn ‚ˆ tnjƒ¡¡ q|c „¡  |*ry|(r7d‚ndƒ}dt| j{ƒd† d=| j$›  d€|d›dF|d›  }e|ej|dvdvd |*D ] \}f}g}h|ed‡|fd›dˆ|gd‰›dŠ }itn …t~|iƒtn ‚|htnjƒ¡¡ qX| ht|(ƒ¡ | h|+dC ¡ tL d‹|dC dŒ›d|(r•dŽnd› d|+dC › ¡ q†| †¡  tt= ‡|¡ƒ}jtt= ‡|¡ƒ}ktd‘d’› ƒ td“| j"› ƒ td”| j$› d•|› ƒ td–|› ƒ td—tˆt‰|ƒƒ› d/|› ƒ td˜|jd™ dš›d›ƒ tdœ|kdš›d/| jX› ƒ td’› ƒ | j"| j$|t~|ƒ||j|||k| jXdtdœ}lt| j{ƒ}m|mj|dvdvd |mdž| j"› dŸ| j$› d  }ntŠ|nd¡ƒ‰ t‹jŒ|lˆ dpd¢ W d   ƒ n	1 	sCw   Y  td£|n› ƒ |lS )¤NÚcudaÚmpsr’   zDevice: zCheckpoint not found: )Úmap_locationZ
min_heightÚ
max_heightZmin_gripperZmax_gripperr  r  r  r  Zref_rotation_quatzHeight  range: [ú.4fz, ú]zGripper range: [zRot     range: c                 S   ó   g | ]}|d ›‘qS ©z.3fr:   ©rÂ   rz   r:   r:   r;   rÃ   x  ó    zrun_eval.<locals>.<listcomp>z .. zRef rot:       c                 S   r  )r  r:   r  r:   r:   r;   rÃ   y  r   zPos     range: c                 S   r  r  r:   r  r:   r:   r;   rÃ   z  r   r=   )Útarget_sizer¥   )rF   rH   )r!  Ú
model_name)r!  Zmodel_state_dictF)ÚstrictzLoaded model (z) from zTask: [z] ÚdatasetsÚrc                 S   s   g | ]	}|  d ¡r|‘qS )Zdemo_)Ú
startswith©rÂ   Úkr:   r:   r;   rÃ   ‘  s    r   c                    s    g | ]}ˆ d |› d d ‘qS )zdata/z/statesr   r:   r'  )Úfr:   r;   rÃ   ’  s     zRunning z / z episodes...Ú
bddl_files)rJ   rL   rN   Úrobot0_eye_in_hand)Úbddl_file_nameÚcamera_heightsÚcamera_widthsÚcamera_names)Zwooden_cabinet_1_mainZflat_stove_1_main)r   r   g      À)Zakita_black_bowl_2_mainZcookies_1_mainZ#glazed_rim_porcelain_ramekin_1_mainr   r   u6   âœ“ Clean scene: distractors hidden, furniture removedzEnvironment ready.)rD   r>   Ztask_z_clip.ptr   zLoaded CLIP embedding: z%WARNING: CLIP embedding not found at z, using zerosr   i   )rÅ   Ú_ú zTask text for VLA: ZEpisodes)Údesc)é	   é%   )r¹   é   é   )ç      $@r7  r   r·   rÏ   rZ   rÆ   Úrobot0_eef_posÚ_imageÚrobot0_eef_quat)r>   rH   r   rÄ   Úrobot0_gripper_qposÚclip_embeddingZ	task_text)r¢   Zcurrent_gripperc                    s   g | ]}ˆ | ‘qS r:   r:   )rÂ   r¿   )Ú
pos_denormr:   r;   rÃ   /  r   )rJ   rL   Zrobot0_eye_in_hand_image)Ústart_keypoint_2dZagent_volumeZwrist_volumer  r  )rm   rN   )r>  r  Zagent_cam_K_normr  Zwrist_cam_K_normÚsave_visr†   )r¤   r\   r]   rˆ   r‰   r€   r‡   r   rŒ   é   g{®Gázt?rÁ   r   Tr   ÚSUCCESSÚFAILURE)rŠ   é,   gffffffæ?rƒ   )r   r   r„   Úvideos)ÚparentsÚexist_okÚepZ03dr¤   Úfailz.mp4Zmp4vr®   ZreplanÚ_stepZ04dz.pngz  Ep Ú3dz: u   âœ“ SUCCESSu   âœ— FAILUREz  steps=Ú
z4====================================================z  Benchmark:    z  Task z:      z  Episodes:     z  Successes:    z  Success Rate: éd   z.1fú%z  Avg steps:    )rS   Útask_idÚ	task_nameÚ
checkpointÚ
n_episodesÚsuccess_rateÚ	successesÚstep_countsÚ	avg_stepsÚ	max_stepsrã   Úeval_Ú_taskz.jsonÚw)Úindentu   Results saved â†’ )rg   rÅ   r  Úis_availableÚbackendsr  Úprintr   rP  ÚexistsÚFileNotFoundErrorÚloadrj   ÚgetrÐ   rÑ   rÒ   rÓ   rÔ   rÕ   rÖ   r  r  rÜ   rR   rQ   Ú
IMAGE_SIZEr   r"  Úload_state_dictÚtoÚevalÚbmÚget_benchmark_dictrS   Úget_taskrN  ÚnameÚosÚpathÚjoinrT   Úget_task_demonstrationÚh5pyÚFileÚsortedÚkeysr”   rQ  ÚlenÚproblem_folderÚ	bddl_filerU   ÚcameraÚseedÚresetZclean_sceneÚenvr,   r   Úbody_name2idr   r!   Úbody_posÚ	Exceptionr+   ÚsetÚaddrº   ÚngeomÚgeom_bodyidÚ	geom_rgbaZclip_embeddings_dirrk   rÞ   Úreplacer   r   Zshift_dxZshift_dyÚset_init_stateÚstepr`   Z	cam_thetaZcam_phir<   rt   Z
save_videorV  rv   r{   rp   ro   ÚclamprØ   r  r’   r“   rŽ   r  rh   rü   ÚgetattrrÀ   r»   rÛ   Zzero_rotationrµ   r_   ra   rb   rc   rd   r—   rŸ   r    rœ   Zteleportr   r    r–   Zmove_then_gripZduplicate_actionsÚout_dirÚmkdirÚVideoWriterÚstrÚVideoWriter_fourccZ	video_fpsÚwriteÚcvtColorÚCOLOR_RGB2BGRÚreleaseÚimwriteÚcloseÚmeanr™   ÚsumÚopenÚjsonÚdump)oÚargsrÅ   Z	ckpt_pathZckptZ
ModelClassr   ÚbenchÚtaskrO  Ú	demo_pathZ	demo_keysÚinit_statesrQ  rt  rx  r,   ÚfnameÚbidZdistractor_namesZdistractor_bodiesÚdnameÚgeom_idr<  Ú	clip_pathZtask_text_for_evalrS  rT  Zep_idxZdist_bodiesÚdnÚgidÚ
init_stateÚqpsÚsiÚobsr0  rq   rr   rs   Údoner¤   ÚframesZ
vis_stripsr£   Z
replan_idxZcurrent_gripper_cmdr¢   rl   Zstart_kpZ
img_tensorrâ   rî   r  r  Zeef_normZ
grip_stateZ	grip_normZ	act_extrar  r  r  Zwindow_actionsr¡   Z	wrist_obsZwrist_tensorr  r  r  ÚoutZwrist_w2c_matZwrist_cam_pose_matZwrist_cam_K_matZagent_K_normZwrist_K_normrá   Úextra_kwargsr?  Ústripr¿   rû   r§   r®   r´   Ú
target_posZpred_rot_deltaZnew_gripperZservo_stepsZ	max_servoÚ	thresholdÚcur_posÚdeltaÚdistZdelta_clippedZservo_actionÚgrip_actionZmove_actionZtag_textZ	video_dirZ
video_pathÚwriterÚtagZvis_dirZreplan_iZstep_iZ	strip_imgZvis_pathrR  rU  Úresultsr†  Zout_pathr:   )r)  r=  r;   Úrun_eval[  s   ý





,,




þüÿÿ€$€
"þýþý


ÿýüÿþ
ÿ
ÿ
ú

ÿ
ûÿý


ÿ
ýþ



þ




í€

ÿ ­ W

ýÿ"ü,ÿÿ
õ
ÿr´  Ú__main__z"Evaluate PARA in LIBERO simulation)Údescriptionz--model_typer=   )r=   r>   r@   rB   rD   rF   rH   rJ   rL   Z
wrist_onlyrN   zModel architecture to evaluate)ÚtypeÚdefaultÚchoicesÚhelpz--model_namezOpenGVLab/InternVL2_5-1Bz4HuggingFace model name (used by internvl model_type))r·  r¸  rº  z--checkpointTzQPath to .pth checkpoint (e.g. libero/checkpoints/para_libero_spatial_t0/best.pth))r·  Úrequiredrº  z--benchmarkÚlibero_spatialzMLIBERO benchmark name (libero_spatial, libero_goal, libero_object, libero_10)z	--task_id)r·  r¸  z--cameraÚ	agentviewz--n_episodesr   z&Number of rollout episodes to evaluatez--max_stepsi,  z(Max env steps per episode before failurez--seedz	--out_dirzlibero/out/evalz--save_videoÚ
store_truez<Save per-episode MP4 with heatmap overlay to out_dir/videos/)rû   rº  z
--save_viszISave per-replan-step visualization strips (all N_WINDOW heatmaps) as PNGsz--video_fpsrŠ   z!FPS for saved videos (default 10)z--clip_embeddings_dirz/data/libero/parsed_liberoz?Directory containing precomputed CLIP embeddings (for dino_vla)z--zero_rotationzPZero out rotation deltas (position-only control, for diagnosing rotation issues)z--clean_scenezBRemove distractors and furniture (match OOD objpos training setup)z
--shift_dxr   z)Shift pick/place objects by dx in world Xz
--shift_dyz)Shift pick/place objects by dy in world Yz--cam_thetaz3Camera viewpoint polar angle from default (degrees)z	--cam_phiz*Camera viewpoint azimuthal angle (degrees)z
--teleportz[Servo to predicted 3D targets with closed-loop control (bypasses open-loop delta execution)z--move_then_gripzFExecute EEF move and gripper as separate steps (move first, then grip)z--duplicate_actionszUExecute each action twice [a1,a1,a2,a2,...] for sanity checking (should match normal))BÚ__doc__Úargparser”  rj  ÚsysÚpathlibr   rb   rn  r“   r   rg   Ztorch.nn.functionalÚnnÚ
functionalr   r   rk  ÚinsertÚdirnameÚ__file__r   rÐ   r   r   r   r   r   Úutilsr	   r<   rR   Úlibero.liberorS   rf  rT   Úlibero.libero.envsrU   Zrobosuite.utils.camera_utilsrV   rW   rX   rY   r!   r`   re   rf   rb  rp   rt   r{   rµ   rÀ   rü   r  r  r´  Ú__name__ÚArgumentParserÚparserÚadd_argumentr‰  r™   rj   Ú
parse_argsr–  r:   r:   r:   r;   Ú<module>   sÖ    6!
ÿ1
ÿ9
ÿy
ýwI   
R
þ
ÿ
ÿ
ÿ
ÿ
ÿÿÿ
ÿ
ÿÿÿ
ÿ
ÿ
ÿ
ÿÿÿÿÒ