o
    Mim                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlZddlZddl	Z
ddlZddlm  mZ ddlmZ ejdeje ddlZddlmZmZmZmZmZ ddlmZ dd Zdd	lm Z!m"Z" dd
l#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) e
j*g de
j+dZ,e
j*g de
j+dZ-dZ.e.fddZ/e.fddZ0e.fddZ1e.ddfddZ2e.dfddZ3e.dfddZ4dd Z5dd  Z6e7d!krEej8d"d#Z9e9j:d$e;d%g d&d'd( e9j:d)e;d*d+d, e9j:d-e;d.d/d0 e9j:d1e<dd2 e9j:d3e;d4d2 e9j:d5e<d6d7d0 e9j:d8e<d9d:d0 e9j:d;e<dd2 e9j:d<e;d=d2 e9j:d>d?d@dA e9j:dBd?dCdA e9j:dDe<dEdFd0 e9j:dGe;dHdId0 e9= Z>e6e> dS dS )Ju  eval.py — Evaluate a trained PARA checkpoint in the LIBERO simulation environment.

PARA predicts the next N_WINDOW absolute EEF 3D positions from a single RGB image.
This script runs closed-loop rollouts: at each env step, re-run the model on the
current observation, decode the first predicted position into a delta OSC_POSE action,
and step the sim. Success is the LIBERO binary predicate check (env returns done=True).

Usage:
    python libero/eval.py         --checkpoint libero/checkpoints/para_libero_spatial_t0/best.pth         --benchmark libero_spatial         --task_id 0         --n_episodes 20

Action format: 7D OSC_POSE [delta_pos (3), delta_rot (3)=0, gripper (1)]
    N)Path)tqdm)TrajectoryHeatmapPredictorN_HEIGHT_BINSN_GRIPPER_BINS
N_ROT_BINS	PRED_SIZE)*recover_3d_from_direct_keypoint_and_heightc                 C   sz   | dkrt S | dkrddlm} |S | dkrddlm} |S | dkr*ddlm} |S | d	kr6dd
lm} |S t	d|  )Nparaactr   )ACTPredictorda3)DA3Predictormoge)MoGePredictordino_vla)DinoVLAPredictorzUnknown model_type: )
r   Z	model_actr   Z	model_da3r   Z
model_moger   Zmodel_dino_vlar   
ValueError)
model_typer   r   r   r    r   !/data/cameron/para/libero/eval.pyget_model_class&   s   r   )	benchmarkget_libero_path)OffScreenRenderEnv)get_camera_transform_matrixget_camera_extrinsic_matrixget_camera_intrinsic_matrix#project_points_from_world_to_camera)g
ףp=
?gv/?gCl?dtype)gZd;O?gy&1?g?i  c                 C   sb   |  tjd }t| }tj|||ftjd}|t t	 }t
|ddd d}|S )u   HxWx3 uint8 → (1, 3, H, W) float tensor, ImageNet-normalized.

    LIBERO obs images are already upright (already flipped vs raw render),
    so we flipud to match training convention (flipud(obs) → training image).
         o@interpolation   r      )astypenpfloat32flipudcopycv2resizeINTER_LINEARIMAGENET_MEANIMAGENET_STDtorch
from_numpy	transposefloat	unsqueeze)rgb_obs
image_sizeimgtensorr   r   r   preprocess_obsJ   s   r9   c                 C   sx   t | |||}t| |}t| |||}|d  |  < |d  |  < | }|d  |9  < |d  |9  < |||fS )u  Return camera matrices needed for projection and 3D recovery.

    - world_to_camera: (4,4) world→camera transform  → for project_points_from_world_to_camera
    - camera_pose:     (4,4) camera→world transform   → for recover_3d (ray unprojection)
    - cam_K:           (3,3) intrinsic at image_size  → for recover_3d
    These are two different matrices; using the wrong one for 3D recovery gives bad targets.
    r   r%   )r   r   r   r*   )simZcamera_namer6   world_to_cameracamera_poseZ
cam_K_normcam_Kr   r   r   get_camera_paramsX   s   

r>   c                 C   sP   t | ddtj|||dd }t|d }t|d }tj||gtjdS )uQ   Project current EEF world position → (u, v) pixel in training image convention.r%      )pointsZworld_to_camera_transformZcamera_heightZcamera_widthr   r   )	r   reshaper&   r'   float64r3   r0   r8   r(   )Zeef_posr;   r6   pix_rcuvr   r   r   eef_to_start_kpk   s   rF   c              
   C   s4  |j d }|| }	| tjd }
t|
 }
tj|
||ftjd}
|d }t	j
|ddd|j }|jddd   }tj|||ftjd}||  | d  }t|
}||d< t|
d	 |d
  dd}|d tj}| }|| || }}t|d |	 }t|d |	 }t|||fdtjddtj t|ddtj|||d }ttt|d ttt|d }}d|  kr|k rn nd|  kr|k rn nt|||fddd d| }|dur||rdnd7 }t||dtjd	ddtj t||dtjd	ddtj |S )zPRender a single eval step: RGB + heatmap overlay + predicted pixel + GT EEF dot.r!   r"   )r   r   r   dim:0yE>.r   皙??r%         ?r      r      r$   r?      rP   rP   rP   step Nz	  SUCCESSz	  running)
         rX   rX   )shaper&   r'   r(   r)   r*   r+   r,   r-   FsoftmaxrA   maxcpunumpymin
zeros_likeclipuint8argmaxint
drawMarkerMARKER_CROSSLINE_AAr   rB   roundr3   circleputTextFONT_HERSHEY_SIMPLEX)r5   volume_logitscurrent_eef_posr;   r=   r6   step_idxsuccess	pred_sizescaleframevol_t	vol_probs
heat_smallheatheat_rgboverlayvisflat_idxpypxpx_fullpy_fullrC   rD   rE   labelr   r   r   render_eval_framex   sD   

*0
r   c                 C   sV  |j d }|j d }|| }	| tjd }
t|
 }
tj|
||ftjd}
t	|
ddtj|||d }ttt|d ttt|d }}g }t|D ]}|d|f }tj|
ddd
|j }|jddd   }tj|||ftjd}||  | d  }t|
}||d	< t|
d
 |d  dd}|d tj}| }|| || }}t|d |	 }t|d |	 }t|||fdtjddtj d|  kr|k rn nd|  kr|k rn nt|||fddd d| d| }t||dtjdddtj t||dtjdddtj | | qTtj!|ddS )a
  Render a horizontal strip showing heatmaps for all N_WINDOW predicted timesteps.

    Each tile: RGB + heatmap overlay (red) + predicted pixel (green cross) + GT EEF (white dot).
    Returns a single wide image: (image_size, image_size * n_window, 3) uint8 RGB.
    r%   rG   r!   r"   r?   r   rH   rJ   rK   rL   rM   rN   rO      r$      rS   rT   z t+)      g?rW   )axis)"rY   r&   r'   r(   r)   r*   r+   r,   r-   r   rA   rB   rd   rh   r3   rangerZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   re   rf   rg   ri   rj   rk   appendconcatenate)r5   rl   rm   r;   r=   r6   rn   n_windowrp   rq   rr   rC   Zeef_uZeef_vZtilestrs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   render_window_strip   sH   

*
0r   皙?c	           /         s&  ddl m}	 d}
d}| jd }| jd }|| }tjtj}}tjtj}}tj	tj
tjdtj	tjtjd g }t|D ](| df }|jddd }|d  }|| }|| }|||f q>tjd	d
 |D tj|jdd}t  |||\}W d   n1 sw   Y  g }g }| }t|D ]\\}}| df }|d | }|d | }|dd||f   } | ttd d ||  | }!ttj	||gtjd|!||}"|"du r|r|d n| }"||" |"| }#tj|#}$|$|kr|#|$ | }#t |#|
 dd}%t	 fdd
tdD }&|	!d|&}'|	"|}(|'|(#  })t |)$ | dd}*|dddf   }+|+tt%d d ||  | },t&t |,| ||  d d dd}-tj'dtjd}.|%|.dd< |*|.dd< |-|.d< ||. q||fS )u  Decode all N_WINDOW predicted timesteps into OSC_POSE delta actions.

    Gripper/rotation are predicted by indexing features at the argmax pixel of each
    timestep and passing through the model's MLP heads (same as training inference path).

    Args:
        volume_logits:   (1, N_WINDOW, N_HEIGHT_BINS, pred_size, pred_size)
        model:           TrajectoryHeatmapPredictor (for predict_at_pixels)
        feats:           (1, D, pred_size, pred_size) feature map from forward()
        camera_pose:     (4,4) camera→world extrinsic  ← get_camera_extrinsic_matrix()
        cam_K:           (3,3) intrinsic at image_size
        current_eef_pos: (3,) numpy EEF position at start of window
        max_delta:       max position delta magnitude in metres before OSC normalisation

    Returns:
        actions:         list of N_WINDOW (7,) numpy [delta_pos(3), delta_rot_axisangle(3), gripper(1)]
        pred_3d_targets: list of N_WINDOW (3,) absolute EEF targets (for debug)
    r   Rotationr   rN   r%   rG   r   rH   c                 S   s   g | ]\}}||gqS r   r   ).0r|   r{   r   r   r   
<listcomp>  s    z)decode_window_actions.<locals>.<listcomp>)r    deviceN            ?c                    sN   g | ]#}d |ddf    ttd d  | |   |  qS )r   Nr%   )rc   itemr\   r   )r   r   Zmax_rZmin_rZrotation_logitsr   r   r   r   /  s    (r?   xyzr$      rR   )(scipy.spatial.transformr   rY   model_module
MIN_HEIGHT
MAX_HEIGHTMIN_GRIPPERMAX_GRIPPERr'   arrayMIN_ROTrB   MAX_ROTr   r\   rA   rc   r   r   r0   r8   r(   r   r4   no_gradZpredict_at_pixelsr*   	enumerater   r	   linalgnormra   
from_euler	from_quatinv	as_rotvecr   r3   zeros)/rl   modelfeatsr<   r=   rm   current_eef_quatr6   	max_deltaScipyROSC_POS_SCALEOSC_ROT_SCALEr   rp   rq   Zmin_hZmax_hZmin_gZmax_gZpred_px_listrs   Z
max_over_hrz   r{   r|   Zpred_pixelsZgripper_logitsactionsZpred_3d_targetsref_posr}   r~   Zh_binheightpred_3d	delta_posr   
delta_norm
euler_predR_pred	R_currentR_deltaZdelta_rot_normZg_binZgripper_valgripper_cmdactionr   r   r   decode_window_actions   s|   





&r   c                 C   sN  ddl m} d}d}| jd }g }	| }
t|D ]}| d|f   tj	}||
 }tj
|}|dkr=|| d }t|| dd}|d|f   tj	}|d|}||}||  }t| | dd}t|d|f  }tt|d	 d d
d}tjdtjd}||dd< ||dd< ||d< |	| q|	S )a  Decode ACT direct regression predictions into OSC_POSE delta actions.

    Args:
        pos_pred:     (1, N_WINDOW, 3) absolute 3D EEF positions (tensor)
        rot_pred:     (1, N_WINDOW, 3) euler XYZ rotations (tensor)
        gripper_pred: (1, N_WINDOW) gripper values (tensor)
        current_eef_pos:  (3,) numpy
        current_eef_quat: (4,) numpy

    Returns:
        actions: list of N_WINDOW (7,) numpy [delta_pos(3), delta_rot(3), gripper(1)]
    r   r   r   rN   r%   r   r   r   r$   rG   r   r   Nr?   rR   )r   r   rY   r*   r   r]   r^   r&   r'   rB   r   r   ra   r   r   r   r   r3   r   r(   r   )pos_predrot_predgripper_predrm   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Z	delta_rotgr   r   r   r   r   decode_act_actionsG  s4   

r   c           =         s	  t t j r	dn	t jj rdnd}td|  t| j}|	 s+t
d| t j|dd}t|dtjt_t|dtjt_t|d	tjt_t|d
tjt_d|v rh|d t_|d t_tdtjddtjdd tdtjddtjdd tddd tjD  ddd tjD   t| j}| jdkr|ttd}n|td}||d  ||}|  td| j d|  t | j  }|| j }|j!}td| j d|  t"j#$t%d|&| j }	t'(|	d t)d d  d! * D }
 fd"d|
D }W d    n	1 sw   Y  t+| j,t-|}td#| d$t-| d% t"j#$t%d&|j.|j/}t0|tt| j1gd'}|2| j2 |3  td( d }| jd)krt"j#$| j4| jd*| j  d+}t"j#	|rt j||d5d,}td-|  ntd.| d/ t j6d0d1|d2}g }g }t7t8|d3d4D ]0}|3  |9|| }t8d5D ]}|:t;j6d6t;j<d7\}}}}qt=|j>| j1t\}}}d8}d8}| j?rg nd }g }d,}d,}|| j@k r|st;jA|d9 t;jBd7}|| j1 d: } tC||t|}!tD| t|}"t;jA|d; t;jBd7}#| jd<krNt E  ||"|!\}$}%}&W d    n	1 s>w   Y  tF|$|%|&||#}'d }(n;i })|d urY||)d=< t E  ||"|!fi |)\}(}}}*W d    n	1 svw   Y  tG|(||*||||#td>\}'}tH| d?d8}+|+r|(d urtI| |(|||t|},|J|||,f |d07 }tK|'D ]@\}-}.|d ur|(d ur|JtL|| j1 d: |(|||t|d d@ |:|.\}}}}|d07 }|rdA} n
|| j@kr nq|| j@k r|r|rh|(d urhtL|| j1 d: |(t;jA|d9 t;jBd7||t||d@|dB< t| jMdC d*| j   }/|/jNdAdAdD |/dE|dFdG|r>dHndI dJ }0tOPtQ|0tOjRdK | jSttf}1|D ] |1TtOU tOjV qV|1W  |r|rpdHndI}2t| jMdL d*| j   dE|dFdG|2  }3|3jNdAdAdD |D ] \}4}5}6|3dM|4dFdN|5dOdP }7tOXtQ|7tOU|6tOjV q|Jt| |J|d0  t7TdQ|d0 dRdS|rdTndU dV|d0   q|Y  tt;Z|}8tt;Z|}9tdWdX  tdY| j  tdZ| j  d[|  td\|  td]t[t\| d$|  td^|8d_ d`da tdb|9d`d$| j@  tdX  | j| j |tQ|||8|||9| j@dcdd}:t| jM};|;jNdAdAdD |;de| j df| j  dg }<t]|<dh t^j_|: didj W d    n	1 s|w   Y  tdk|<  |:S )lNcudampsr]   zDevice: zCheckpoint not found: )map_locationZ
min_heightZ
max_heightZmin_gripperZmax_gripperZmin_rotZmax_rotzHeight  range: [z.4fz, ]zGripper range: [zRot     range: c                 S   s   g | ]}|d qS )z.3fr   )r   rE   r   r   r   r     s    zrun_eval.<locals>.<listcomp>z .. r
   )target_sizerp   )r   Zmodel_state_dictzLoaded model (z) from zTask: [z] datasetsrc                 S   s   g | ]	}| d r|qS )Zdemo_)
startswithr   kr   r   r   r     s    datac                    s    g | ]} d | d d qS )zdata/z/statesr   r   r   fr   r   r     s     zRunning z / z episodes...Z
bddl_files)Zbddl_file_nameZcamera_heightsZcamera_widthsZcamera_nameszEnvironment ready.r   Ztask_z_clip.ptr   zLoaded CLIP embedding: z%WARNING: CLIP embedding not found at z, using zerosr%   i   )r   ZEpisodes)descr   r   r   FZrobot0_eef_pos_imageZrobot0_eef_quatr   clip_embedding)r6   save_vis)ro   TrG   Zvideos)parentsexist_okepZ03d_ro   failz.mp4Zmp4vry   ZreplanZ_stepZ04dz.pngz  Ep 3dz: u   ✓ SUCCESSu   ✗ FAILUREz  steps=
z4====================================================z  Benchmark:    z  Task z:      z  Episodes:     z  Successes:    z  Success Rate: d   z.1f%z  Avg steps:    r   )r   task_id	task_name
checkpoint
n_episodessuccess_rate	successesstep_counts	avg_steps	max_stepsr   eval__taskz.jsonwr$   )indentu   Results saved → )`r0   r   r   is_availablebackendsr   printr   r   existsFileNotFoundErrorloadr3   getr   r   r   r   r   r   r   r   r   
IMAGE_SIZEr   load_state_dicttoevalbmZget_benchmark_dictr   Zget_taskr   nameospathjoinr   Zget_task_demonstrationh5pyZFilesortedkeysr_   r   lenZproblem_folder	bddl_filer   cameraseedresetclip_embeddings_dirr4   r   r   r   Zset_init_statestepr'   r(   r>   r:   
save_videor   r   rB   rF   r9   r   r   r   getattrr   r   r   r   out_dirmkdirr+   ZVideoWriterstrZVideoWriter_fourcc	video_fpswriteZcvtColorZCOLOR_RGB2BGRreleaseZimwriteclosemeanrd   sumopenjsondump)=argsr   Z	ckpt_pathZckptZ
ModelClassr   Zbenchtaskr   Z	demo_pathZ	demo_keysZinit_statesr   r   envr   Z	clip_pathr   r   Zep_idxZobsr   r;   r<   r=   donero   framesZ
vis_stripsrn   Z
replan_idxrm   r5   Zstart_kpZ
img_tensorr   r   r   r   Zwindow_actionsrl   extra_kwargsr   r   stripr   r   Z	video_dirZ
video_pathwritertagZvis_dirZreplan_iZstep_iZ	strip_imgZvis_pathr   r   resultsr  Zout_pathr   r   r   run_eval~  sn  


,



"




<

",

r  __main__z"Evaluate PARA in LIBERO simulation)descriptionz--model_typer
   )r
   r   r   r   r   zModel architecture to evaluate)typedefaultchoiceshelpz--checkpointTzQPath to .pth checkpoint (e.g. libero/checkpoints/para_libero_spatial_t0/best.pth))r  requiredr"  z--benchmarklibero_spatialzMLIBERO benchmark name (libero_spatial, libero_goal, libero_object, libero_10))r  r   r"  z	--task_id)r  r   z--camera	agentviewz--n_episodesrX   z&Number of rollout episodes to evaluatez--max_stepsi,  z(Max env steps per episode before failurez--seedz	--out_dirzlibero/out/evalz--save_video
store_truez<Save per-episode MP4 with heatmap overlay to out_dir/videos/)r   r"  z
--save_viszISave per-replan-step visualization strips (all N_WINDOW heatmaps) as PNGsz--video_fpsrU   z!FPS for saved videos (default 10)z--clip_embeddings_dirz/data/libero/parsed_liberoz?Directory containing precomputed CLIP embeddings (for dino_vla))?__doc__argparser  r   syspathlibr   r+   r   r^   r'   r0   Ztorch.nn.functionalnn
functionalrZ   r   r   insertdirname__file__r   r   r   r   r   r   r   utilsr	   r   Zlibero.liberor   r   r   Zlibero.libero.envsr   Zrobosuite.utils.camera_utilsr   r   r   r   r   r(   r.   r/   r   r9   r>   rF   r   r   r   r   r  __name__ArgumentParserparseradd_argumentr  rd   
parse_argsr  r   r   r   r   <module>   s    
1
9
g7 
g






