o
    vi                  
   @   s@  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlZddlZddlZddlZddlmZ ddlZddlZddlmZ ddlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ dd
l,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3 d\Z4Z5d\Z6Z7G dd de8eZ9e9j:de9j;de9j<de9j=de9j>diZ?e	G dd dZ@ejAejBdeC gd eDeEZFde@ddfddZGde@ddfddZHd.de@deIfd d!ZJd/d#eKfd$d%ZL		d0de@d&e8fd'd(ZM			d1de@deIfd)d*ZNeO de@dePfd+d,ZQeEd-kreQ  dS dS )2aL  
run_libero_eval.py

Evaluates a trained policy in a LIBERO simulation benchmark task suite.

Adapted from: https://github.com/user/openvla-oft/blob/main/experiments/robot/libero/run_libero_eval.py

Parallel Inference:
    To enable parallel inference across multiple GPUs, use:
        --use_parallel_inference True
        --available_gpus "0,1,2,3"
        --num_queries_best_of_n 4

    This will run model queries in parallel across the specified GPUs using torch.multiprocessing, which can
    significantly speed up evaluation when using value functions that require multiple queries per action.

    Requirements:
    - Multiple GPUs must be available
    - CUDA must be properly configured
    - Sufficient GPU memory for multiple model copies

    Note: Uses torch.multiprocessing with 'spawn' start method for CUDA compatibility.

Usage examples:
    # *** Main checkpoint: 98.5% success rate ***
    #   Replace `task_suite_name` with one of {libero_spatial, libero_object, libero_goal, libero_10}
    #   Replace `seed` with one of {195, 196, 197}
    #   Replace `run_id_note` with a unique identifier for the run
    uv run -m cosmos_policy.experiments.robot.libero.run_libero_eval         --config cosmos_predict2_2b_480p_libero__inference_only         --ckpt_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B         --config_file cosmos_policy/config/config.py         --use_wrist_image True         --use_proprio True         --normalize_proprio True         --unnormalize_actions True         --dataset_stats_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B/libero_dataset_statistics.json         --t5_text_embeddings_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B/libero_t5_embeddings.pkl         --trained_with_image_aug True         --chunk_size 16         --num_open_loop_steps 16         --task_suite_name libero_10         --local_log_dir cosmos_policy/experiments/robot/libero/logs/         --randomize_seed False         --data_collection False         --available_gpus "0,1,2,3,4,5,6,7"         --seed 195         --use_variance_scale False         --deterministic True         --run_id_note chkpt45000--5stepAct--seed195--deterministic         --ar_future_prediction False         --ar_value_prediction False         --use_jpeg_compression True         --flip_images True         --num_denoising_steps_action 5         --num_denoising_steps_future_state 1         --num_denoising_steps_value 1
    # Same as above, but with deterministic reset (seed=195/196/197, reset seed=0)
    # Also gets 98.5% success rate
    uv run -m cosmos_policy.experiments.robot.libero.run_libero_eval         --config cosmos_predict2_2b_480p_libero__inference_only         --ckpt_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B         --config_file cosmos_policy/config/config.py         --use_wrist_image True         --use_proprio True         --normalize_proprio True         --unnormalize_actions True         --dataset_stats_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B/libero_dataset_statistics.json         --t5_text_embeddings_path nvidia/Cosmos-Policy-LIBERO-Predict2-2B/libero_t5_embeddings.pkl         --trained_with_image_aug True         --chunk_size 16         --num_open_loop_steps 16         --task_suite_name libero_10         --local_log_dir cosmos_policy/experiments/robot/libero/logs/         --randomize_seed False         --data_collection False         --available_gpus "0,1,2,3,4,5,6,7"         --seed 195         --use_variance_scale False         --deterministic True         --run_id_note chkpt45000--5stepAct--seed195--deterministicand_deterministicResetSeed0         --ar_future_prediction False         --ar_value_prediction False         --use_jpeg_compression True         --flip_images True         --num_denoising_steps_action 5         --num_denoising_steps_future_state 1         --num_denoising_steps_value 1         --deterministic_reset True         --deterministic_reset_seed 0

    N)deque)	dataclass)Enum)Optional)	benchmark)
WorkerPoolManager
get_actionget_future_state_prediction	get_modelget_planning_modelget_qvalue_predictionget_value_predictioninit_t5_text_embeddings_cacheload_dataset_statsquery_model_parallel)get_libero_dummy_actionget_libero_envget_libero_imageget_libero_wrist_imagesave_rollout_video0save_rollout_video_with_future_image_predictions)	DATE_TIMEget_image_resize_sizelog_messagesetup_logging)jpeg_encode_imageset_seed_everywhere)      )      c                   @   s    e Zd ZdZdZdZdZdZdS )	TaskSuiteZlibero_spatialZlibero_objectZlibero_goalZ	libero_10Z	libero_90N)__name__
__module____qualname__LIBERO_SPATIALLIBERO_OBJECTLIBERO_GOAL	LIBERO_10	LIBERO_90 r*   r*   \/data/cameron/vidgen/cosmos-policy/cosmos_policy/experiments/robot/libero/run_libero_eval.pyr!      s    r!      i  i,  i  i  c                   @   s  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed < dZeed!< dZ eed"< d#Z!eed$< d#Z"eed%< dZ#eed&< dZ$eed'< d(Z%eed)< dZ&eed*< d+Z'eed,< d-Z(eed.< dZ)eed/< dZ*eed0< d-Z+eed1< dZ,eed2< dZ-eed3< dZ.eed4< dZ/eed5< dZ0eed6< d7Z1eed8< d9Z2eed:< e3j4Z5eed;< d<Z6eed=< d>Z7eed?< d@Z8eedA< dBZ9eedC< d(Z:e;e edD< dZ<eedE< dFZ=eedG< dHZ>eedI< dJZ?eedK< dZ@eedL< dZAeedM< dZBeedN< d(S )OPolicyEvalConfigliberosuiteZcosmosmodel_family config	ckpt_pathplanning_model_config_nameplanning_model_ckpt_pathzcosmos_policy/config/config.pyconfig_fileTuse_third_person_imager   num_third_person_imagesuse_wrist_imagenum_wrist_imagesuse_proprioflip_imagesFuse_variance_scaleuse_jpeg_compressionar_future_predictionar_value_predictionar_qvalue_predictionr   num_denoising_steps_action num_denoising_steps_future_statenum_denoising_steps_valueunnormalize_actionsnormalize_propriodataset_stats_patht5_text_embeddings_pathtrained_with_image_aug   
chunk_sizenum_open_loop_stepsdeterministicdeterministic_resetNdeterministic_reset_seed%use_ensemble_future_state_predictionsr   (num_future_state_predictions_in_ensembleaverage(future_state_ensemble_aggregation_schemeuse_ensemble_value_predictions!num_value_predictions_in_ensemble!value_ensemble_aggregation_schemesearch_depth.mask_current_state_action_for_value_prediction'mask_future_state_for_qvalue_predictionnum_queries_best_of_nuse_parallel_inferencez0,1,2,3,4,5,6,7available_gpus   parallel_timeouttask_suite_name2   num_trials_per_taskDEFAULTinitial_states_path   env_img_resz./experiments/logslocal_log_dirrun_id_note	use_wandbZYOUR_ENTITYwandb_entityZYOUR_PROJECTwandb_projectr    seedrandomize_seeddata_collectionjpeg_compress)Cr"   r#   r$   r/   str__annotations__r0   r2   r3   r4   r5   r6   r7   boolr8   intr9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rK   rL   rM   rN   rO   rP   rQ   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r^   r!   r%   r_   ra   rc   re   rf   rg   r   rh   ri   rj   rk   rl   rm   rn   r*   r*   r*   r+   r-      st   
 r-   z'%(asctime)s [%(levelname)s] %(message)s)levelformathandlerscfgreturnc                 C   sV   | j dus	J ddt| j v r| jsJ d| jdd tD v s)J d| j dS )z"Validate configuration parameters.Nzckpt_path must not be None!Z	image_augz\Expecting `trained_with_image_aug==True` because model was trained with image augmentations!c                 S   s   g | ]}|j qS r*   )value).0r/   r*   r*   r+   
<listcomp>!  s    z#validate_config.<locals>.<listcomp>zInvalid task suite: )r3   ro   rI   r_   r!   rv   r*   r*   r+   validate_config  s   (r|   c                 C   sN   | j }||jvr| d|jv r| d}||jv s"J d| d|| _dS )z>Check that the model contains the action un-normalization key.Z	_no_noopszAction un-norm key z) not found in Cosmos Policy `norm_stats`!N)r_   Z
norm_stats
unnorm_key)rv   modelr}   r*   r*   r+   check_unnorm_key$  s
   

r   task_idc                 C   sv   | |}| jdkr2t| jd}t|}W d   n1 s w   Y  td| j | ||fS td| |dfS )z'Load initial states for the given task.rb   rNzUsing initial states from zUsing default initial states)Zget_task_init_statesrc   openjsonloadr   )rv   
task_suiter   log_fileinitial_statesfall_initial_statesr*   r*   r+   load_initial_states4  s   


r   Fr<   c                 C   s<   t | |}t| |}||t| d | d | d fd}|S )z%Prepare observation for policy input.Zrobot0_gripper_qposZrobot0_eef_posZrobot0_eef_quat)primary_imagewrist_imageproprio)r   r   npconcatenate)obsresize_sizer<   img	wrist_imgobservationr*   r*   r+   prepare_observationD  s   

r   task_descriptionc
           9         s   j r jdur jn j}
t|
 |  |dur ||}n| } j jkr7t	d j d j d t
 jd}d}g } jrFg nd}g }t j } j} jr]g }g }g }g }d}zd}||| k rHtjd	d
 dkr{d}t| ||k r|t j\}}}}|d7 }qct|| j}||d  |dur||d   jr||d  ||d  ||d  t|dkrd}d} j}  jr| dkr|r|jrt }!t ||| j}"t |! }#t dt|" d|#dd|	 ng }"t!| D ]}$g }%g }&g }'i }(t }!t" |||| j|$  j# j$ j%p, j&p, j' d	})t |! }*t d|$d  d|  d|*dd|	 |)d |(d< |%|(d   j%rt }!t( f|durg|n||)d |)d |)d |)d d |)d d |)d d  |)d d! |)d d"  j|$  j# j) j* j+ j,d#}+t |! }*t d|$d  d|  d$|*dd|	 |+d% |(d%< |&|(d%  n|)d% |(d%<  j&r(t }!t- |dur|n||)d |+d&  j|$  j# j. j/ j0d'	},t |! }*t d|$d  d|  d(|*dd|	 |,d) |(d)< |'|(d)  t d|$d  d|  d*|(d) d+|	 nm j'rt }!t1 |dur9|n||)d |)d  j|$  j# j. j/ j0d,	},t |! }*t d|$d  d|  d(|*dd|	 |,d) |(d)< |'|(d)  t d|$d  d|  d*|(d) d+|	 n|)d) |(d)< |'|(d)  |&|(d-< |'|(d.< |%|(d/< |"|( qt d0| d1| |	 t2|"D ] \}$}(|(d) }-t d|$d  d|  d2 j|$  d3|-d+|	 q fd4d5t2|"D }.t3|.4 d6d7 d8\}/}0|0d }|0d }|0d9 }1|5| || t d0| d:|/ d;|1d+|	 |6 }2t	d<| d=|2   jr-||27  ||28 \}}}}|r>d>}n
|d7 }||| k sjW n& t9yo }3 zd?|3 }4t:; }5t |4 d@|5 |	 W Y d}3~3nd}3~3ww  jrt<t=j>|ddAt=j>|ddAt=j>|ddAt=j>|ddA|dB}6t|dkr׈ j?rdCdD |D }7t|7dkrt=j>|7ddA|6dE<  jrdF|d v r|d dF durdGdD |D }8t=j>|8ddA|6dH< nd}6|||||6fS )Iz(Run a single episode in the environment.Nz"WARNING: cfg.num_open_loop_steps (z ) does not match cfg.chunk_size zo! For best performance (in terms of both speed and success rate), we recommend executing the full action chunk.)maxlenr   F
   DETERMINISTICr1   truer   r   r   r   zParallel queries completed: z results in z.3fs)rk   rl   rB   +generate_future_state_and_value_in_parallelzQuery /z: Action query time = z secactions
data_batchZgenerated_latentorig_clean_latent_framesZlatent_indicesfuture_proprio_latent_idxfuture_wrist_image_latent_idxfuture_wrist_image2_latent_idxfuture_image_latent_idxfuture_image2_latent_idx)r~   r   Zgenerated_latent_with_actionr   r   r   r   r   r   rk   rl   rC   rP   rQ   rS   z': Future state prediction query time = future_image_predictionsfuture_state_samples_list)r~   r   r   rk   rl   rD   rT   rU   z : Value prediction query time = value_predictionz: Value prediction: .4f)r~   r   Zaction_samplerk   rl   rD   rT   rU   !future_image_predictions_by_depthvalue_predictions_by_depthactions_by_depthzt=z: Current base seed: z (seed z): Predicted value = c                    s.   i | ]\}} j | |d  |d |d fqS )r   r   r   )rk   )ry   	query_idxreturn_dictr{   r*   r+   
<dictcomp>C  s    zrun_episode.<locals>.<dictcomp>c                 S   s   | d d S )Nr      r*   )xr*   r*   r+   <lambda>L  s    zrun_episode.<locals>.<lambda>)keyr   z: Selected seed z with value = zt: z
	 action: TzEpisode error: 
Full traceback:
)axis)Zprimary_imagesZwrist_imagesr   r   successc                 S   s    g | ]}|d  dur|d  qS )future_imageNr*   ry   r   r*   r*   r+   rz   v  s    zrun_episode.<locals>.<listcomp>future_primary_imagesfuture_wrist_imagec                 S      g | ]}|d  qS r   r*   r   r*   r*   r+   rz         future_wrist_images)@rN   rO   rk   r   resetZset_init_stateZget_observationrL   rK   printr   r9   TASK_MAX_STEPSr_   rm   osenvirongetlowerstepr   r0   r   r<   appendlenrZ   r[   initializedtimer   r^   r   ranger   rl   rB   r?   r@   rA   r	   rC   rP   rQ   rS   r   rD   rT   rU   r   	enumeratemaxitemsextendpopleftcopytolist	Exception	traceback
format_excdictr   stackr7   )9rv   envr   r~   planning_modeldataset_statsworker_poolr   initial_stater   Z
reset_seedr   Zaction_queuetreplay_imagesreplay_wrist_imagesfuture_image_predictions_listZ	max_steps	base_seedZprimary_images_listZwrist_images_listZproprio_listZactions_listr   ZNUM_STEPS_WAITrk   rewarddoneinfor   Zbest_actionsZbest_future_predictionsZnum_queries
start_timeZquery_resultsZtotal_query_timer   r   r   r   r   action_return_dictZ
query_timeZfuture_state_return_dictZvalue_return_dictZpredicted_valueZseed_to_return_dictZ	best_seedZbest_return_dictZbest_value_predictionsactione	error_msgtraceback_strcollected_datar   r   r*   r{   r+   run_episodeT  s  


&
	

  [
 r   c                    s  | }t ||
\}}t| j jd\}d\}}tt jD ]}td |
  j	dkr9|| }n+
dd}d| }|| | d sYtd	 d
| d|
 q%t|| | d }td|d  d|
 t ||||||||

\}}}|d7 }d7 r|d7 }|	d7 }	t||
d d} jrdd |D }d} jrdd |D }t| j j||||
dd  jrڈdurڇ fdd}|  td |
 td |
 td|	 d|	 d dd|
 q%|d kr	t|t| nd }d krt|	t nd }td!| |
 td"| |
  jrNtd# j d$ |d% j d$ |d& j d$ |i |	fS )'z!Run evaluation for a single task.)
resolutionr   r   z
Task: rb    _demo_r   zSkipping task z	 episode z due to failed expert demo!r   zStarting episode r   z...)r   r   r   Nc                 S   r   )r   r*   r   r*   r*   r+   rz     r   zrun_task.<locals>.<listcomp>c                 S   r   r   r*   r   r*   r*   r+   rz     r   F)	r   r   rK   rL   Zrollout_wrist_images future_primary_image_predictionsfuture_wrist_image_predictionsr   Z	show_diffc            	         s0  d j  dt d d d d j d} tj jd}tj|dd	 tj|| }t	|d
[}
 D ]H\}}t|tjr{|jdkoS|jd dkoS|jtjk}|rs jrsdd |D }ttd}|j|d ||d q8|j||d q8||j|< q8|jd< W d   dS 1 sw   Y  dS )z)Save collected episode data to HDF5 file.zepisode_data--suite=z--z--task=z--ep=z
--success=z.hdf5Zrollout_dataT)exist_okw   r   c                 S   s   g | ]}t |d dqS )_   )quality)r   )ry   framer*   r*   r+   rz         z8run_task.<locals>._save_episode_data.<locals>.<listcomp>uint8Z_jpeg)datadtype)r   r   N)r_   r   rg   r   pathjoinrf   makedirsh5pyFiler   
isinstancer   ndarrayndimshaper   r   rn   
vlen_dtypecreate_datasetattrs)	Zep_filenameZrollout_data_dirZep_filepathr   kvZis_imageZ	jpeg_listdtrv   r   r   r   r   total_episodesr*   r+   _save_episode_data  s    .$
"z$run_task.<locals>._save_episode_dataz	Success: z# episodes completed so far: z# successes:  (d   .1f%)r   zCurrent task success rate: zCurrent total success rate: success_rate/r   num_episodes/num_successes/)Zget_taskr   r   r0   re   tqdmr   ra   r   rc   replacer   arrayr   r   r7   r9   r   rK   rL   rm   floatrh   wandblogr_   )rv   r   r   r~   r   r   r   r   r  total_successesr   taskr   r   r   Ztask_episodesZtask_successesZepisode_idxr   Zinitial_states_task_keyZepisode_keyr   r   r   r   r   r  Ztask_success_rateZtotal_success_rater*   r  r+   run_task  s   



	$	r   c                 C   s  | j r
| jr
J d| j rdtjd< | jrtjddd t|  t| j	 t
| j t| j}d}| jrQdd	 | jd
D }|d| j }t| ||}d}d}n,t| \}}| j|jjjksmJ d|jjj d| j d}| jdkr{t| \}}nd}t| j}t| | j| j| j| j| j| j d\}	}
}t!d|  |	 | jr|rt!d| |	 t!d| j" d|	 t!dt#  |	 |D ]}|t$j%& krt!d| dt$j%&  d|	 qzt!d|	 |'  t!d|	 W n2 t(y } z d| }t)* }t!| d| |	 t!d|	 d}W Y d}~n
d}~ww t!d|	 t+, }|| j  }|j-}t!d| j |	 t!d| |	 d\}}t..t/|D ]}t0| ||||||||||	\}}qH|d krit1|t1| nd }t!d!|	 t!d"| |	 t!d#| |	 t!d$|d%d&|d' d(d)|	 | jrt23d*| j d+|d,| j d+|d-| j d+|i t24|
 |rz|5  W n& t(y } zd.| }t)* }t!| d| |	 W Y d}~nd}~ww |	r|	6  |S )/zEMain function to evaluate a trained policy on LIBERO benchmark tasks.z>Cannot enable both deterministic mode and randomize seed mode!Truer   spawnT)forceNc                 S   s   g | ]}t | qS r*   )rr   strip)ry   gpur*   r*   r+   rz   =  r   zeval_libero.<locals>.<listcomp>,z:Mismatch found between train and test chunk sizes! Train: z, Test: r1   )rv   Ztask_identifierZlog_dirrg   rh   ri   rj   zEval config: z$Parallel inference enabled on GPUs: zParallel timeout: r   zMultiprocessing start method: zWarning: GPU z not available (only z GPUs found)zStarting worker pool...z Worker pool started successfullyzFailed to start worker pool: r   z)Disabling parallel inference for this runz4Using serial inference (parallel inference disabled)zTask suite: zNumber of tasks: r   r   zFinal results:zTotal episodes: zTotal successes: zOverall success rate: r   r  r  r  r  r  z/totalr  r  z!Error shutting down worker pool: )7rM   rl   r   r   r[   mpset_start_methodr|   r   rk   r   rH   r   rG   r\   splitrZ   r   r
   rK   Zdataloader_traindatasetr5   r   r   r0   r   r_   rf   rg   rh   ri   rj   r   r^   get_start_methodtorchcudadevice_countZstart_workersr   r   r   r   Zget_benchmark_dictZn_tasksr  r   r   r  r  r  saveshutdownclose)rv   r   r   r\   r~   r   cosmos_configr   r   r   Zlocal_log_filepathZrun_idgpu_idr   r   r   Zbenchmark_dictr   Z	num_tasksr  r  r   Zfinal_success_rater*   r*   r+   eval_libero  s   





	




 

 r4  __main__)N)F)NN)r   r   N)R__doc__r   loggingr   r   r   collectionsr   dataclassesr   enumr   typingr   Zdraccusr  numpyr   r,  torch.multiprocessingmultiprocessingr'  r  r  Zlibero.liberor   ,cosmos_policy.experiments.robot.cosmos_utilsr   r   r	   r
   r   r   r   r   r   r   Z3cosmos_policy.experiments.robot.libero.libero_utilsr   r   r   r   r   r   Z+cosmos_policy.experiments.robot.robot_utilsr   r   r   r   Zcosmos_policy.utils.utilsr   r   ZCURR_STATE_START_LATENT_IDXZCURR_STATE_END_LATENT_IDXZFUTURE_STATE_START_LATENT_IDXZFUTURE_STATE_END_LATENT_IDXro   r!   r%   r&   r'   r(   r)   r   r-   basicConfigINFOStreamHandler	getLoggerr"   loggerr|   r   rr   r   rq   r   r   r   wrapr  r4  r*   r*   r*   r+   <module>   s   ]0 
	Y

  @
  

