o
    di                     @   s  d Z ddlZddlZddlZddlmZ ddlZddlZddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZ dd	l m!Z! e	j"d
e	j#d G dd deZ$			d?de%de%de&de&de%f
ddZ'e(dkrVe$ddddddddddddddd d!Z)e	j"d"d#d$ id% dZ*e)e* Z+e,d&e+d' j-e+d' j.f  e,d(e+d) j-e+d) j.f  e,d*e+d)   e,d+e+d, j-e+d, j.f  e,d-e+d,   e,d.e)j/  ej0ddd/ e1dD ]ZZ2e3de4e)d0 Z5e)e5 Z+e+d' 6d0d1d
d Z7e1e7j-d D ]4Z8e7e8 Z9d2e5 d3e+d4  d5e+d6  d7e+d8  d9e+d: d;d<e8 d=Z:e;e9<e: e,d>e:  q qdS dS )@z
LIBERO simulation benchmark task suites dataloader.

Run this command to print a few samples from the LIBERO dataset:
    python -m cosmos_policy.datasets.libero_dataset
    N)defaultdict)Image)Dataset)tqdm) build_rollout_step_index_mappingcalculate_epoch_structurecompute_monte_carlo_returnsdetermine_sample_typeget_action_chunk_with_padding"load_or_compute_dataset_statistics-load_or_compute_post_normalization_statistics)calculate_dataset_statisticsdecode_jpeg_bytes_datasetdecode_single_jpeg_frameget_hdf5_filespreprocess_imagerescale_datarescale_episode_data)duplicate_array   )	precision	linewidthc                %   @   s   e Zd Z																					d+d
edededededededededededededededededef$ddZdd Z	d d! Z
d"d# Zd$d% Zd&d' Zd(d) Zd*S ),LIBERODataset       FT         ?Gz?            data_dir
chunk_sizefinal_image_sizet5_text_embeddings_pathuse_image_auguse_stronger_image_auguse_wrist_imagesuse_third_person_imagesuse_proprionum_duplicates_per_imagerollout_data_dirdemonstration_sampling_probsuccess_rollout_sampling_probtreat_success_rollouts_as_demosreturn_value_function_returnsgammamulti_frame_futurefuture_frame_offsetsc           /      C   s"  || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|rSt||ksSJ d| dt| | j	s]| j
s]J dt|}tjdd dkrr|dd }g }| jrtj| jsJ d	| j d
t| j}i | _i | _d| _d| _ d| _!d| _"t# | _$i | _%| jdkrt&|D ]}t'(|d}t)|d * }t+|dd d}t&|D ]}|d| d }d|v r|d dd }nd|v rt,|d }nt-dd|v r|d dd }nd|v rt,|d }nt-d|d| d dd .t/j0} |d| d dd .t/j0}!tj1|2dd }"|"dd 2d}#d}$|#D ]}%d |%v rTd}$qI|$|% d! }$qI|$dd }$| j$3|$ t|}&| jrxt4|&d"| jd#}'t5|||!| |$|&tj6|| j 2tj7d | jr|'8 ndd$| j| j< |  jd7  _|  j |&7  _ qW d   n	1 sw   Y  q| 9  || _|dkrt:|d%}t;<|| _=W d   n	1 sw   Y  t>| j | jt?d&| _@| js| jr| jrtA| j| j@d'| _| jrtA| j| j@d(| _tB| j | jt?d&| _Ct|dkrt&|d)d*D ]}t'(|d}d+|v r>d,}(t|d+ }&nd-|v rLd.}(t|d- }&nt-d/| |jDd0d}$| j$3|$ tE|jDd1d,})t5||$|&|)|(d2| j| j!< | jrtE|jDd1})|)rd"nd3}*t4|&|*| jd#}'|'8 | j| j! d4< |  j!d7  _!|  j"|&7  _"W d   n	1 sw   Y  q'| jr| jF D ]\}+},|,d1d,sѐq| G|,}-|-d5d,rt/jHd6d7 |-d8 D dd9.t/jI}t/jHd:d7 |-d; D dd9.t/jI}n|-d8 }|-d; }|-d' } |-d( }!d<|,d= v rd>}.n'd?|,d= v r&d@}.ndA|,d= v r0dB}.ndC|,d= v r:dD}.n	tJdE|,d=  |,d4}'|'durQ|'8 }'t5|||!| |,dF|,dG|.|'d$| j| j< | j$3|,dF |  jd7  _|  j |,dG7  _ q| 9  | K  | L  dS )Ha	  
        Initialize LIBERO dataset for training.

        Args:
            data_dir (str): Path to directory containing LIBERO task suite HDF5 files
            chunk_size (int): Action chunk size
            final_image_size (int): Target size for resized images (square), defaults to 224
            t5_text_embeddings_path (str): Path to precomputed T5 text embeddings dictionary (key: instruction, val: embedding)
            num_images_per_sample (int): Number of images to return per sample
            normalize_images (bool): Whether to normalize the images and return as torch.float32
            normalize_actions (bool): Whether to normalize the actions
            normalize_proprio (bool): Whether to normalize the proprioceptive state
            use_image_aug (bool): Whether to apply image augmentations
            use_stronger_image_aug (bool): Whether to apply stronger image augmentations
            use_wrist_images (bool): If True, loads wrist-mounted camera images
            use_third_person_images (bool): If True, loads third-person images
            use_proprio (bool): If True, adds proprio to image observations
            num_duplicates_per_image (int): Number of times to duplicate each image (so that each type of image fills 1 latent frame when encoded with the tokenizer)
            rollout_data_dir (str): Path to directory containing rollout data (if provided, will load rollout data in addition to base dataset)
            demonstration_sampling_prob (float): Probability of sampling from demonstration data instead of rollout data
            success_rollout_sampling_prob (float): Probability of sampling from success rollout data instead of failure rollout data
            treat_success_rollouts_as_demos (bool): If True, copy successful rollout episodes into demonstration dataset (self.data)
            return_value_function_returns (bool): If True, returns value function returns for rollout episodes
            gamma (float): Discount factor for value function returns
            multi_frame_future (bool): If True, load multiple consecutive future frames instead of
                duplicating a single frame. Uses VAE's natural 4-frame temporal encoding.
            future_frame_offsets (tuple): Frame offsets from current timestep for multi-frame future.
                Must have exactly num_duplicates_per_image entries (default 4).
                Default (7, 14, 21, 28) = every 7 frames.
        zfuture_frame_offsets must have z entries, got z=Must use at least one of wrist images or third-person images!	DEBUGGINGFalsetrueN   zError: Rollout data directory 'z' does not exist.r   rdatac                 S   s   t | dd S )N_r9   )intsplitx rA   K/data/cameron/vidgen/cosmos-policy/cosmos_policy/datasets/libero_dataset.py<lambda>   s    z(LIBERODataset.__init__.<locals>.<lambda>)keyzdata/z/obsZagentview_rgbZagentview_rgb_jpegzDNeither 'agentview_rgb' nor 'agentview_rgb_jpeg' found in HDF5 file.Zeye_in_hand_rgbZeye_in_hand_rgb_jpegzHNeither 'eye_in_hand_rgb' nor 'eye_in_hand_rgb_jpeg' found in HDF5 file.z/actionsz/robot_states/ir<   r   ZSCENE g      ?)terminal_rewardr3   )imageswrist_imagesproprioactionscommand	num_stepssuitereturnsrb)r$   r;   !calculate_dataset_statistics_funcrL   rK   zLoading rollout metadata)descprimary_imagesFprimary_images_jpegTz/No primary/wrist images found in rollout file: task_descriptionsuccess)	file_pathrM   rN   rW   is_jpegg        rP   rY   c                 S      g | ]}t |qS rA   r   .0brA   rA   rB   
<listcomp>N      z*LIBERODataset.__init__.<locals>.<listcomp>rI   axisc                 S   rZ   rA   r[   r\   rA   rA   rB   r_   R  r`   rJ   zsuite=libero_spatialrX   Zlibero_spatialzsuite=libero_objectZlibero_objectzsuite=libero_goalZlibero_goalzsuite=libero_10Z	libero_10z7Could not determine suite name from rollout file path: rM   rN   )Mr$   r%   r&   r'   normalize_imagesnormalize_actionsnormalize_proprior(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   lenr   osenvirongetlowerpathexistsr;   rollout_episode_metadatanum_episodesrN   rollout_num_episodesrollout_num_stepssetunique_commands_suite_to_step_indicesr   h5pyFilelistkeyssortedr   KeyErrorastypenpfloat32basenamer>   addr   dictrelpathsepcopy_build_step_index_mappingopenpickleloadt5_text_embeddingsr   r   dataset_statsr   r   dataset_stats_post_normattrsboolitems_load_rollout_episode_datastackuint8
ValueError!_build_rollout_step_index_mapping_calculate_epoch_structure)/selfr$   r%   r&   r'   rc   rd   re   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   
hdf5_filesrollout_hdf5_filesfilefZdemo_keys_listZsorted_demo_keysZdemo_key	obs_grouprI   rJ   rL   rK   raw_file_stringwordsrM   wrN   rP   rY   rW   rH   ep_idxep_metaepisode_dataZ
suite_namerA   rA   rB   __init__<   s  6



@
	

'



zLIBERODataset.__init__c                 C   s   t | dsd| _t | dsd| _t | ds| j| j | _| j}t|| j| j| j| jd}|d | _|d | _	|d | _
|d	 | _d
S )zVCalculate epoch layout with proper scaling: demos, success rollouts, failure rollouts._rollout_success_total_stepsr   _rollout_failure_total_steps_rollout_total_steps)rN   rollout_success_total_stepsrollout_failure_total_stepsr/   r0   adjusted_demo_countadjusted_success_rollout_countadjusted_failure_rollout_countepoch_lengthN)hasattrr   r   r   rN   r   r/   r0   r   r   r   r   )r   Zdemo_base_countresultrA   rA   rB   r     s$   





z(LIBERODataset._calculate_epoch_structurec                 C   s   i | _ d| _tt| _| j D ])\}}|d }t|D ]}||f| j | j< | j|d  | j |  jd7  _qqt| j	 | _
t| j
dkrXtdd | j D | _dS dS )zYBuild a mapping from global step index to (episode index, relative index within episode).r   rN   rO   r9   c                 s   s    | ]}t |V  qd S )N)rf   )r]   vrA   rA   rB   	<genexpr>  s    z:LIBERODataset._build_step_index_mapping.<locals>.<genexpr>N)_step_to_episode_map_total_stepsr   rv   rs   r;   r   rangeappendrw   Z_suitesrf   maxvaluesZ_max_suite_len)r   episode_idxr   rN   irA   rA   rB   r     s   
z'LIBERODataset._build_step_index_mappingc                 C   sB   t i | j}|d | _|d | _|d | _|d | _|d | _dS )zYBuild mapping for rollout dataset with separate tracking for successful/failure episodes.$_rollout_success_step_to_episode_map$_rollout_failure_step_to_episode_mapr   r   r   N)r   rm   r   r   r   r   r   )r   r   rA   rA   rB   r     s   



z/LIBERODataset._build_rollout_step_index_mappingc           	      C   s  |d }t |ds}|d r |d dd }|d dd }n|d dd }|d dd }|d	 dd tj}|d
 dd tj}| jrTtd	|i| jd	}| jr`td
|i| jd
}t	|||||d |d |d |d d}|W  d   S 1 sw   Y  dS )a  
        Load rollout episode data from HDF5 file using metadata.

        Args:
            episode_metadata (dict): Episode metadata containing file_path, success, etc.

        Returns:
            dict: Episode data dictionary with loaded arrays
        rX   r:   rY   rU   Nwrist_images_jpegrT   rJ   rL   rK   rM   rN   rW   )rI   rJ   rK   rL   rM   rN   rW   rY   )
rt   ru   rz   r{   r|   rd   r   r   re   r   )	r   episode_metadatarX   r   rI   rJ   rL   rK   r   rA   rA   rB   r     s:   
$z(LIBERODataset._load_rollout_episode_datac                 C   s   | j S )z3Returns the total number of samples in the dataset.)r   )r   rA   rA   rB   __len__  s   zLIBERODataset.__len__c           2         s  t || j| j}|dkrdnd}|dkrdnd}|dkr2|| j }| j| \}d}| j| }d}	n>|dkrR|| j }
|
| j }	| j|	 \}| j| }| 	|}n|| j | j }|| j
 }	| j|	 \}| j| }| 	|}d}d}|dkr| jrd}t |k rd	}d}n	d}d	}nd	}d}| j }|d
 d |kr}| jr|d
 d fdd| jD }n|g}i  i |ht|B }|D ]/}|dkr|d rt|d |  |< t|d | |< q|d |  |< |d | |< qg }d}tjt  dd}|| |d7 }| jr8|d  }  }t  }t|| jd}|| |}|d7 }| jrR }t|| jd}|| |}|d7 }| jrl  }t|| jd}|| |}|d7 }t  }t|| jd}|| |}|d7 }| jr|d | }t  }t|| jd}|| |} |d7 }| jr| jrtfdd|D }!||! n| }"t|"| jd}"||" |}#|d7 }| jr| jrt fdd|D }$||$ n | }%t|%| jd}%||% |}&|d7 }| jr%t  }'t|'| jd}'||' |}(|d7 }tj|dd})t|)| j| j | j!| j"d})t#|d | j|d
 d}*t$| j |d
 d }+t#|d |+| j|d
 d},| jrx|}-|durq|d |- }.n|d |- }.nt%d}.|+| j }/|d
 d |/kr}/| jr|/}-|dur|d |- }0n|d |- }0nt%d}0i d|)d|*dt&'| j(|d  dt&j)dt&j*dd d!d"t&+d| j| jd#| jt&)d$ d| jr|nt|d  d%| jr|nt|d | d&|d'|d(|d)|rdndd*|rdndd+|	d,|d-| jr"|(nd| jr*|nd| jr1|nd| jr8|nd| jr?| nd| jrF|#nd| jrM|&nd|.|,|0d.	}1|1S )/aM  
        Fetches images and action chunk sample by index.
        Returns action chunk rather than just single-step action.
        If the action chunk retrieval would go out of bounds, the last action is repeated however
        many times needed to fill up the chunk.

        Args:
            idx: Integer index to retrieve sample

        Returns:
            dict: Data sample: {
                video=images,
                actions=action chunk,
                t5_text_embeddings=text embedding,
                t5_text_mask=text embedding mask,
                fps=frames per second,
                padding_mask=padding mask,
                num_frames=number of frames per sequence,
                image_size=image size,
                proprio=proprio state,
                __key__=unique sample identifier,
            }
        demor9   r   success_rolloutNrF   Fr   TrN   c                    s   g | ]	}t |  qS rA   )min)r]   offset)max_possible_idxrelative_step_idxrA   rB   r_   G  s    z-LIBERODataset.__getitem__.<locals>.<listcomp>rY   rI   rJ   ra   rK   )Ztotal_num_copiesc                       g | ]} | qS rA   rA   r]   fi)decompressed_wrist_imagesrA   rB   r_         c                    r   rA   rA   r   )decompressed_imagesrA   rB   r_     r   )r&   rc   r(   stronger_image_augrL   )rL   r   r%   rN   rP   z-100videor   rM   t5_text_maski   )dtypefps   padding_mask
image_sizer   future_proprio__key__rollout_data_maskrollout_data_success_maskworld_model_sample_maskvalue_function_sample_maskglobal_rollout_idxaction_latent_idxvalue_latent_idx)	current_proprio_latent_idxcurrent_wrist_image_latent_idxcurrent_image_latent_idxfuture_proprio_latent_idxfuture_wrist_image_latent_idxfuture_image_latent_idxvalue_function_returnnext_action_chunknext_value_function_return),r	   r   r   rN   r   r;   r   r   rm   r   r   r   r2   randomr%   r4   r5   rq   r   r{   expand_dims
zeros_liker   r,   r   r-   r*   r+   r   concatenater   r&   rc   r(   r)   r
   r   floattorchsqueezer   onesint64zeros)2r   idxsample_typer   r   global_step_idxr   r   r   r   success_idxfailure_idxis_world_model_sampleis_value_function_samplep_world_modelfuture_frame_idxZfuture_frame_indicesZframes_needed	frame_idx
image_listZcurrent_sequence_idxZfirst_input_imagerK   imageZblank_imager   Zwrist_imager   Zcurrent_imager   r   r   r   Zfuture_wrist_framesZfuture_wrist_imager   Zfuture_primary_framesfuture_imager   value_imager   rI   action_chunknext_relative_step_idxr   Zreturn_timestepr   next_future_frame_idxr   Zsample_dictrA   )r   r   r   r   rB   __getitem__  s  

















		




	$zLIBERODataset.__getitem__N)r   r   r   FTTTTTTTr   r   r   r   FTr   Fr   )__name__
__module____qualname__strr=   r   r   tupler   r   r   r   r   r   r   rA   rA   rA   rB   r   ;   s    	

  L	2r   d   2   ./tempr$   r'   	fixed_idxnum_augmentations
output_dirc                 C   s  t d| d t| d|ddddddddd}g }t d| d	| d
 tt|D ]}|| }|| q*g }	|D ]}|d dddd }
|	|
 q:tj|	dd}	|	j	\}}}}}t d|	j	  g d}|j
sq|d |jsy|d tj|dd tt|t|D ]`}|t|k r|| nd| }d|v rqt d| d
 g }t|D ]}|	||f }|| qtj|d| d}tj|dd}|jtjkr|tj}tj||ddd t d|  qt d g }g }tt|t|D ]!}|t|k r|| nd| }d|vr|| || qt|dkrg }t|D ]"}g }|D ]}|	||f }|| q0tj|dd}|| q*tj|dd}|jtjkra|tj}tj|d}tj||ddd t d |  t d!d"|  t d# dS )$a  
    Create MP4 videos visualizing the distribution of augmentations for a fixed data point.

    Args:
        data_dir (str): Path to the dataset directory
        t5_text_embeddings_path (str): Path to T5 embeddings file
        fixed_idx (int): Index of the data point to apply augmentations to
        num_augmentations (int): Number of different augmentations to sample
        output_dir (str): Directory to save the visualization videos
    z*
Creating augmentation visualization with z samples...r   FTr9   )r$   r%   r'   rc   rd   r(   r*   r,   re   r-   r)   zGenerating z augmentations for data point z...r      r   r   ra   zAugmented video array shape: )Zblank_inputrK   wristZcurrent_viewr   Zfuture_wristZfuture_viewZblank_actionrK   r  exist_okZframe_blankzCreating video for Zaugmentation_visualization_z.mp4   N)r   macro_block_sizez(Saved augmentation visualization video: z/Creating combined video with all frame types...z'augmentation_visualization_combined.mp4z1Saved combined augmentation visualization video: z&Combined video shows frames in order: z | z$Augmentation visualization complete!)printr   r   r   r   permutenumpyr{   r   shaper,   remover*   rg   makedirsr   rf   rk   joinr   r   rz   imageiomimsaver   )r$   r'   r  r  r  Zaug_datasetZaugmented_samplesZaug_idxsampleZall_augmented_videosr   Znum_augs
num_framesheightwidthchannelsZframe_namesr   
frame_nameZframes_for_videoframe
video_pathZframes_arrayZvalid_frame_indicesZvalid_frame_namesZcombined_framesZframes_to_combineZcombined_frameZcombined_frames_arrayZcombined_video_pathrA   rA   rB   !create_augmentation_visualization  s   


 


r  __main__zusers/user/data/libero_regenz.users/user/data/libero_regen/t5_embeddings.pklr   Tr   z*users/user/data/libero_regen_rollout_data/r   r   )r$   r'   r%   r(   r*   r,   re   rd   r-   r)   r.   r/   r0   r2   r3   r   c                 C   s
   d | S )Nz{0:0.3f})formatr?   rA   rA   rB   rC     s   
 rC   )	formatterz
Images shape, dtype: r   zActions shape, dtype: rL   z	Actions:
z!T5 text embeddings shape, dtype: r   zT5 text embeddings:
zUnique commands: r	  r9   r  z ./temp/video__global_step_index_z__is_rollout=r   z__global_rollout_idx=r   z__is_success=r   z__value_function_return=r   z.4fz__frame_idx=z.pngzSaved image at path: )r  r  r  )=__doc__rg   r   r   collectionsr   rt   r  r  r{   r   PILr   torch.utils.datar   r   %cosmos_policy.datasets.dataset_commonr   r   r   r	   r
   r   r   $cosmos_policy.datasets.dataset_utilsr   r   r   r   r   r   r   cosmos_policy.utils.utilsr   set_printoptionsinfr   r   r=   r  r   datasetr   r  r  r  r   rr   r  r   r<   randintrf   Zglobal_step_indexr  rI   r   Zimg_np
image_path	fromarraysaverA   rA   rA   rB   <module>   s   $$		     f
 
< 