o
    vi!                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZ ejd	ejd
 G dd deZ e!dkrhe dJi ddddddddddddddddddddddddd d!d"d!d#dd$d%Z"ejd&d'd( id) dZ#e"e# Z$e%d*e$d+ j&e$d+ j'f  e%d,e$d- j&e$d- j'f  e%d.e$d-   e%d/e$d0 j&e$d0 j'f  e%d1e$d0   ej(d2dd3 e)d4D ]aZ*e+de,e"d5 Z-e"e- Z$e$d+ .d5d6d	d Z/e)e/j&d D ]=Z0e/e0 Z1d7e- d8e$d9 dd:  d;e$d<  d=e$d>  d?e$d@  dAe$dB dCdDe0 dEZ2e	3e14e2 e%dFe2  qqe%dG e%dH e%dI dS dS )Kz
RoboCasa simulation benchmark dataloader.

Run this command to print a few samples from the RoboCasa dataset:
    python -m cosmos_policy.datasets.robocasa_dataset
    N)Image)Dataset)tqdm) build_rollout_step_index_mappingcalculate_epoch_structurecompute_monte_carlo_returnsdetermine_sample_type"load_or_compute_dataset_statistics-load_or_compute_post_normalization_statistics)calculate_dataset_statisticsdecode_jpeg_bytes_datasetdecode_single_jpeg_frameget_hdf5_filespreprocess_imagerescale_datarescale_episode_data)duplicate_array   )	precision	linewidthc                '   @   s   e Zd Z																					d-d	ed
edededededededededededededededededef&ddZdd Zd d! Z	d"d# Z
d.d%d&Z	$d.d'd(Zd)d* Zd+d, Zd$S )/RoboCasaDataset       FT         ?Gz?data_dir
chunk_sizefinal_image_sizet5_text_embeddings_pathuse_image_auguse_stronger_image_auguse_wrist_imagesuse_third_person_imagesuse_proprionum_duplicates_per_imagerollout_data_dirdemonstration_sampling_probsuccess_rollout_sampling_probp_world_modeltreat_success_rollouts_as_demosreturn_value_function_returnsgammalazy_load_demos!skip_computing_dataset_statisticsc           -      C   s  || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _| j	sI| j
sIJ dt|}tjdd dkr^|dd }g }| jrxtj| jssJ d| j dt| j}i | _i | _i | _d	| _d	| _ d	| _!d	| _"t# | _$d
| _%| jd	krt&|D ],}t'(|d}t)|d * }t+|dd d}t&|D ]}|d| d }d|v }|d|  j,d } | j$-|  |rt.|d }!nt.|d }!| jrt/|!d| jd}"nd}"| jrt0||| |!||"dur|"1 ndd| j| j< nd|v r"|d dd }#nd|v r.t2|d }#nt3dd|v r@|d dd }$nd|v rLt2|d }$nt3dd|v r^|d dd }%nd|v rjt2|d }%nt3d|d| d ddd| j%f 4t5j6}&|d| d  dd 4t5j6}'t0|#|$|%|'|&| |!|"dur|"1 ndd!| j| j< |  jd7  _|  j |!7  _ qW d   n	1 sw   Y  q| 7  || _|d"krt8|d#}t9:|| _;W d   n	1 sw   Y  |s| jrtjtj<| j d$st=d%t>| j | jt?d&| _@| js| jrI|sI| js?| jr2tA| j| j@d'| _| jr?tA| j| j@d(| _tB| j | jt?d&| _Ct.|d	krt&|d)d*D ]}t'(|d}d+|v rmd,}t.|d+ }!nd-|v r{d.}t.|d- }!nt3d/| |j,d } | j$-|  tD|j,d0d,}(t0|| |!|(|d1| j| j!< | jrtD|j,d0}(|(rdnd2})t/|!|)| jd}"|"1 | j| j! d3< |  j!d7  _!|  j"|!7  _"W d   n	1 sw   Y  qV| jr| jE D ]\}*}+|+d0d,sq| jr+|+d3}"|"dur|"1 }"t0|+d4 d|+d5|+d6|+d7|"d.d8| j| j< n{| F|+},|,d7d,rnt5jGd9d: |,d; D d	d<4t5jH}#t5jGd=d: |,d> D d	d<4t5jH}$t5jGd?d: |,d@ D d	d<4t5jH}%n|,d; }#|,d> }$|,d@ }%|,d' }&|,d( }'|+d3}"|"dur|"1 }"t0|#|$|%|'|&|+d5|+d6|"d!| j| j< | j$-|+d5 |  jd7  _|  j |+d67  _ q| 7  | I  tJdAt.| j$  | K  dS )Ba  
        Initialize RoboCasa dataset for training.

        Args:
            data_dir (str): Path to directory containing RoboCasa dataset HDF5 files
            chunk_size (int): Action chunk size
            final_image_size (int): Target size for resized images (square), defaults to 224
            t5_text_embeddings_path (str): Path to precomputed T5 text embeddings dictionary (key: instruction, val: embedding)
            num_images_per_sample (int): Number of images to return per sample
            normalize_images (bool): Whether to normalize the images and return as torch.float32
            normalize_actions (bool): Whether to normalize the actions
            normalize_proprio (bool): Whether to normalize the proprioceptive state
            use_image_aug (bool): Whether to apply image augmentations
            use_stronger_image_aug (bool): Whether to apply stronger image augmentations
            use_wrist_images (bool): If True, loads wrist-mounted camera images
            use_third_person_images (bool): If True, loads third-person images
            use_proprio (bool): If True, adds proprio to image observations
            num_duplicates_per_image (int): Number of times to duplicate each image (so that each type of image fills 1 latent frame when encoded with the tokenizer)
            rollout_data_dir (str): Path to directory containing rollout data (if provided, will load rollout data in addition to base dataset)
            demonstration_sampling_prob (float): Probability of sampling from demonstration data instead of rollout data
            success_rollout_sampling_prob (float): Probability of sampling from success rollout data instead of failure rollout data
            p_world_model (float): Probability of sampling a world model sample instead of a value function sample
            treat_success_rollouts_as_demos (bool): If True, copy successful rollout episodes into demonstration dataset (self.data)
            return_value_function_returns (bool): If True, returns value function returns for rollout episodes
            gamma (float): Discount factor for value function returns
            lazy_load_demos (bool): If True, only load demo metadata at initialization and load full data on-demand during __getitem__
        z=Must use at least one of wrist images or third-person images!	DEBUGGINGFalsetrueN   zError: Rollout data directory 'z' does not exist.r      rdatac                 S   s   t | dd S )N_r3   )intsplitx r<   M/data/cameron/vidgen/cosmos-policy/cosmos_policy/datasets/robocasa_dataset.py<lambda>   s    z*RoboCasaDataset.__init__.<locals>.<lambda>)keydata//obsrobot0_agentview_left_rgb_jpegtask_descriptionrobot0_agentview_left_rgbg      ?)terminal_rewardr-   )	file_pathdemo_keycommand	num_stepsis_jpegreturnsz\Neither 'robot0_agentview_left_rgb' nor 'robot0_agentview_left_rgb_jpeg' found in HDF5 file.robot0_agentview_right_rgbrobot0_agentview_right_rgb_jpegz^Neither 'robot0_agentview_right_rgb' nor 'robot0_agentview_right_rgb_jpeg' found in HDF5 file.robot0_eye_in_hand_rgbrobot0_eye_in_hand_rgb_jpegzVNeither 'robot0_eye_in_hand_rgb' nor 'robot0_eye_in_hand_rgb_jpeg' found in HDF5 file./actions/robot_states)left_primary_imagesright_primary_imageswrist_imagesproprioactionsrH   rI   rK   r   rbzdataset_statistics.jsonzDataset statistics file for this dataset does not yet exist. Please rerun with RoboCasaDataset(lazy_load_demos=False) once so that the dataset statistics are computed and saved. Then you can rerun with RoboCasaDataset(lazy_load_demos=True).)r   r6   !calculate_dataset_statistics_funcrV   rU   zLoading rollout metadata)descprimary_imagesFprimary_images_jpegTz)No primary images found in rollout file: success)rF   rH   rI   r\   rJ   g        rK   rF   rH   rI   rJ   )rF   rG   rH   rI   rJ   rK   is_from_rolloutc                 S      g | ]}t |qS r<   r   .0br<   r<   r=   
<listcomp>      z,RoboCasaDataset.__init__.<locals>.<listcomp>rR   axisc                 S   r^   r<   r_   r`   r<   r<   r=   rc     rd   rS   c                 S   r^   r<   r_   r`   r<   r<   r=   rc     rd   rT   z!Number of unique commands found: )Lr   r   r   r    normalize_imagesnormalize_actionsnormalize_proprior!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r   osenvirongetlowerpathexistsr6   demo_episode_metadatarollout_episode_metadatanum_episodesrI   rollout_num_episodesrollout_num_stepssetunique_commands
action_dimr   h5pyFilelistkeyssortedattrsaddlenr   dictcopyr   KeyErrorastypenpfloat32_build_step_index_mappingopenpickleloadt5_text_embeddingsjoin
ValueErrorr	   r   dataset_statsr   r
   dataset_stats_post_normboolitems_load_rollout_episode_datastackuint8!_build_rollout_step_index_mappingprint_calculate_epoch_structure)-selfr   r   r   r    rg   rh   ri   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   
hdf5_filesrollout_hdf5_filesfilefdemo_keys_listsorted_demo_keysrG   	obs_grouprJ   rH   rI   rK   rR   rS   rT   rV   rU   r\   rE   ep_idxep_metaepisode_datar<   r<   r=   __init__9   s  4






"a
"	


'




zRoboCasaDataset.__init__c                 C   s   t | dsd| _t | dsd| _t | ds| j| j | _| j}t|| j| j| j| jd}|d | _|d | _	|d | _
|d	 | _d
S )zVCalculate epoch layout with proper scaling: demos, success rollouts, failure rollouts._rollout_success_total_stepsr   _rollout_failure_total_steps_rollout_total_steps)rI   rollout_success_total_stepsrollout_failure_total_stepsr(   r)   adjusted_demo_countadjusted_success_rollout_countadjusted_failure_rollout_countepoch_lengthN)hasattrr   r   r   rI   r   r(   r)   r   r   r   r   )r   demo_base_countresultr<   r<   r=   r     s$   





z*RoboCasaDataset._calculate_epoch_structurec                 C   s   i | _ d| _| jr/| j D ]\}}|d }t|D ]}||f| j | j< |  jd7  _qqdS | j D ]\}}|d }t|D ]}||f| j | j< |  jd7  _q@q4dS )zYBuild a mapping from global step index to (episode index, relative index within episode).r   rI   r3   N)_step_to_episode_map_total_stepsr.   rp   r   ranger6   )r   episode_idxepisode_metadatarI   ir   r<   r<   r=   r     s"   z)RoboCasaDataset._build_step_index_mappingc                 C   sB   t i | j}|d | _|d | _|d | _|d | _|d | _dS )zYBuild mapping for rollout dataset with separate tracking for successful/failure episodes.$_rollout_success_step_to_episode_map$_rollout_failure_step_to_episode_mapr   r   r   N)r   rq   r   r   r   r   r   )r   r   r<   r<   r=   r     s   



z1RoboCasaDataset._build_rollout_step_index_mappingNc                 C   s  | ddr| ||||S |d }|d }t|dZ}|d| d }|du }	|d	 rs|	rCt|d
 }
t|d }t|d }nttt|}i }
i }i }|D ] }t|d
 | |
|< t|d | ||< t|d | ||< qQnD|	r|d dd }
|d dd }|d dd }n)tt|}i }
i }i }|D ]}|d | |
|< |d | ||< |d | ||< q|dur|dur|d| d ||d| jf 	t
j}n|d| d ddd| jf 	t
j}|dur|	stt|}i }|D ]}|d| d | 	t
j||< qn|d| d dd 	t
j}| jr-td|i| jd}| jr\t|trS|D ]}|| dd}td|i| jd ||< q9n	td|i| jd}t|
|||||d |d |d	 d}|W  d   S 1 s{w   Y  dS )a  
        Load demo episode data from HDF5 file using metadata.
        Optimized to only load required frames and action chunks.

        Args:
            episode_metadata (dict): Episode metadata containing file_path, demo_key, etc.
            frame_indices (set or None): Set of frame indices to load. If None, loads all frames.
            action_start_idx (int or None): Start index for action chunk. If None, loads all actions.
            action_end_idx (int or None): End index for action chunk (exclusive). If None, loads all actions.

        Returns:
            dict: Episode data dictionary with loaded arrays (only requested frames/actions)
        r]   FrF   rG   r5   r@   rA   NrJ   rB   rM   rO   rD   rL   rN   rP   rQ   rV   r3   rU   rH   rI   )rR   rS   rT   rU   rV   rH   rI   rJ   )rl   r   rx   ry   r   r|   rz   r   rw   r   r   r   rh   r   r   ri   
isinstancer   reshapeflatten)r   r   frame_indicesaction_start_idxaction_end_idxrF   rG   r   r   load_allrR   rS   rT   frame_indices_listidxrV   rU   proprio_arrayr   r<   r<   r=   _load_demo_episode_data  s   

	"*" 
&z'RoboCasaDataset._load_demo_episode_datac                 C   s  |d }t |dC}|du }|d rY|r/|d dd }|d dd }	|d dd }
nntt|}i }i }	i }
|D ]}|d | ||< |d | |	|< |d | |
|< q=nD|rt|d dd }|d	 dd }	|d
 dd }
n)tt|}i }i }	i }
|D ]}|d | ||< |d	 | |	|< |d
 | |
|< q|dur|dur|d ||d| jf tj}n|d ddd| jf tj}|dur|stt|}i }|D ]}|d | tj||< qn|d dd tj}| jrt	d|i| j
d}| jr0t|tr'|D ]}|| dd}t	d|i| j
d ||< qn	t	d|i| j
d}t||	|
|||d |d |d |d d	}|W  d   S 1 sRw   Y  dS )a  
        Load rollout episode data from HDF5 file using metadata.
        Optimized to only load required frames and action chunks.

        Args:
            episode_metadata (dict): Episode metadata containing file_path, success, etc.
            frame_indices (set or None): Set of frame indices to load. If None, loads all frames.
            action_start_idx (int or None): Start index for action chunk. If None, loads all actions.
            action_end_idx (int or None): End index for action chunk (exclusive). If None, loads all actions.

        Returns:
            dict: Episode data dictionary with loaded arrays (only requested frames/actions)
        rF   r5   NrJ   r[   Zsecondary_images_jpegwrist_images_jpegrZ   Zsecondary_imagesrT   rV   rU   r3   r   rH   rI   r\   )	rR   rS   rT   rU   rV   rH   rI   r\   rJ   )rx   ry   r|   rz   rw   r   r   r   rh   r   r   ri   r   r   r   r   )r   r   r   r   r   rF   r   r   rR   rS   rT   r   r   rV   rU   r   r   r<   r<   r=   r   W  s   $"
&z*RoboCasaDataset._load_rollout_episode_datac                 C   s   | j S )z3Returns the total number of samples in the dataset.)r   )r   r<   r<   r=   __len__  s   zRoboCasaDataset.__len__c           ;      C   s	  t || j| j}|dkrdnd}|dkrdnd}|dkr^|| j }| j| \}}d}| jrV| j| }t|| j |d d }	||	h}
t|| j |d }| j	||
||d}n| j
| }d}nz|dkr|| j }|| j }| j| \}}| j| }t|| j |d d }	||	h}
t|| j |d }| j||
||d}n<|| j | j }|| j }| j| \}}| j| }t|| j |d d }	||	h}
t|| j |d }| j||
||d}d	}d	}|dkr| jrt | jk rd
}d	}n	d	}d
}nd
}d	}|| j }|d d }||kr
|}i }i }i }||h}|D ]}t|d tra|dd	rGt|d | ||< t|d | ||< t|d | ||< q|d | ||< |d | ||< |d | ||< q|dkr|dd	rt|d | ||< t|d | ||< t|d | ||< q|d | ||< |d | ||< |d | ||< qg }d}tjt|| dd}|| |d7 }| jrt|d tr|d | }n|d | }|| }t|}t|| jd}|| |}|d7 }| jr|| } t| | jd} ||  |}!|d7 }| j r>|| }"t|"| jd}"||" |}#|d7 }|| }$t|$| jd}$||$ |}%|d7 }t|| }t|| jd}|| |}&|d7 }| jrt|d trj|d | }'n|d | }'t|| }t|| jd}|| |}(|d7 }| jr|| })t|)| jd})||) |}*|d7 }| j r|| }+t|+| jd}+||+ |},|d7 }|| }-t|-| jd}-||- |}.|d7 }| jrt|| }/t|/| jd}/||/ |}0|d7 }tj!|dd}1t"|1# st$|1# rt%d| t&|1| j'| j(| j)| j*d}1t+"|1# s+t+$|1# r2t%d| |d | }2|dkrA| jpD|dk}3|2| jkrd|3rX|d d| j }4nJ|d ||| j  }4n>|3rp|d dd }5n|d |d }5| j|2 }6|3rt,|d d |6df}7nt,|d d |6df}7tj!|5|7gdd}4| jr|}8|dkr| jr| j| d |8 }9n|dur|d |8 }9n|d |8 }9nt-d}9i d|1d|d d|4dt+.| j/|d  dt+j0dt+j1dddd t+2d| j'| j'd!| j't+0d" d| jr|nt|d | d#| jr|'nt|d | d$|d%|d&|d'|r7dndd(|r?dndd)|d*|&| jrM|0nd| jrT|nd| jr[|!nd| j rb|#nd| j ri|%nd| jrp|(nd| jrw|*nd| j r~|,nd| j r|.nd|9d+
}:|:S ),aM  
        Fetches images and action chunk sample by index.
        Returns action chunk rather than just single-step action.
        If the action chunk retrieval would go out of bounds, the last action is repeated however
        many times needed to fill up the chunk.

        Args:
            idx: Integer index to retrieve sample

        Returns:
            dict: Data sample: {
                video=images,
                actions=action chunk,
                t5_text_embeddings=text embedding,
                t5_text_mask=text embedding mask,
                fps=frames per second,
                padding_mask=padding mask,
                num_frames=number of frames per sequence,
                image_size=image size,
                proprio=proprio state,
                __key__=unique sample identifier,
            }
        demor3   r   success_rolloutNrI   )r   r   r   r   FTrR   rJ   rS   rT   re   rU   )total_num_copiesz4Invalid image data detected (NaN or Inf) for sample )r   rg   r!   stronger_image_augz3Invalid processed image tensor detected for sample rV   rK   z-100videorH   r   t5_text_maski   )dtypefps   padding_mask
image_sizer   future_proprio__key__rollout_data_maskrollout_data_success_maskworld_model_sample_maskvalue_function_sample_maskglobal_rollout_idxaction_latent_idx)
value_latent_idxcurrent_proprio_latent_idxcurrent_wrist_image_latent_idxcurrent_image_latent_idxcurrent_image2_latent_idxfuture_proprio_latent_idxfuture_wrist_image_latent_idxfuture_image_latent_idxfuture_image2_latent_idxvalue_function_return)3r   r   r   rI   r   r.   rp   minr   r   r6   r   r   rq   r   r   r   r,   randomr*   r   r   rl   r   r   expand_dims
zeros_likeappendr%   r   r&   r#   r$   concatenateisnananyisinfr   r   r   rg   r!   r"   torchtilefloatsqueezer   onesint64zeros);r   r   sample_typer   r   global_step_idxr   relative_step_idxr   Zfuture_frame_idx_tempZframe_indices_neededr   r   r   success_idxfailure_idxis_world_model_sampleis_value_function_samplefuture_frame_idxmax_possible_idxZ decompressed_left_primary_imagesZ!decompressed_right_primary_imagesdecompressed_wrist_imagesframes_needed	frame_idx
image_listcurrent_sequence_idxfirst_input_imagerU   imageblank_imager   wrist_imager   Zcurrent_left_imager   Zcurrent_right_imager   r   r   r   future_wrist_imager   Zfuture_left_imager   Zfuture_right_imager   value_imager   imagesremaining_actionsZactions_already_slicedaction_chunkavailable_actionsnum_padding_neededpaddingreturn_timestepr   sample_dictr<   r<   r=   __getitem__  s  





	

	



















  	



%zRoboCasaDataset.__getitem__)r   r   r   FTTTTTTTr   r   r   r   r   FTr   FF)NNN)__name__
__module____qualname__strr8   r   r   r   r   r   r   r   r   r   r  r<   r<   r<   r=   r   8   s    	

  x
	s
jr   __main__r   z9users/user/data/robocasa/robocasa_regen_v2_1199succDemos/r    zJusers/user/data/robocasa/robocasa_regen_v2_1199succDemos/t5_embeddings.pklr       r!   Tr#   r$   r%   ri   rh   r&   r   r"   r'   zEusers/user/data/robocasa/robocasa_regen_rollout_data_v2_1291episodes/r(   r   r)   r,   r-   r   r   c                 C   s
   d | S )Nz{0:0.3f})formatr:   r<   r<   r=   r>   O  s   
 r>   )	formatterz
Images shape, dtype: r   zActions shape, dtype: rV   z	Actions:
z!T5 text embeddings shape, dtype: r   zT5 text embeddings:
z./temp)exist_ok2   r3      z"./temp/LAZYLOAD_global_step_index_z__task=rH      z__isRollout=r   z__globalRolloutIdx=r   z__isSuccess=r   z__value=r   z.4fz__frameIdx=z.pngzSaved image at path: zQ
================================================================================z1Data samples loading test completed successfully!zQ================================================================================
r<   )5__doc__rj   r   r   rx   numpyr   r   PILr   torch.utils.datar   r   %cosmos_policy.datasets.dataset_commonr   r   r   r   r	   r
   $cosmos_policy.datasets.dataset_utilsr   r   r   r   r   r   r   cosmos_policy.utils.utilsr   set_printoptionsinfr   r  datasetr   sampler   shaper   makedirsr   r7   randintr   Zglobal_step_indexpermuter  r   Zimg_np
image_path	fromarraysaver<   r<   r<   r=   <module>   s    $	        

	
N