o
    vi                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZ ej dej!d	 dRd
e"fddZ#dd Z$de"de"de"de%fddZ&G dd deZ'e(dkr|e'dSi ddddddddd dd!dd"dd#dd$d%d&d'd(d)d*d+d,dd-d.d/d0d1dZ)ej d2d3d4 id5 dZ*e)e* Z+e,d6e+d7 j-e+d7 j.f  e,d8e+d9 j-e+d9 j.f  e,d:e+d9   e,d;e+d< j-e+d< j.f  e,d=e+d<   e,d>e)j/  ej0d?dd@ e1dD ]^Z2e3de4e)dA Z5e)e5 Z+e+d7 6dAdBdd Z7e1e7j-d D ]7Z8dCe5 dDe+dE  dFe+dG  dHe+dI  dJe+dK  dLe+dM dNdOe8 dPZ9e:e7e8 ;e9 e,dQe9  qBqdS dS )Tz
ALOHA robot tasks dataloader.

Run this command to print a few samples from the ALOHA dataset:
    python -m cosmos_policy.datasets.aloha_dataset
    N)Image)Dataset)tqdm)build_demo_step_index_mapping build_rollout_step_index_mappingcalculate_epoch_structurecompute_monte_carlo_returnsdetermine_sample_typeget_action_chunk_with_padding"load_or_compute_dataset_statistics-load_or_compute_post_normalization_statistics)calculate_dataset_statisticsdecode_single_jpeg_frameget_hdf5_filespreprocess_imagerescale_datarescale_episode_dataresize_images   )	precision	linewidthresize_sizec                 C   s   t | }| std|  g }	 | \}}|snt |t j}|| q|  t	|dkr:td|  t
j|t
jd}|durKt||}|S )z
    Loads an MP4 video into a numpy array of images (T, H, W, C) in RGB uint8.

    Args:
        video_path (str): Absolute path to the MP4 file

    Returns:
        np.ndarray: Array of frames (uint8, RGB)
    Could not open video file: Tr   zNo frames found in video: dtypeN)cv2VideoCaptureisOpened
ValueErrorreadcvtColorCOLOR_BGR2RGBappendreleaselennparrayuint8r   )
video_pathr   capframesretZ	frame_bgrZ	frame_rgb r,   J/data/cameron/vidgen/cosmos-policy/cosmos_policy/datasets/aloha_dataset.pyload_video_as_images;   s$   



r.   c                 C   sR   t | }| std|  t|t j}|  |dkr't| j	d S |S )z>Return number of frames in an MP4 video using OpenCV metadata.r   r   )
r   r   r   r   intgetCAP_PROP_FRAME_COUNTr#   r.   shape)r(   r)   frame_countr,   r,   r-   get_video_num_frames^   s   
r4   curr_step_indexnum_history_indicesspacing_factorreturnc                 C   s2   t |dd}| ||  }t |d}t| S )a  
    Computes the step indices corresponding to the history, given the current step index.

    If any indices would go out of bounds (i.e., be less than 0), we simply return 0 for those indices.

    Args:
        curr_step_index (int): Current step index
        num_history_indices (int): Number of steps in the history
        spacing_factor (int): Spacing factor; returns 1 step in each spacing_factor steps

    Returns:
        tuple: History step indices
    r   )r%   arangemaximumtupletolist)r5   r6   r7   Z
steps_backindicesr,   r,   r-   get_history_indicesk   s   r?   c                3   @   s   e Zd Z																													d2d
edededededededededededededededededededededed ed!ed"ef2d#d$Zd%d& Zd'd( Z	d)d* Z
d+d, Zd-d. Zd/d0 Zd1S )3ALOHADatasetT       F      V-?      ?data_diris_train
chunk_sizefinal_image_sizet5_text_embeddings_pathuse_image_auguse_stronger_image_augdebugdebug2use_proprior6   history_spacing_factornum_duplicates_per_imagereturn_value_function_returnsgammalazy_video_decompressionrollout_data_dirdemonstration_sampling_probsuccess_rollout_sampling_probtreat_demos_as_success_rolloutstreat_success_rollouts_as_demosuse_jpeg_for_rolloutsload_all_rollouts_into_ramuse_third_person_imagesuse_wrist_imagesc           ?         s  || _ || _|| _|| _|| _|| _|| _|	| _|
| _|| _	|| _
|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _d| _t||d}tjdd dkre|dd }i | _d| _ d| _!t" | _#t$|D ]}t%&|d	q  d
 dv ot'fdddD }dv ot'fdddD } | o| }! d dd }" d dd }#|!s݈d d dd }$d d dd }%d d dd }&t(|$}'n]dd }(|(d d |(d d |(d d d})tj)*|fdd|)+ D }*| jrd}$d}%d}&t,|*d }'nt-|*d | jd}$t-|*d | jd}%t-|*d | jd}&t(|$}'|.dd }+d|+v rId}+n/d|+v rQd}+n'd |+v rYd!}+nd"|+v rad"}+nd#|+v rid$}+nd%|+v rqd&}+nt/d'|+ |+0d(d)},| j#1|, |'}-| jrt2|-d*| jd+}.t3||#|"|,|-| jr|.4 ndd,d-}/|!r| jr|*|/d< d,|/d.< n!|$|/d< |%|/d/< |&|/d0< d|/d.< n|$|/d< |%|/d/< |&|/d0< d|/d.< |/| j| j < |  j d7  _ |  j!|-7  _!W d   n	1 sw   Y  qv| 5  || _|d1kr#t6|d2}t78|| _9W d   n	1 sw   Y  t:| j | jt;d3| _<| js5| jrY| jrBt=| j| j<d4| _| jrOt=| j| j<d5| _t>| j | jt;d3| _?i | _@i | _Ad| _Bd| _C| jr| j+ D ]X\}0}1t3|1d6 |1d|1d/|1d0|1d5 |1d4 |1d7 |1d8 |1d.dd,d9
}2| jr|1d:|2d:< |1d.dr|1d |2d< |2| jA| jB< |  jC|1d8 7  _C|  jBd7  _BqntD| jtErt(| jdkrtj)F| jsJ d;| j d<g }3tjG| jd,d=D ]\}4}5}6|6D ]}| Hd>r|3Itj)J|4| qqtjdd dkr"|3dd? }3t$|3d@dAD ]}t%&|d	ԉ d
 v r< d
 ndtK fdBddCD }7duo]dv o]t'fdDddD }duordv ort'fdEddD } | jox|7}8|8 o| o| }!|7r| js| jstLdF d,| _|8rdG v rt( dG }-nwdH v rt( dH }-nkdI v rt( dI }-n_dJ v rt( dJ }-nStMdK| |!rdLd }(|(d d |(d d |(d d d})tj)*|fdMd|)+ D }*| jr	t,|*d nt-|*d jNd }-nt(d d }-|- d jNd krAtLdN| dO|- dP d jNd  dQ 	 W d   q( jOdRd1},|,d1krtj)Ptj)*tj)*|}+d|+v rcd}+n/d|+v rkd}+n'd |+v rsd!}+nd"|+v r{d"}+nd#|+v rd$}+nd%|+v rd&}+nt/d'|+ |+0d(d)},| j#1|, tQ jOdS}9z
tR jOdT}:W n tSy   |9rd*ndU}:Y nw t3||,tT|-|9tR|:tQ|!tQ|8dV};| jrt2tT|-tR|:| jd+}.|.|;d:< |;| j@| jB< |  jBd7  _B|  jCtT|-7  _CW d   n	1 s
w   Y  q(| jr6t(| j@dkr6t$| j@+ dWdAD ]\}<}=| U|=}>|>| jA|<< q&| jrt(| j@dkr| j@+ D ]\}<}=tQ|=dSsUqG|<| jAv ra| jA|< }1n| U|=}1t3|=d6|1d|1d/|1d0|1d5 |1d4 |1d7 tT|1d8 |1d.dd,d9
}2|1d.dr|1d|2d< |1dXdrd,|2dX< |1dY|2dY< |1dZ|2dZ< |1d[|2d[< |1d\|2d\< d:|1v r|1d: |2d:< |2| j| j < | j#1|2d7  |  j!tT|2d8 7  _!|  j d7  _ qG| 5  | jAi ks | j@i kr| V  | W  dS )]a
  
        Initialize ALOHA dataset for training.

        Args:
            data_dir (str): Path to directory containing preprocessed ALOHA HDF5 files
            is_train (bool): If True, loads train set; else loads val set
            chunk_size (int): Action chunk size
            final_image_size (int): Target size for resized images (square)
            t5_text_embeddings_path (str): Path to precomputed T5 text embeddings dictionary (key: instruction, val: embedding)
            num_images_per_sample (int): Number of images to return per sample
            normalize_images (bool): Whether to normalize the images and return as torch.float32
            normalize_actions (bool): Whether to normalize the actions
            normalize_proprio (bool): Whether to normalize the proprioceptive state
            use_image_aug (bool): Whether to apply image augmentations
            use_stronger_image_aug (bool): Whether to apply stronger image augmentations
            debug (bool): If True, loads only the first episode and returns only the first sample in that episode
            debug2 (bool): If True, loads all episodes but returns only one specific sample in the whole dataset
            use_proprio (bool): If True, adds proprio to image observations
            num_history_indices (int): Number of frames to include in history
            history_spacing_factor (int): Spacing amount between frames in history
            num_duplicates_per_image (int): Temporal compression factor for the image tokenizer
            return_value_function_returns (bool): If True, returns value function returns for rollout episodes
            gamma (float): Discount factor for value function returns
            lazy_video_decompression (bool): Whether to lazily decompress videos
            rollout_data_dir (str): Path to directory containing rollout data (if provided, will load rollout data in addition to base dataset)
            demonstration_sampling_prob (float): Probability of sampling from demonstration data instead of rollout data
            success_rollout_sampling_prob (float): Probability of sampling from success rollout data instead of failure rollout data
            treat_demos_as_success_rollouts (bool): If True, copy demonstration episodes into rollout data as successful rollouts
            treat_success_rollouts_as_demos (bool): If True, copy successful rollout episodes into demonstration dataset (self.data)
            use_third_person_images (bool): This is a null arg that is always True. We need it here to match the signature of the LIBERODataset class.
            use_wrist_images (bool): This is a null arg that is always True. We need it here to match the signature of the LIBERODataset class.
        F)rI   	DEBUGGINGFalsetrueN   r   robservationsimagesc                 3       | ]	}| d  v V  qdS rf   Nr,   .0Zcam_key	obs_groupr,   r-   	<genexpr>       
z(ALOHADataset.__init__.<locals>.<genexpr>cam_highcam_left_wristcam_right_wristvideo_pathsc                 3   rg   rs   Nr,   ri   rk   r,   r-   rm      rn   actionobservations/qposrp   rq   rr   c                 S   $   | d }t |tr|dS t|S Nr,   zutf-8
isinstancebytesdecodestrdsvalr,   r,   r-   
_read_path     

z)ALOHADataset.__init__.<locals>._read_pathc                        i | ]\}}|t j |qS r,   ospathjoinrj   kvfile_dirr,   r-   
<dictcomp>       z)ALOHADataset.__init__.<locals>.<dictcomp>r   /Z
fold_shirtZcandies_in_bowlZput_candies_in_bowlZcandy_in_bagZput_candy_in_bagZflatten_shirtZbrown_chicken_wing_on_plateZput_brown_chicken_wing_on_plateZpurple_eggplant_on_plateZput_purple_eggplant_on_platezUnknown command: _ g      ?)Zterminal_rewardrU   T)	file_pathproprioactionscommand	num_stepsreturnssuccessis_lazy_videoleft_wrist_imagesright_wrist_imagesrC   rb)rH   dataZ!calculate_dataset_statistics_funcr   r   r   r   r   )
r   rf   r   r   r   r   r   r   r   r   r   zError: Rollout data directory 'z' does not exist.)followlinks)z.h5z.hdf5z.he5
   zLoading ALOHA rollout metadata)descc                 3       | ]}| v V  qd S Nr,   rj   r   fr,   r-   rm     
    
primary_images_jpegwrist_images_jpegwrist_left_images_jpegwrist_right_images_jpegc                 3   rg   rh   r,   ri   rk   r,   r-   rm     
    

c                 3   rg   rt   r,   ri   rk   r,   r-   rm     r   zWARNING: Detected JPEG-compressed rollout images in HDF5 (e.g., 'primary_images_jpeg'), but use_jpeg_for_rollouts=False. Set use_jpeg_for_rollouts=True to load these rollouts.r   r   r   r   z.No JPEG image datasets found in rollout file: c                 S   rw   rx   ry   r~   r,   r,   r-   r     r   c                    r   r,   r   r   r   r,   r-   r     r   zWARNING: For file z=:
	Mismatch between number of video frames and action steps: z frames != z. action steps.
	Skipping loading this episode.task_descriptionr   success_scoreg        )r   r   r   r   r   use_mp4use_jpegz$Preloading rollout episodes into RAMis_lazy_jpegjpeg_file_pathjpeg_primary_keyjpeg_left_keyjpeg_right_key)XrH   rJ   rK   rL   normalize_imagesnormalize_actionsnormalize_propriorM   rN   rO   rP   rQ   r6   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   Z_jpeg_rollout_hint_emittedr   r   environr0   lowerr   Znum_episodesr   setunique_commandsr   h5pyFileallr$   r   dirnameitemsr4   r.   splitr   replaceaddr   dictcopy_build_step_index_mappingopenpickleloadt5_text_embeddingsr   r   dataset_statsr   r   Zdataset_stats_post_normrollout_episode_metadatarollout_dataZrollout_num_episodesZrollout_num_stepsrz   r}   existswalkendswithr"   r   anyprintKeyErrorr2   attrsbasenameboolfloat	Exceptionr/   _load_rollout_episode_data!_build_rollout_step_index_mapping_calculate_epoch_structure)?selfrH   rI   rJ   rK   rL   r   r   r   rM   rN   rO   rP   rQ   r6   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   
hdf5_filesfilehas_raw_imageshas_video_pathsr   r   r   rf   r   r   Zepisode_num_stepsr   video_filenamesrs   Zraw_file_stringr   r   r   episode_entryr   episode_dataZep_copyZrollout_hdf5_filesrootdirsfileshas_top_jpegr   r   r   Zmetadata_entryZep_idxZep_metaZep_entryr,   r   r   rl   r-   __init__   s  ?













r





	



 R






	  





zALOHADataset.__init__c                 C   s"   t | j}|d | _|d | _dS )zYBuild a mapping from global step index to (episode index, relative index within episode)._step_to_episode_map_total_stepsN)r   r   r   r   r   resultr,   r,   r-   r   x  s   

z&ALOHADataset._build_step_index_mappingc                 C   sD   t | j| j}|d | _|d | _|d | _|d | _|d | _dS )zYBuild mapping for rollout dataset with separate tracking for successful/failure episodes.$_rollout_success_step_to_episode_map$_rollout_failure_step_to_episode_map_rollout_success_total_steps_rollout_failure_total_steps_rollout_total_stepsN)r   r   r   r   r   r   r   r   r   r,   r,   r-   r   ~  s   



z.ALOHADataset._build_rollout_step_index_mappingc                 C   s   t | dsd| _t | dsd| _t | ds| j| j | _t| j| j| j| j| jd}|d | _|d | _	|d | _
|d	 | _d
S )zVCalculate epoch layout with proper scaling: demos, success rollouts, failure rollouts.r   r   r   r   )r   Zrollout_success_total_stepsZrollout_failure_total_stepsrX   rY   adjusted_demo_countadjusted_success_rollout_countadjusted_failure_rollout_countepoch_lengthN)hasattrr   r   r   r   r   rX   rY   r   r   r   r   r   r,   r,   r-   r     s"   





z'ALOHADataset._calculate_epoch_structurec                 C   s4   | j rdS | jr
dS t| dr| jdkr| jS | jS )z3Returns the total number of samples in the dataset.rc   r   r   )rO   rP   r   r   r   r   r,   r,   r-   __len__  s   zALOHADataset.__len__c           N         s
  t   }|} jrd} jrd}t| j j}|dkr2| j } j| \}}d} j| }	d}
nf|dkre| j }|t	dt
 dd }
 j|
 \}}| jv rZd} j| }	n> j| } |}	n3d	}| j  j }|t	dt
 d
d }
 j|
 \}}| jv rd} j| }	n
 j| } |}	t   }d}d}|dkr jrd}tj |k rd}d}n	d}d}nd}d}|	ddrd|	vs|	d du r|	d }t|d  jd}t|d  jd}t|d  jd}||	d< ||	d< ||	d< d|	d< t   }| j }|	d d }||kr|}d}d}d}d}d}d}|	ddr|	d }|	d}|	d}|	d}dd } t|db}!|rS||!v rS| |!| |}|rb||!v rb| |!| |}|rq||!v rq| |!| |}|r||!v r| |!| |}|r||!v r| |!| |}|r||!v r| |!| |}W d   n	1 sw   Y  d tjd!tjf fd"d#}"|"|}|"|}|"|}|"|}|"|}|"|}g }#g }$d}%d}&d}'d}(d})d}*d}+d},d}-d}.d}/d}0|	ddr|n|	d | }1t|1}2|#|2 |$d |%d7 }%|&d7 }& jrV|	d$ | }3|	ddr(|n|	d | }4t|	ddr9|n|	d | }5|&})|#|5 |$ j |% j7 }%|&d7 }&|	ddr_|n|	d | }6|&}*|#|6 |$ j |% j7 }%|&d7 }&|	ddr|n|	d | }7|&}+|#|7 |$ j |% j7 }%|&d7 }&|	ddr|n|	d | }8|&},|#|8 |$ j |% j7 }%|&d7 }&t|	ddr|n|	d | }9|&}'|#|9 |$ j |% j7 }%|&d7 }& jr |	d$ | }:t|	ddr|n|	d | }5|&}-|#|5 |$ j |% j7 }%|&d7 }&nd}-|	ddr+|n|	d | };|&}.|#|; |$ j |% j7 }%|&d7 }&|	ddrP|n|	d | }<|&}/|#|< |$ j |% j7 }%|&d7 }&|	ddru|n|	d | }=|&}0|#|= |$ j |% j7 }%|&d7 }& jrt|	ddr|n|	d | }>|&}(|#|> |$ j |% j7 }%|&d7 }&nd}(t   }t|#}?d%|'fd&|(fd' jr|)ndfd(|*fd)|+fd*|,fd+ jr|-ndfd,|.fd-|/fd.|0ff
D ]#\}@}A|Adkrd|A  kr	|?k sn J |@ d/|A d0|? qtj |#dd1}Bt!|B j j" j# j$d2}Bt   }t%j&|$t%j'|Bj(d3}Ct%j)|B|Cdd4}D|Dj*d t+|C, - ksTJ d5t   }t.|	d6 | j|	d d7}Et   } jr|dury|d8 | }Fn|	d8 | }Fnt/d9}Ft   }t0| j |	d d }Gt.|	d6 |G j|	d d7}H|G j }I|	d d }|I|kr|}I jr|dur|d8 |I }Jn|	d8 |I }Jnt/d9}Jt   }|dkrdnd}K|dkrdnd}Lt   }Mi d:|Dd;|	d; d6|Ed<t%1 j2|	d;  d=t%j3d>t%j4d?d@dAdBt%5d j jdC jt%3dD d$ jr%|3nt|	d$ | dE jr5|:nt|	d$ | dF|dG|FdH|HdI|JdJ|KdK|LdL|rVdnd|r]dnd|
|'|( jrg|)nd|*|+|, jrq|-nd|.|/|0dMS )Na  
        Fetches images and action chunk sample by index.
        Returns action chunk rather than just single-step action.
        If the action chunk retrieval would go out of bounds, the last action is repeated however
        many times needed to fill up the chunk.

        Args:
            idx: Integer index to retrieve sample

        Returns:
            dict: Data sample: {
                video=images,
                actions=action chunk,
                t5_text_embeddings=text embedding,
                t5_text_mask=text embedding mask,
                fps=frames per second,
                padding_mask=padding mask,
                num_frames=number of frames per sequence,
                image_size=image size,
            }
        r   iw demoNr9   Zsuccess_rolloutrc   r   Zfailure_rolloutr   FrG   Tr   rf   rs   rp   r   rq   rr   r   r   r   r   r   r   r   r   c                 S   s   | | }t |S r   )r   )r   idxarrr,   r,   r-   _decode_one&  s   z-ALOHADataset.__getitem__.<locals>._decode_onerd   imgr8   c                    s@   | j d  jks| j d  jkrttj| dd jdS | S )Nr   rc   axis)r2   rK   r   r%   expand_dimssqueeze)r  r   r,   r-   _ensure_size<  s    z.ALOHADataset.__getitem__.<locals>._ensure_sizer   action_latent_idxvalue_latent_idxcurrent_proprio_latent_idxcurrent_wrist_image_latent_idxcurrent_wrist_image2_latent_idxcurrent_image_latent_idxfuture_proprio_latent_idxfuture_wrist_image_latent_idxfuture_wrist_image2_latent_idxfuture_image_latent_idx=z out of range for num_segments=r  )rK   r   rM   stronger_image_aug)r   device)dimz%Expanded T does not match repeats sumr   )r   relative_step_idxrJ   r   r   z-100videor   r   t5_text_maski   r   fps   padding_mask
image_size   future_proprio__key__value_function_returnnext_action_chunknext_value_function_returnrollout_data_maskrollout_data_success_maskworld_model_sample_mask)value_function_sample_maskglobal_rollout_idxr  r  r	  r
  r  r  r  r  r  r  )6timerO   rP   r	   r   r   r   r   r   maxgetattrr   r   r   r   r   rT   r%   randomrandr0   r.   rK   rJ   r   r   ndarray
zeros_liker"   rQ   rS   r$   stackr   r   rM   rN   torch	as_tensorlongr  repeat_interleaver2   r/   sumitemr
   r   minr  r   onesint64zeros)Nr   r   t0t_prevZsample_typeZglobal_step_idxepisode_idxr  episode_metadatar   r&  Zsuccess_idxZfailure_idxZis_world_model_sampleZis_value_function_sampleZp_world_modelrs   rf   r   r   Zfuture_frame_idxZmax_possible_idxZprimary_currentZleft_currentZright_currentZprimary_futureZleft_futureZright_futureZ	jpeg_fileprimary_keyleft_key	right_keyr   Zf_jr  r*   repeatsZ
cum_framesZsegment_idxr  r  r	  r
  r  r  r  r  r  r  Zref_image_for_shapeZblank_first_input_framer   imageZblank_proprio_imageleft_wrist_imageright_wrist_imageprimary_imageZblank_action_imager  Zfuture_left_wrist_imageZfuture_right_wrist_imagefuture_imageZvalue_imageZnum_segmentsnamer   Zall_unique_imageslengthsZ
all_imagesaction_chunkr  Znext_relative_step_idxr   Znext_future_frame_idxr!  r"  r#  Zt_nowr,   r   r-   __getitem__  s  
























































2$



	
zALOHADataset.__getitem__c                    sR  |d }t |d d v r d ndt fdddD }duo3dv o3tfd	dd
D }duoFdv oFtfddd
D }| joK|}| oS|oS| } d dd tj} d dd tj}	|rd}
d}d}d}d}d}d}d v rd}nd v rd}nd v rd}nd v rd}ntdd}d v rd}nd v rd}d}d v rd}n||sوd d dd }
d d dd }d d dd }d}d}nWdd }|d d |d d |d d d
}t	j
|fdd| D }| jrd}
d}d}d}nt|d | jd}
t|d | jd}t|d | jd}d}| jr=td|i| jd}| jrJtd|	i| jd}	t|
|||	||d  t|d! |d"}|rc||d< |rzd|d#< ||d$< ||d%< ||d&< ||d'< t|d(|d(< d)|v r|d) |d)< W d   |S W d   |S 1 sw   Y  |S )*zgLoad rollout episode data from HDF5 file using metadata (raw or MP4). Applies normalization if enabled.r   rd   re   Nc                 3   r   r   r,   r   r   r,   r-   rm   k  r   z:ALOHADataset._load_rollout_episode_data.<locals>.<genexpr>r   rf   c                 3   rg   rh   r,   ri   rk   r,   r-   rm   w  s    ro   rs   c                 3   rg   rt   r,   ri   rk   r,   r-   rm   |  rn   ru   rv   FTr   r   r   r   z/No JPEG datasets found to decode primary imagesrp   rq   rr   c                 S   rw   rx   ry   r~   r,   r,   r-   r     r   z;ALOHADataset._load_rollout_episode_data.<locals>._read_pathc                    r   r,   r   r   r   r,   r-   r     r   z;ALOHADataset._load_rollout_episode_data.<locals>.<dictcomp>r   r   r   r   r   )rf   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r\   astyper%   float32r   r   r   r   r   rV   r.   rK   r   r   r   r   r   r/   r   r0   )r   r<  r   r   r   r   r   r   r   r   rf   r   r   r   r   rs   r=  r>  r?  r   r   r   r,   r   r-   r   e  s   







 
  
   z'ALOHADataset._load_rollout_episode_dataN)TrA   rB   rC   FTTTFFFFrD   rE   rD   FrF   FrC   rG   rG   FFFFTT)__name__
__module____qualname__r}   r   r/   r   r   r   r   r   r   rI  r   r,   r,   r,   r-   r@      s    

   t	   =r@   __main__rH   zusers/user/data/aloha/preprocessed/mixture_20250905_foldshirt15_candiesinbowl45_candyinbag45_eggplantchickenonplate80_185_demosrL   zusers/user/data/aloha/preprocessed/mixture_20250905_foldshirt15_candiesinbowl45_candyinbag45_eggplantchickenonplate80_185_demos/t5_embeddings.pklrJ   2   rM   TrN   rQ   r   r   rS   r  rZ   FrX   g?rY   rG   rT   rU   rF   rW   zZusers/user/data/aloha/rollout_data/mixture_20250921_648rollouts_505evalSuite_143candyInBagr\   r   c                 C   s
   d | S )Nz{0:0.3f})format)xr,   r,   r-   <lambda>  s   
 rS  )	formatterz
Images shape, dtype: r  zActions shape, dtype: r   z	Actions:
z!T5 text embeddings shape, dtype: r   zT5 text embeddings:
zUnique commands: z./temp)exist_okrc      z ./temp/video__global_step_index___r   z__is_rollout=r"  z__global_rollout_idx=r&  z__is_success_rollout=r#  z__value_function_return=r  z.4fz__frame_idx=z.pngzSaved image at path: r   r,   )<__doc__r   r   r*  r'  r   r   numpyr%   r/  PILr   torch.utils.datar   r   Z%cosmos_policy.datasets.dataset_commonr   r   r   r   r	   r
   r   r   $cosmos_policy.datasets.dataset_utilsr   r   r   r   r   r   r   set_printoptionsinfr/   r.   r4   r<   r?   r@   rL  datasetr   sampler   r2   r   r   makedirsranger   randintr$   Zglobal_step_indexpermuterf   i
image_path	fromarraysaver,   r,   r,   r-   <module>   s   ($
#        
p	
F!