o
    vi                     @   s   d dl mZmZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d d	lmZ G d
d deZdS )    )CallableOptionalN)filters)reraise_exception)DatasetConfig)
WebDataset)remove_extensions_from_keys	skip_keys
update_url)Dataset)logc                	       sH   e Zd Zeddfdededee def fddZd	e	fd
dZ
  ZS )r   NFconfighandlerdecoder_handler
detshufflec                    s    t  j||d || _|| _dS )zWebdataloader class

        Args:
            config: Dataset config
            handler (Callable): Error handler for webdataset class
            decoder_handler (Callable): Error handler during decoding
        )r   r   N)super__init__r   r   )selfr   r   r   r   	__class__ U/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/predict2/datasets/webdataset.pyr   #   s   
zDataset.__init__returnc                 K   s  | j j}t|}|dksJ dt| jd| j j}| jj}|| || j j t	|| j
| j| j| j| jd}| jrD|t| n|t| t| jdg }g }|D ]}	t|	tsft|	sfJ d||	 qW|tj|d| ji | jjr|t |t t| jdd	}
t|
ttjjfsJ d
t |
 | !|
}|| |t" | j j#|_$t%&d|  t%&d|j$  |S )z
        Build the dataset object.
        The function only diffs from BaseDataset.build_dataset by only adding the decoder_handler to the WebDataset object.
        r   zDid not find any data.buffer_size)load_from_object_storeeasy_io_backendZs3_bucket_namestreaming_downloadr   decodersz*Decoder should either be callable or a strr   augmentationNzgetting type: z#Total number of training shards: %dzTotal training key count: %d)'wdinfo	tar_fileslengetattrr   
chunk_sizedistributorset_urlsset_chunk_sizer   Zuse_object_storer   bucketr   r   r   appendr   wdsshuffle
isinstancestrcallabledecoder   remove_extension_from_keysr   r	   dict	omegaconf
dictconfig
DictConfigtypeZbuild_data_augmentorr
   total_key_countZtotal_imagesr   info)r   kwargsZtar_listZnum_tarsZshuffle_buffer_sizeZdistributor_fndatasetZdecoder_listZdecoder_functionsdecoderZaugmentor_cfgZaugmentation_fnr   r   r   build_dataset5   sL   







zDataset.build_dataset)__name__
__module____qualname__r   r   r   r   boolr   r   r:   __classcell__r   r   r   r   r   "   s    r   )typingr   r   r1   
webdatasetr)   r   webdataset.handlersr   ?cosmos_policy._src.imaginaire.datasets.webdataset.config.schemar   ZAcosmos_policy._src.imaginaire.datasets.webdataset.utils.iteratorsr   <cosmos_policy._src.imaginaire.datasets.webdataset.utils.miscr   r	   r
   Z<cosmos_policy._src.imaginaire.datasets.webdataset.webdatasetr   ZBaseDataset#cosmos_policy._src.imaginaire.utilsr   r   r   r   r   <module>   s   