o
    vi'                  1   @   sX  d dl Z z
d dlmZ dZW n ey   dZY nw d dlmZmZ d dlm	Z	 d dl
m  m  m  m  m  mZ d dlm  m  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  mZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z% d dl&m'Z'm(Z( 			 																	d6de)de)de)de*de+de+de+de+d e)d!e)d"ee) d#e)d$e)d%e*d&e+d'e+d(e+d)e+d*ee d+e*d,e*d-e*d.e*d/e j,j-f0d0d1Z.			2			3	d7de)de)d e)de*d!e)d"e)d%e*d#e)d$e)d/e j,j-fd4d5Z/dS )8    N)parallel_stateTF)CallableOptional)warn_and_continue)DatasetConfig)log)AUGMENTOR_OPTIONS)DATASET_OPTIONS)IMAGE_RES_SIZE_INFOVIDEO_RES_SIZE_INFOy   
   <   allvideo_basic_augmentor_v1s3t2w_qwen2p5_7bt5_xxl         Z   dataset_namevideo_decoder_name
resolutionis_trainnum_video_frames
chunk_sizemin_fps_thresmax_fps_thresdataset_resolution_typeaugmentor_nameobject_storecaption_typeembedding_type
detshufflelong_caption_ratiomedium_caption_ratioshort_caption_ratiouser_caption_ratiodataset_info_fnuse_native_fpsuse_original_fpsuse_random_consecutive_framesuse_random_interleaved_framesreturnc                 C   s  |t  v s
J d|
dv sJ dg d}|dkr"|	|v s"J d|	|v r.|dks.J d|dv s:J d	| d
|
sH|d usDJ d| }nt|  }||
|||}t|	 ||||||||||||||d}trt rt dksyt dkrt	
dt  dt  d tjddddd|d}d}ntjddddd|d}tg dd||tj|||||dtjg|dd d	}tj|t|dS )Nz?The provided resolution cannot be found in VIDEO_RES_SIZE_INFO.)r   
swiftstackgcpFz>We support s3 and swiftstack only, or False for local loading.)video_basic_augmentor_v2Z%video_basic_augmentor_v2_with_controlZ+noframedrop_nocameramove_video_augmentor_v1video_naive_byteszHWe can only use video_basic_augmentor_v2 with video_naive_bytes decoder.zHWe can only use video_naive_bytes decoder with video_basic_augmentor_v2.r   gt720pZgt1080p%The provided dataset resolution type  is not supported.z.dataset_info_fn is required for local loading.)r   r#   r$   min_fpsmax_fpsr&   r'   r(   r)   r   r+   r,   r-   r.   r   Using parallelism size CP :, TP :zO for video dataset, switch to ShardlistMultiAspectRatioParallelSync distributorTshuffleZsplit_by_nodeZsplit_by_workerZresume_flagverboseZis_infinite_loaderFd   )r   sequence_lengthr   r   r   )	keysbuffer_sizestreaming_downloaddataset_infodistributordecodersaugmentationZremove_extension_from_keysZsample_keys_full_list_path)configZdecoder_handlerr%   )r   rA   r	   r   USE_MEGATRONr   is_initializedget_context_parallel_world_size$get_tensor_model_parallel_world_sizer   critical parallel_sync_multi_aspect_ratio%ShardlistMultiAspectRatioParallelSyncdistributorsShardlistMultiAspectRatior   video_decoderZconstruct_video_decoderpickle_decoderspkl_decoder
webdatasetDatasetr   )r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   Zbasic_augmentor_namesrD   Z	augmentorrE   Zvideo_data_config rW   [/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/predict2/datasets/dataset_provider.pyget_video_dataset)   s   




		
rY   image_basic_augmentorai_v3p1c	              	   C   s  |t  v s
J d|dv sJ d|dv sJ d| dt|  }	|	||||}
t| |||d}t r`t dksBt dkr`t	d	t  d
t  d t
jddddd|d}d}ntjddddd|d}tg dd|
|tjtjg|d}tj||dS )Nz?The provided resolution cannot be found in IMAGE_RES_SIZE_INFO.)r   r0   r1   z'We support s3, gcp and swiftstack only.r4   r6   r7   )r   r#   r$   r   r:   r;   zO for image dataset, switch to ShardlistMultiAspectRatioParallelSync distributorTr<   F   )rA   rB   rC   rD   rE   rF   rG   )rH   r%   )r
   rA   r	   r   r   rJ   rK   rL   r   rM   rN   rO   rP   rQ   r   image_decoders
pil_loaderrS   rT   rU   rV   )r   r   r    r   r!   r"   r%   r#   r$   r*   rD   rG   rE   Zimage_data_configrW   rW   rX   get_image_dataset   s^   

		r_   )Tr   r   r   r   r   r   r   r   r   Fr   r   r   r   NTFFF)r   TrZ   r   Fr[   r   )0	omegaconfmegatron.corer   rI   ImportErrortypingr   r   Zwebdataset.handlersr   Z@cosmos_policy._src.imaginaire.datasets.webdataset.decoders.image_src
imaginairedatasetsrU   rF   imager]   ZAcosmos_policy._src.imaginaire.datasets.webdataset.decoders.picklepicklerS   Z>cosmos_policy._src.imaginaire.datasets.webdataset.distributorsrP   Z;cosmos_policy._src.predict2.datasets.decoders.video_decoderpredict2rR   ZQcosmos_policy._src.predict2.datasets.distributor.parallel_sync_multi_aspect_ratiorE   rN   Z/cosmos_policy._src.predict2.datasets.webdatasetZ?cosmos_policy._src.imaginaire.datasets.webdataset.config.schemar   #cosmos_policy._src.imaginaire.utilsr   Z7cosmos_policy._src.predict2.datasets.augmentor_providerr   ZCcosmos_policy._src.predict2.datasets.data_sources.data_registrationr	   *cosmos_policy._src.predict2.datasets.utilsr
   r   strboolint
dictconfig
DictConfigrY   r_   rW   rW   rW   rX   <module>   s   **$$$	

 	
