o
    ?߱i/(                  3   @   s^  d dl Z z
d dlmZ dZW n ey   dZY nw d dlmZmZ d dlm	Z	 d dl
m  m  m  m  m  mZ d dlm  m  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  m  mZ d dlm  m  m  mZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z% d dl&m'Z'm(Z( 			 																		d7de)de)de)de*de+de+de+de+d e)d!e)d"ee) d#e)d$e)d%e*d&e+d'e+d(e+d)e+d*ee d+e*d,e*d-e*d.e*d/e*d0e j,j-f2d1d2Z.			3			4	d8de)de)d e)de*d!e)d"e)d%e*d#e)d$e)d0e j,j-fd5d6Z/dS )9    N)parallel_stateTF)CallableOptional)warn_and_continue)DatasetConfig)log)AUGMENTOR_OPTIONS)DATASET_OPTIONS)IMAGE_RES_SIZE_INFOVIDEO_RES_SIZE_INFOy   
   <   allvideo_basic_augmentor_v1s3t2w_qwen2p5_7bt5_xxl         Z   dataset_namevideo_decoder_name
resolutionis_trainnum_video_frames
chunk_sizemin_fps_thresmax_fps_thresdataset_resolution_typeaugmentor_nameobject_storecaption_typeembedding_type
detshufflelong_caption_ratiomedium_caption_ratioshort_caption_ratiouser_caption_ratiodataset_info_fnuse_native_fpsuse_original_fpsuse_random_consecutive_framesuse_random_interleaved_framesprefer_crop_over_padreturnc                 C   s  |t  v s
J d|
dv sJ dg d}|dkr"|	|v s"J d|	|v r.|dks.J d|dv s:J d	| d
|
sH|d usDJ d| }nt|  }||
|||}t|	 |||||||||||||||d}trt rt dkszt dkrt	
dt  dt  d tjddddd|d}d}ntjddddd|d}tg dd||tj|||||dtjg|dd d	}tj|t|dS )Nz?The provided resolution cannot be found in VIDEO_RES_SIZE_INFO.)r   
swiftstackgcpFz>We support s3 and swiftstack only, or False for local loading.)Zvideo_basic_augmentor_v2Z%video_basic_augmentor_v2_with_controlZ+noframedrop_nocameramove_video_augmentor_v1Zvideo_naive_byteszHWe can only use video_basic_augmentor_v2 with video_naive_bytes decoder.zHWe can only use video_naive_bytes decoder with video_basic_augmentor_v2.r   Zgt720pZgt1080p%The provided dataset resolution type  is not supported.z.dataset_info_fn is required for local loading.)r   r#   r$   Zmin_fpsZmax_fpsr&   r'   r(   r)   r   r+   r,   r-   r.   r/   r   Using parallelism size CP :, TP :zO for video dataset, switch to ShardlistMultiAspectRatioParallelSync distributorTshufflesplit_by_nodesplit_by_workerZresume_flagverboseZis_infinite_loaderFd   )r   sequence_lengthr   r   r   )	keysbuffer_sizestreaming_downloaddataset_infodistributordecodersaugmentationZremove_extension_from_keysZsample_keys_full_list_path)configZdecoder_handlerr%   )r   r?   r	   r   USE_MEGATRONr   is_initializedget_context_parallel_world_size$get_tensor_model_parallel_world_sizer   critical parallel_sync_multi_aspect_ratio%ShardlistMultiAspectRatioParallelSyncdistributorsShardlistMultiAspectRatior   video_decoderZconstruct_video_decoderpickle_decoderspkl_decoder
webdatasetDatasetr   )r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   Zbasic_augmentor_namesrB   Z	augmentorrC   Zvideo_data_config rU   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/predict2/datasets/dataset_provider.pyget_video_dataset)   s   




		
rW   image_basic_augmentorai_v3p1c	              	   C   s  |t  v s
J d|dv sJ d|dv sJ d| dt|  }	|	||||}
t| |||d}t r`t dksBt dkr`t	d	t  d
t  d t
jddddd|d}d}ntjddddd|d}tg dd|
|tjtjg|d}tj||dS )Nz?The provided resolution cannot be found in IMAGE_RES_SIZE_INFO.)r   r1   r2   z'We support s3, gcp and swiftstack only.r3   r4   r5   )r   r#   r$   r   r6   r7   zO for image dataset, switch to ShardlistMultiAspectRatioParallelSync distributorTr8   F   )r?   r@   rA   rB   rC   rD   rE   )rF   r%   )r
   r?   r	   r   r   rH   rI   rJ   r   rK   rL   rM   rN   rO   r   image_decoders
pil_loaderrQ   rR   rS   rT   )r   r   r    r   r!   r"   r%   r#   r$   r*   rB   rE   rC   Zimage_data_configrU   rU   rV   get_image_dataset   s^   

		r]   )Tr   r   r   r   r   r   r   r   r   Fr   r   r   r   NTFFFF)r   TrX   r   FrY   r   )0	omegaconfmegatron.corer   rG   ImportErrortypingr   r   Zwebdataset.handlersr   ZBcosmos_predict2._src.imaginaire.datasets.webdataset.decoders.image_src
imaginairedatasetsrS   rD   imager[   ZCcosmos_predict2._src.imaginaire.datasets.webdataset.decoders.picklepicklerQ   Z@cosmos_predict2._src.imaginaire.datasets.webdataset.distributorsrN   Z=cosmos_predict2._src.predict2.datasets.decoders.video_decoderpredict2rP   ZScosmos_predict2._src.predict2.datasets.distributor.parallel_sync_multi_aspect_ratiorC   rL   Z1cosmos_predict2._src.predict2.datasets.webdatasetZAcosmos_predict2._src.imaginaire.datasets.webdataset.config.schemar   %cosmos_predict2._src.imaginaire.utilsr   Z9cosmos_predict2._src.predict2.datasets.augmentor_providerr   ZEcosmos_predict2._src.predict2.datasets.data_sources.data_registrationr	   ,cosmos_predict2._src.predict2.datasets.utilsr
   r   strboolint
dictconfig
DictConfigrW   r]   rU   rU   rU   rV   <module>   s   **$$$	

 
	
