o
    ?߱i                     @   s\   d dl mZmZ d dlZd dlZd dlZd dlmZ G dd dej	Z
G dd dej	ZdS )    )DictUnionN)instantiatec                	   @   s^   e Zd ZdZdeeeeeejj	j
ejef f f fddZdefddZdd	 Zd
d ZdS )IterativeJointDataLoaderzJ
    A joint dataloader that supports loading both images and videos.
    dataloadersc                 K   s   g g g | _ | _| _| D ]-\}}t| ddhks$J d| | j| | j t|d  | j|d  qd| _t	| j| _
d| _dd | j D | _| j D ]}|  jt|7  _qUdS )a  
        Initialize the JointDataLoader with multiple datasets.

        Args:
            dataloaders: key - dataset_name; value - {"dataloader": dataloader, "ratio": data_ratio}

        Example:
            joint_loader = IterativeJointDataLoader(
                dataloaders{
                    "image_data": {
                        "dataloader": webdataset.WebLoader(...),
                        "ratio": 4,
                    },
                    "video_data": {
                        "dataloader": torch.utils.data.DataLoader(...),
                        "ratio": 1,
                    },
                }
            )
        
dataloaderratioInvalid config: r   c                 S      g | ]}t |qS  iter.0r   r   r   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/predict2/datasets/joint_dataloader.py
<listcomp>A       z5IterativeJointDataLoader.__init__.<locals>.<listcomp>N)dataloader_listdataset_name_listdata_ratiositemssetkeysappendr   	global_idsum	ratio_sumdata_lenr   len)selfr   kwargsdataset_namedataloader_datadatar   r   r   __init__   s   "
z!IterativeJointDataLoader.__init__returnc                 C      | j S Nr   r   r   r   r   __len__E      z IterativeJointDataLoader.__len__c                 c   sR    	 | j | j }| |}| j| }t|}| j| |d< |  j d7  _ ~|V  q)NTr!      )r   r   _get_dataloader_indexr   nextr   )r   data_idZindex_idcurr_dataloaderoutputr   r   r   __iter__H   s   

z!IterativeJointDataLoader.__iter__c                 C   s4   t | jD ]\}}||k r|  S ||8 }qtd)zDMaps global id to the corresponding dataloader index based on ratio.zInvalid data_id)	enumerater   
ValueError)r   r/   irr   r   r   r-   S   s
   
z.IterativeJointDataLoader._get_dataloader_indexN)__name__
__module____qualname____doc__r   strr   torchutilsr#   
DataLoader
webdataset	WebLoaderintr$   r*   r2   r-   r   r   r   r   r      s    $
'r   c                	   @   sV   e Zd ZdZdeeeeeejj	j
ejef f f fddZdefddZdd	 Zd
S )RandomJointDataLoaderz[
    A joint dataloader that supports randomly samples batches from multiple datasets.
    r   c                 C   s   g g g | _ | _| _| D ]-\}}t| ddhks$J d| | j| | j t|d  | j|d  qt	t
| jdsJJ dd| _dd | j D | _| j D ]}|  jt|7  _qYd	S )
a	  
        Initialize the JointDataLoader with multiple datasets.

        Args:
            **kwargs: Arbitrary keyword arguments where each key is a string
                      representing the dataset name, and each value is either
                      a `webdataset.WebLoader` or `torch.utils.data.DataLoader`
                      instance.

        Raises:
            AssertionError: If any value in kwargs is not an instance of
                            `webdataset.WebLoader` or `torch.utils.data.DataLoader`.
            AssertionError: If any key in kwargs is not a string.

        Example:
            joint_loader = JointDataLoader(
                images=webdataset.WebLoader(...),
                videos=torch.utils.data.DataLoader(...)
            )
        r   r   r	   g      ?z1Sum of sample probabilities should be equal to 1.r   c                 S   r
   r   r   r   r   r   r   r      r   z2RandomJointDataLoader.__init__.<locals>.<listcomp>N)r   r   r   r   r   r   r   r   npiscloser   r   r   r   )r   r   r!   r"   r#   r   r   r   r$   b   s   "
zRandomJointDataLoader.__init__r%   c                 C   r&   r'   r(   r)   r   r   r   r*      r+   zRandomJointDataLoader.__len__c                 c   sP    	 t tjjt| jd| jdd }| j| }t|}| j	| |d< ~|V  q)NTr,   )pr   r!   )
rA   rC   randomchoicer   r   r   r   r.   r   )r   r/   r0   r1   r   r   r   r2      s   "
zRandomJointDataLoader.__iter__N)r7   r8   r9   r:   r   r;   r   r<   r=   r#   r>   r?   r@   rA   r$   r*   r2   r   r   r   r   rB   \   s    $
&rB   )typingr   r   numpyrC   r<   r?   +cosmos_predict2._src.imaginaire.lazy_configr   r@   r   rB   r   r   r   r   <module>   s   C