o
    x}i
                  	   @   sp   d Z ddlZddlmZ ddlZddlZddlmZ dZ	dZ
dZG dd	 d	eZddedededefddZdS )z
Dataset for image-to-video fine-tuning: random N-frame windows from robot episodes.
Each sample is a contiguous clip; first frame is conditioning, full clip is target.
Output format matches DiffusionEngine: "jpg" = video in [-1, 1], (B, T, C, H, W).
    N)Path)Dataseti@  i      c                   @   sz   e Zd ZdZdeeeddfdededededed	edB d
edB fddZ	dd Z
dedejfddZdedefddZdS )RobotVideoDatasetz
    Samples random contiguous N-frame windows from episode directories.
    Each episode dir contains 000000.png, 000001.png, ... (and optional .npy files).
       Ndataset_root
num_framestarget_num_framesheightwidthmax_episodesseedc                    sj  t || _|| _|| _|| _|| _|d urt| | j s't	d| j t
dd | j D }|d ur=|d | }t|dkrKt	d| j g | _|D ]>}	t
dd |	dD }
t
dd |
D }t| |D ] fd	dt|D }t fd
d|D r| j|	f qmqPt| jdkrt	d| dt| dtdt| dt| j d d S )NzDataset root not found: c                 S   s"   g | ]}|  rd |jv r|qS )episode)is_dirname).0d r   N/data/cameron/vidgen/generative-models/scripts/training/robot_video_dataset.py
<listcomp>/   s   " z.RobotVideoDataset.__init__.<locals>.<listcomp>r   zNo episodes in c                 S   s   g | ]	}|j  r|qS r   )stemisdigitr   fr   r   r   r   9   s    z*.pngc                 S   s   g | ]}t |jqS r   )intr   r   r   r   r   r   :   s    c                    s   g | ]} | qS r   r   r   i)	start_idxr   r   r   =       c                 3   s    | ]}| v V  qd S Nr   r   )	frame_setr   r   	<genexpr>>   s    z-RobotVideoDataset.__init__.<locals>.<genexpr>z	No valid z-frame windows in z: episodes. Ensure episodes have consecutive frame indices.zRobotVideoDataset: z episodes, z samples)r   r   r   r	   r
   r   randomr   exists
ValueErrorsortediterdirlensamplesglobsetrangeallappendprint)selfr   r   r	   r
   r   r   r   episode_dirsep_dirframe_filesframe_indicesneedr   )r    r   r   __init__   s@   



$zRobotVideoDataset.__init__c                 C   s
   t | jS r   )r'   r(   )r/   r   r   r   __len__G   s   
zRobotVideoDataset.__len__pathreturnc                 C   s&   ddl m} t||d}|S )Nr   )ImageRGB)PILr9   nparrayopenconvert)r/   r7   r9   imgr   r   r   _load_frameJ   s   zRobotVideoDataset._load_frameidxc           	         s*  j | \}}g }tjD ]}|| }||dd }|| qtj|ddtjd dd l	 tj fddtj
d D ddj
d jk rqtdd  jj
d  d	d	d	f}tj|gddnj
d jkrd j tdd
d	dd d diS )N06dz.pngr   )axisg     o@c                    s*   g | ]} j | jjf jd qS ))interpolation)resizer   r
   INTER_LINEAR)r   tcv2r/   videor   r   r   [   s    
z1RobotVideoDataset.__getitem__.<locals>.<listcomp>         g       @g      ?jpg)r(   r+   r   r-   rA   r<   stackastypefloat32rJ   shaper	   tileconcatenatetorch
from_numpypermute)	r/   rB   r1   r   framesr   	frame_idxr7   padr   rI   r   __getitem__O   s2   
zRobotVideoDataset.__getitem__)__name__
__module____qualname____doc__
NUM_FRAMESVIDEO_HVIDEO_Wstrr   r5   r6   r   r<   ndarrayrA   dictr]   r   r   r   r   r      s4    
.r   {Gz?      batchcond_augfps_idmotion_bucket_idc                 C   s   t dd | D }|j\}}}}}	|ddddf  }
|
}|
|t |
  }|||t j|g| t jdt j|g| t jdt j|g| t jd|t j||t jddS )zf
    Collate batch and add conditioning keys expected by DiffusionEngine / StandardDiffusionLoss.
    c                 S   s   g | ]}|d  qS )rP   r   )r   br   r   r   r   x   r   z'collate_robot_video.<locals>.<listcomp>NrM   )dtype)rP   cond_frames_without_noisecond_framesrm   rn   rl   num_video_framesimage_only_indicator)	rW   rQ   rT   clone
randn_liketensorlongrS   zeros)rk   rl   rm   rn   videosBTCHWfirst_framerq   rr   r   r   r   collate_robot_videot   s   r   )rh   ri   rj   )ra   r"   pathlibr   numpyr<   rW   torch.utils.datar   rc   rd   rb   r   listfloatr   r   r   r   r   r   <module>   s     a