o
    vi@                     @   s2  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ	 d dl
mZ d dlmZ d+dededB defd	d
Zd,dejdedejfddZdejdejfddZdejfddZdd Zd-ddZd-ddZdejdedejfdd Zd.dejd!edejfd"d#Z		$	d/dejd%ed&ed'ed(edejfd)d*ZdS )0    N)Image)
transforms)
functional)tqdmdata_diris_trainreturnc           	      C   s   g }t j| sJ d|  dt j| ddD ]H\}}}|D ]@}| dr^t j||}|durYt jt j|| 	t j
}|rMd|v rM|| q|sXd|v rX|| q|| qq|S )	a  
    Recursively get a list of all HDF5 files in the specified directory and its subdirectories,
    including those reached via symbolic links.

    Args:
        data_dir (str): Path to the directory to search
        is_train (bool | None): If None, returns all HDF5 files.
                                If True, returns only files in 'train' subdirectories.
                                If False, returns only files in 'val' subdirectories.

    Returns:
        list: List of paths to HDF5 files
    zError: Directory 'z' does not exist.T)followlinks)z.h5z.hdf5z.he5Ntrainval)ospathexistswalklowerendswithjoinnormpathrelpathsplitsepappend)	r   r   
hdf5_filesrootdirsfilesfilefilepath
path_parts r   J/data/cameron/vidgen/cosmos-policy/cosmos_policy/datasets/dataset_utils.pyget_hdf5_files   s"   

r!   _   image_npqualityc                    sf   dt jdt jffdd | jdkr | S | jdkr+ fdd| D }t j|d	d
S td| j )u  Apply JPEG compression/decompression to a NumPy image or batch of images.

    Accepts either a single image with shape (H, W, C) **or** a batch of images
    with shape (B, H, W, C). All inputs must be uint8 RGB.

    Args:
        image_np (np.ndarray): Input image(s) as uint8 array(s).
        quality (int): JPEG quality factor (1–95).

    Returns:
        np.ndarray: JPEG-compressed (and re-decoded) image(s) with the same shape
        as the input.
    imgr   c                    s\   | j tjksJ d| j  t| }t }|j|d d |d t	|}t
|S )z'JPEG-compress a single image (H, W, C).zExpected uint8 image but got JPEG)formatr$   r   )dtypenpuint8r   	fromarrayioBytesIOsaveseekopenarray)r%   Zpil_imgbufferZcompressed_img)r$   r   r    _compress_singleU   s   



z3apply_jpeg_compression_np.<locals>._compress_single      c                    s   g | ]} |qS r   r   ).0r%   )r3   r   r    
<listcomp>e   s    z-apply_jpeg_compression_np.<locals>.<listcomp>r   axiszEExpected image_np with shape (H, W, C) or (B, H, W, C) but got shape )r)   ndarrayndimstack
ValueErrorshape)r#   r$   
compressedr   )r3   r$   r    apply_jpeg_compression_npF   s   

r@   
jpeg_bytesc                 C   s&   t t|  }t|tjS )z5Decode a single JPEG frame from bytes to numpy array.)	r   r0   r,   r-   tobytesr)   r1   astyper*   )rA   r%   r   r   r    decode_single_jpeg_framek   s   rD   c                 C   sH   g }| D ]}t t| }|t| qtj|dd	tj
S )uG   Decode a variable-length JPEG byte dataset (T,) → (T, H, W, 3) uint8.r   r8   )r   r0   r,   r-   rB   r   r)   r1   r<   rC   r*   )Zjpeg_dsframesZjpeg_arrr%   r   r   r    decode_jpeg_bytes_datasetq   s
   rF   c                 C   s  g }g }t d t|  D ]\}}|d }|d }|| || qtj|dd}tj|dd}t d tj|dd}	tj|dd}
tj|dd}tj	|dd}tj
|dd}tj|dd}tj|dd}tj|dd}tj	|dd}tj
|dd}|	|
||||||||d
}|S )z
    Calculate statistics over all actions and proprio in the dataset.

    Args:
        data (dict): Dataset dictionary

    Returns:
        dict: Dataset statistics dictionary
    z%Collecting all actions and proprio...actionsproprior   r8   zComputing dataset statistics...)
actions_minactions_maxactions_meanactions_stdactions_medianproprio_minproprio_maxproprio_meanproprio_stdproprio_median)printr   itemsr   r)   concatenateminmaxmeanstdmedian)dataZall_actionsZall_proprioepisode_idxepisode_datarG   rH   Zall_actions_arrayZall_proprio_arrayrI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   statsr   r   r    calculate_dataset_statisticsz   sB   
r_   F      ?c                 C   s   i }|   D ]=\}}|| }|| d }	|| d }
|s+d||	 |
|	   d }n||	 |
|	  }|| }| }|||< |||< q|S )a  
    Rescale some dataset element to the range [-1,+1] or [0,+1].

    If `non_negative_only` is True, then the target range will be [0,+1]. Else, it will be [-1,+1].

    The `scale_multiplier` can be used to change the final range. For example, if `scale_multiplier==2.0`, then
    we use [-2,+2] instead of [-1,+1] (or [0,+2] instead of [0,+1] if `non_negative_only==True`).

    Args:
        data (dict): Dataset dictionary
        dataset_stats (dict): Dataset statistics (pre-normalization)
        data_key (str): Key to the item that should be normalized (e.g., "actions", "proprio")
        scale_multiplier (float): Multiplier to adjust scale from [-1,+1] to [-scale_multiplier,+scale_multiplier]

    Returns:
        dict: Rescaled dataset
    _min_max      )rT   copy)r[   dataset_statsdata_keynon_negative_onlyscale_multiplierZrescaled_datar\   r]   arrcurr_mincurr_maxrescaled_arrZrescaled_episoder   r   r    rescale_data   s   
rn   c           	      C   s^   | | }|| d }|| d }|s!d|| ||   d }n|| ||  }|| }|S )a  
    Rescale a single episode's data to the range [-1,+1] or [0,+1].

    Args:
        episode_data (dict): Single episode data dictionary
        dataset_stats (dict): Dataset statistics (pre-normalization)
        data_key (str): Key to the item that should be normalized (e.g., "actions", "proprio")
        non_negative_only (bool): If True, scale to [0,+1], else [-1,+1]
        scale_multiplier (float): Multiplier to adjust scale

    Returns:
        np.ndarray: Rescaled array
    ra   rb   rc   rd   r   )	r]   rf   rg   rh   ri   rj   rk   rl   rm   r   r   r    rescale_episode_data   s   ro   imagestarget_sizec                 C   s   t | jdksJ dt | j | jdd ||dfkr!|  S | jd }| jd }tj||||f| jd}t|D ]}tt	| | 
||f||< q;|S )aM  
    Resizes multiple images to some target size.

    Assumes that the resulting images will be square.

    Args:
        images (np.ndarray): Input images with shape (T, H, W, C)
        target_size (int): Target image size (square)

    Returns:
        np.ndarray: Resized images with shape (T, target_size, target_size, C)
    r5   z)Expected 4 dimensions in images but got: Nr4   r   )r(   )lenr>   re   r)   emptyr(   ranger1   r   r+   resize)rp   rq   
num_imagesCZresized_imagesir   r   r    resize_images   s   "

$rz   strongerc              
   C   s  | j \}}}}||ksJ d| jtjksJ d| j | dddd} g }t| }d}||k rs|}|d |k r[t| | | |d  r[|d7 }|d |k r[t| | | |d  sE|d }	|	| }
|||	|
f |d7 }||k s1tj	j
t||ddd	\}}}}|rtdd
d }nd}|rtddd }tddd }tddd }n!tddd }tddd }tddd }tddd }g }t|D ]I\}\}}	}
| | }tj|||||||gdd}|rtj||dd}t||}t||}t||}t||}t|
D ]}|| q*qt|}|dddd}|S )a  
    Apply image augmentations to a batch of images represented as a torch.Tensor of shape (C, T, H, W).

    Args:
        images: A torch.Tensor of shape (C, T, H, W) and dtype torch.uint8 representing a set of images.
        stronger (bool): Whether to apply stronger augmentations

    Returns:
        A torch.Tensor of the same shape and dtype with augmentations applied.
    z$Image height and width must be equalz.Expected images dtype == torch.uint8 but got: rd   r   rc   r4   )?r|   )r`   r`   )r%   scaleratio   g        gffffff?g?g333333?gffffff?      ?g      ?g?g333333?gg?T)size	antialiasF)expand)r>   r(   torchr*   permuters   equalr   TRandomResizedCrop
get_paramszerosFloatTensoruniform_item	enumerateFresized_croprotateadjust_brightnessadjust_contrastadjust_saturation
adjust_hueru   r<   )rp   r{   _HWZunique_groupsrw   ry   Zgroup_startZ	group_end
group_sizejhwanglebrightness_factorcontrast_factorsaturation_factor
hue_factorresults	group_idxr%   Zaugmented_imagesr   r   r    apply_image_aug!  sb   $$

r   Tfinal_image_sizenormalize_imagesuse_image_augstronger_image_augc                 C   s   t | tjsJ dt|  | jtjksJ d| j t| jdkr+| jd dks3J d| j t| |} t	| d} t
| } |rKt| |d} |ri| t
j} | d	 } tjg d
g d
dd}|| } | S | t
j} | S )a  
    Preprocesses images for training.

    Resizes to final_image_size, permutes from (T, H, W, C) to (C, T, H, W),
    converts to torch.Tensor, optionally applies image augmentations, and optionally normalizes (no need to
    normalize if, e.g., the dataloader logic will normalize later).

    Args:
        images (np.ndarray): Images to be preprocessed
        final_image_size (int): Target size for resized images (square)
        normalize_images (bool): Whether the images should be normalized in the end
        stronger_image_aug (bool): Whether to apply stronger image augmentations

    Returns:
        torch.Tensor: Preprocessed images
    z/Images are not of type `np.ndarray`! Got type: z0Images do not have dtype `np.uint8`! Got dtype: r5   r4   z8Unexpected images shape! Expected (T, H, W, 3) but got: )r4   r   rd   rc   )r{   g     o@)r   r   r   T)rX   rY   inplace)
isinstancer)   r:   typer(   r*   rs   r>   rz   	transposer   
from_numpyr   tofloat32r   	Normalize)rp   r   r   r   r   	norm_funcr   r   r    preprocess_image{  s$   


r   )N)r"   )Fr`   )F)FTF)r,   r   numpyr)   r   PILr   torchvisionr   r   Ztorchvision.transformsr   r   r   strboollistr!   r:   intr@   rD   rF   r_   rn   ro   rz   Tensorr   r   r   r   r   r    <module>   sB   +%	
9
."]