o
    o
ju1                     @   s   d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z ejg dejdZejg dejdZG d	d
 d
eZG dd deZdd Zdd ZdS )ac  History-window dataset for the autoregressive transformer policy.

Wraps the per-demo cached arrays produced by prerender_dataset.py and yields
8-frame (or N-frame) history windows: past H frames + their EEF pixel coords,
plus the next-step EEF pixel as prediction target.

Designed to be a drop-in alternative to CachedTrajectoryDataset for train_ar.py.
    )PathN)Dataset)Rotation)g
ףp=
?gv/?gCl?dtype)gZd;O?gy&1?g?c                   @   >   e Zd ZdZ						ddd	Zd
d Zdd Zdd ZdS )HistoryTrajectoryDatasetu  Yields (history_imgs[H,3,448,448], history_eef_xy[H,2], target_eef_xy[2]) windows.

    Layout under cache_root mirrors CachedTrajectoryDataset (data.py):
        <benchmark>/task_<id>/demo_<idx>/
            frames/000000.png ...
            pix_uv.npy        (T, 2)  EEF pixel coords in image-pixel space
            eef_pos.npy       (T, 3)  — kept for stats, not required here
            ...

    Sample index = (demo_idx, t) where t is the frame whose NEXT step is the prediction
    target (so we need pix_uv[t+1] to exist). History: pix_uv[t-H+1 .. t] (left-padded
    by repeating the earliest available frame if the demo starts within H of t).
    libero_spatialN        r   c              
      s^  || _ || _|| _t|| }| std| t|d}	 d ur. fdd|	D }	g | _g | _	|	D ]_}
d}t|
dD ]S}|dkrM||krM nH|d }| sVqAt|d}|s`qAt
|}|d	k riqA|t|d
 |d}t
| j}| j| t|d D ]
}| j	||f q|d7 }qAq6tdt
| j dt
| j	 d| d| d	 d S )NCache not found: task_*c                    (   g | ]}t |jd d  v r|qS _r   intnamesplit.0dtask_ids $/data/cameron/para/libero/data_ar.py
<listcomp>9      ( z5HistoryTrajectoryDataset.__init__.<locals>.<listcomp>r   demo_*frames*.png   
pix_uv.npy)frame_pathspix_uvTr   zHistoryTrajectoryDataset:  demos, z samples (H=	, stride=))
image_sizehistory_lenframe_strider   existsFileNotFoundErrorsortedglobdemossampleslennploadappendrangeprint)self
cache_rootbenchmark_namer   r*   r+   r,   	max_demos
bench_root	task_dirstask_dirtask_demo_countdemo_dir
frames_dirr$   r&   demodemo_idxtr   r   r   __init__%   sP   


z!HistoryTrajectoryDataset.__init__c                 C   
   t | jS Nr3   r2   r9   r   r   r   __len__[      
z HistoryTrajectoryDataset.__len__c                 C      t t|}t |t jtjd }|jd | j	ks$|jd | j	kr1t j
|| j	| j	ft jd}|t t }t|ddd S Ng     o@r   r   )interpolationr"   cv2imreadstrZcvtColorZCOLOR_BGR2RGBastyper4   float32shaper*   resizeZINTER_LINEARIMAGENET_MEANIMAGENET_STDtorch
from_numpypermutefloatr9   pathZbgrrgbr   r   r   _load_frame^   s    z$HistoryTrajectoryDataset._load_framec           	         s  j | \}j| d }j j fddt D }t |d }tjfdd|D dd}tjfdd|D dd		tj
}d
 | 	tj
}t|djd }t|djd }|t| t| tj|tjdtjtjddS )Nr&   c                    s&   g | ]}t d  d |   qS )r   r   )maxr   k)HsrE   r   r   r   o   s   & z8HistoryTrajectoryDataset.__getitem__.<locals>.<listcomp>r   c                       g | ]}  d  | qS r$   ra   r   irC   r9   r   r   r   s       r   dimc                       g | ]} d  | qS r%   r   rj   rC   r   r   r   t       axisr%   r   )Zhistory_imgsZhistory_eef_xyZtarget_eef_xyrD   start_t)r2   r1   r+   r,   r7   minrZ   stackr4   rT   rU   clipr*   r[   r]   tensorlong)	r9   idxrD   r&   Zhist_idxtgt_idximgseef_xy	target_xyr   )re   rC   rf   r9   rE   r   __getitem__g   s$   
$z$HistoryTrajectoryDataset.__getitem__)r	   Nr
   r   r   r   __name__
__module____qualname____doc__rF   rK   ra   r   r   r   r   r   r      s    
6	r   c                   @   r   )WindowTrajectoryDataseta  Yields W consecutive frames + their EEF pixel coords, for multi-target AR training.

    For a W-frame window, the AR model with attention context H predicts the EEF at each step
    t in [H, W-1] using frames[t-H:t]. So one window contributes (W - H) supervision targets,
    all sharing one DINO forward.

    Sample = (demo_idx, start_t). The window spans frames [start_t, start_t + W - 1] (clamped
    at demo end via left-pad of the last valid frame).
    r	   Nr
      r   r   c                    s  || _ || _|| _t|| }| std| t|d}	 d ur. fdd|	D }	g | _g | _	|	D ]|}
d}t|
dD ]p}|dkrM||krM ne|d }| sVqAt|d}|ret
|d	k rfqAt
|}|t|d
 t|d t|d t|d t|d t|d |d}t
| j}| j| t|D ]
}| j	||f q|d7 }qAq6tdt
| j dt
| j	 d| d| d	 d S )Nr   r   c                    r   r   r   r   r   r   r   r      r   z4WindowTrajectoryDataset.__init__.<locals>.<listcomp>r   r   r    r!   r"   r#   zeef_pos.npyzeef_quat.npyzgripper.npyzcam_extrinsic.npyzcam_K_norm.npy)r$   r%   eef_poseef_quatgrippercam_extrinsic
cam_K_normr&   r   zWindowTrajectoryDataset: r'   z samples (W=r(   r)   )r*   
window_lenr,   r   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   )r9   r:   r;   r   r*   r   r,   r<   r=   r>   r?   r@   rA   rB   r$   r&   rC   rD   rv   r   r   r   rF      sV   



z WindowTrajectoryDataset.__init__c                 C   rG   rH   rI   rJ   r   r   r   rK      rL   zWindowTrajectoryDataset.__len__c                 C   rM   rN   rP   r^   r   r   r   ra      s    z#WindowTrajectoryDataset._load_framec                    sl  j | \}j| d  j}j fddt|D tjfddD dd}tjfddD ddtj	}tjfd	dD ddtj	}tjfd
dD ddtj	}tjfddD ddtj	}t
|djd }ztjdd |D ddtj	}	W n ty   t|}	Y nw  d tjfddtD tjd}
|d  }d}d}|t| t| t| t|	 t| t| |
tj|tjdtjtjdd
}|d ur&| }|d  j9  < |d  j9  < t| |d< |d ur4t| |d< |S )Nr&   c                    s"   g | ]}t  d  |  qS )r   )rw   rc   )r&   rf   rv   r   r   r      s   " z7WindowTrajectoryDataset.__getitem__.<locals>.<listcomp>c                    rg   rh   ri   rj   rl   r   r   r      rm   r   rn   c                    rp   rq   r   rj   rr   r   r   r      rs   rt   c                    rp   )r   r   rj   rr   r   r   r      rs   c                    rp   )r   r   rj   rr   r   r   r      rs   c                    rp   )r   r   rj   rr   r   r   r      rs   r   c                 S   s   g | ]
}t |d qS )xyz)ScipyRZ	from_quatZas_euler)r   qr   r   r   r      s    c                    s&   g | ]\}}| k p|  kqS r   )index)r   rd   wi)	last_realwin_idxr   r   r      s    r   r   r   )
window_imgswindow_eef_xywindow_eef_posZwindow_eef_quatwindow_eef_eulerwindow_gripperZwindow_eef_start
valid_maskrD   rv   cam_K)r2   r1   r   r,   r7   rZ   rx   r4   rT   rU   ry   r*   
ValueError
zeros_likerz   	enumerateboolcopygetr[   r]   r{   )r9   r|   rD   Wr~   r   r   r   r   Z	eef_eulerr   Zeef_start_xyzr   r   outr   r   )r&   rC   r   rf   r9   rv   r   r   r      sZ   
$$$$$



z#WindowTrajectoryDataset.__getitem__)r	   Nr
   r   r   r   r   r   r   r   r   r      s    
8r   c                 C   sL   || }| d |   d|d }| d |   d|d }|| | S )zYConvert (B, 2) pixel coords in [0, image_size) to flat grid indices in [0, grid_size**2).).r   r   r   ).r   )r{   clamp)r   r*   	grid_sizecellgxgyr   r   r   target_xy_to_grid_idx	  s   r   c                 C   sB   | | }| | }|| }t j| d | | d | gddS )uC   Inverse of target_xy_to_grid_idx — returns the cell center pixel.g      ?rn   )rZ   rx   r]   )r|   r*   r   r   r   r   r   r   r   grid_idx_to_pixel  s   r   )r   pathlibr   rQ   numpyr4   rZ   torch.utils.datar   Zscipy.spatial.transformr   r   arrayrU   rX   rY   r   r   r   r   r   r   r   r   <module>   s    n 