o
    fju"                     @   s4  d Z ddlZddlZddlmZ ddlm  mZ ddlm	Z	m
Z
mZ dd ZG dd de	Zedkreej r=d	nd
Zedddde Zedd e D Zeded d\ZZeedeeeZeedeeeZeedee Zejg dg dg dgge ej deZ!e"d#d$eddeZ%de%ddddf< e!& Z'e%& Z(de(ddddf< ee!e%dddd d eZ)ee'e(dddd d eZ*e+  eeeee)e'e(e!e%e*	Z,W d   n1 sw   Y  e,- D ]\Z.Z/e/dured!e. d"e0e/j1  qdS dS )#u  DinoVolumeQuery2View with a dual-frustum sampling strategy.

Two volumes of world XYZ points are sampled:
  1. BEV-anchored: world XYZ at each (z_bin, y_grid_bev, x_grid_bev) — current approach
  2. Wrist-anchored: world XYZ at each (z_bin, y_grid_wrist, x_grid_wrist) — NEW

Each volume's voxels are scored against BOTH views (direct lookup for the home view,
grid_sample projection for the other). The two volumes are stacked along an "anchor" axis
so the CE loss is over the union of sample points.
    N)DinoVolumeQuery2Viewproject_world_to_wrist_uv_grid!build_bev_world_xyz_table_batchedc                 C   s  | j }| jd }	|  }
|
dddf  t|9  < |
dddf  t|9  < t|t| }tj||d d | }tj||d d | }tj||dd\}}t|}tj|||gdd		dd
j
}t|
}||d|	dd dd}|	|	||d
}|dddd
dd
f }|dddd
d
f }td||}tj||||d}|d d}t| dk t|d|}|d|dd|dddf |	ddd | }||	dddd
|d|d  }|S )zNSame as build_bev_world_xyz_table_batched but rays come from the wrist camera.r   N   device      ?xy)indexingdim      zbij,bhwj->bhwi).r   gư>gư)r   shapeclonefloattorcharangemeshgrid	ones_likestackreshapeTinverse	unsqueezeexpand	transposeeinsumlinspacewhereabs	full_likeview)ZK_norm_wristwrist_extrinsicn_height_bins
min_height
max_heightHW
image_sizer   BKscaleysxsgrid_xgrid_yonesuv1K_invrays_camR_cwt_cw
rays_worldheightsrwzrwz_safesxyz r>   @/data/cameron/para/libero/model_dino_volume_query_dualfrustum.py#build_wrist_world_xyz_table_batched   s,   
8

 0&r@   c                   @   s   e Zd ZdZ	dddZdS )DinoVolumeQuery2ViewDualFrustumuC  Adds a wrist-anchored second volume alongside the BEV-anchored one.

    Output:
      volume_logits: (B, T, Z, 2, H, W) — stacked [bev-anchored, wrist-anchored] along axis 3.
      Trainer should flatten to (B, T, Z*2*H*W) for CE, with the GT label re-encoded to
      always live in the BEV-anchor slot (anchor=0).
    Nc
           I         s  |j d  jjj}
}}|\}}\|\}}\}} }f||fks2J ttj|tj|j	dtj
|tj|j	dg}|d}tj||gdd| }|}|d d d |f }|d d |d f } fdd}tj||jjfddd	}tj||jjfddd	}|}|}|j d
d  \}}|d |j   d|d } |d |j   d|d }!tj |j	d}"||"d d |!| f }#tj|#||gdd}$|$d |
| |
 |}%j}&|&d |
d |
 d}'|%}(jD ]})|)|(|'}(q|(}(|( |
|}*|*}+|*},j dkrM!|* |
dj"}-n!|*}-j#j$j%}.}/}0|+dd |.f }1|+d|.d|. f }2|+dd|. d|. |/ f }3|+dd|. |/ d f }4t&d|3j'}5t&d|4j}6|5dd}7|6ddd}8t&d|1|}9t(  t)|||j}:W d    n	1 sw   Y  |:j \};}<}=}>}?tj*||:|;|<|= |>ddddd|;|.|<|=|>}@t&d|2|@}A|9d|A |7 |8 }Bt&d|2|}Ct(  t)|	||j}DW d    n	1 s"w   Y  tj*||D|;|<|= |>ddddd|;|.|<|=|>}Et&d|1|E}F|Cd|F |7 |8 }Gtj+|B|Ggdd}H|H|B|G|,|-||dS )Nr   )dtyper   r   r   c                    s"   |   jdddd S )Nr   r   r   r   )r   	embed_dimpermute
contiguous)pr+   Hp_bWp_bselfr>   r?   _to_gridP   s   "z9DinoVolumeQuery2ViewDualFrustum.forward.<locals>._to_gridbilinearF)sizemodealign_corners).r   ).r   r   r   per_axisr   .r   zbtc, zc -> btzzbtc, tc -> btzbtc, bchw -> bthwzerosT)rN   padding_moderO   zbtc, bczhw -> btzhw)volume_logitsvol_bev	vol_wristgripper_logitsrotation_logitspixel_featspixel_feats_wrist),r   n_windowr%   d_model_extract_dino_tokensr   catrR   longr   r2   view_embr   
cross_attnFinterpolate	pred_size
refine_bevrefine_wristr*   clampr   
input_projr   r   t_cond_projt_sinblocks
final_normr#   q_head	grip_headrotation_moderot_head
n_rot_binsd_featd_sin_zd_sin_tr   z_sinno_gradr   grid_sampler   )IrJ   rgb_bev	rgb_wriststart_pix_bevbev_xyz_tablewrist_K_normr$   
bev_K_normbev_extrinsicwrist_xyz_tabler   Zdcls_bevpatches_bev	cls_wristpatches_wristHp_wWp_wn_pview_idsview_ejoint	pat_bev_xpat_wrist_xrK   feat_bev_upfeat_wrist_upF_bevF_wristr(   r)   sxsyb_idxeef_featq_inq_in_btcond_tcond_bthblkpenult	q_spatialgripperrotationd_Fd_zd_tq_F_bev	q_F_wristq_zq_tscore_zscore_tz_termt_termscore_bev_yxZuv_wrist_gridBvZvHvWv_ZF_w_sampled_for_bevZscore_wrist_on_bevrU   Zscore_wrist_yxZuv_bev_gridZF_b_sampled_for_wristZscore_bev_on_wristrV   rT   r>   rG   r?   forward;   s   



""  








z'DinoVolumeQuery2ViewDualFrustum.forward)NNN)__name__
__module____qualname____doc__r   r>   r>   r>   r?   rA   2   s    
rA   __main__cudacpu     1d_pca)r[   r*   ro   c                 c   s    | ]
}|j r| V  qd S )N)requires_gradnumel).0rF   r>   r>   r?   	<genexpr>   s    r   zTrainable: ,)r   r   r   r   )333333?r   r   )r   r   r   )r   r   r   )rB      r   g      ?g333333?    g?8   z  z: )2r   osr   torch.nnnntorch.nn.functional
functionalrb   model_dino_volume_query_2viewr   r   r   r@   rA   r   r   r   is_availabletoevalmsum
parametersn_tprintr+   ZIMGrandrx   ry   sptensorfloat32bev_Keyer   repeatbev_extr   wrist_K	wrist_extZbev_xyzZ	wrist_xyzrv   outitemskvtupler   r>   r>   r>   r?   <module>   s>    

u.2
$