o
    ixL                     @   sr   d dl mZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ G dd dejZdS )    )ListSequenceTupleN)Dict)_make_fusion_block_make_scratch)Permutecreate_uv_gridcustom_interpolateposition_grid_to_embedc                       s^  e Zd ZdZdddddddd	d
ddddededededededee dededededeeef ddf fddZ		d5de
ej d ed!ed"ed#edeeejf fd$d%Zde
ej d ed!ed"edeeejf f
d&d'Zde
ej deeje
ej f fd(d)Zd6d+ejd!ed ed,edejf
d-d.Zd/edejfd0d1Z	2d7d+ejdedejfd3d4Z  ZS )8DualDPTa.  
    Dual-head DPT for dense prediction with an always-on auxiliary head.

    Architectural notes:
      - Sky/object branches are removed.
      - `intermediate_layer_idx` is fixed to (0, 1, 2, 3).
      - Auxiliary head has its **own** fusion blocks (no fusion_inplace / no sharing).
      - Auxiliary head is internally multi-level; **only the final level** is returned.
      - Returns a **dict** with keys from `head_names`, e.g.:
          { main_name, f"{main_name}_conf", aux_name, f"{aux_name}_conf" }
      - `feature_only` is fixed to False.
          expexpp1   )r   i      r   T         )depthray)
patch_size
output_dim
activationconf_activationfeaturesout_channels	pos_embed
down_ratioaux_pyramid_levelsaux_out1_conv_num
head_namesdim_inr   r   r   r   r   r   r   r   r    r!   r"   returnNc                   s  t    |_|_|_|_|	_|
_|_|\_	_
d_t _t fdd|D _ttj|d |d ddddtj|d |d ddddt tj|d	 |d	 d	dddg_tt||d
d_t|j_t|j_t|j_t|d
dj_|dtjd d	dddj_ttjd d	dddtjddtj|ddddj_ t|j_!t|j_"t|j_#t|d
dj_$tfddt%jD j_&d}|rt'dtt'dgng tfddt%jD j_(d S )N)r   r   r      c              	      s    g | ]}t j |d d ddqS )r   r   kernel_sizestridepadding)nnConv2d).0oc)r#    </data/cameron/da3_repo/src/depth_anything_3/model/dualdpt.py
<listcomp>Q        z$DualDPT.__init__.<locals>.<listcomp>r   r   r&   r   r   r%   F)expand)Zhas_residual    Tinplacec                    s   g | ]}  qS r.   )_make_aux_out1_blockr,   _)head_features_1selfr.   r/   r0      s    )r   r   r%   r   )r   r%   r   r   c                    sR   g | ]%}t jt j d  ddddgt jddt jdddddR  qS )r   r%   r   r&   Tr4      r   )r*   
Sequentialr+   ReLUr7   )r9   head_features_2ln_seqr.   r/   r0      s    	
))super__init__r   r   r   r   r   
aux_levelsr!   	head_mainhead_auxintermediate_layer_idxr*   	LayerNormnorm
ModuleListprojectsConvTranspose2dIdentityr+   resize_layersr   listscratchr   
refinenet1
refinenet2
refinenet3
refinenet4output_conv1r<   r=   output_conv2refinenet1_auxrefinenet2_auxrefinenet3_auxrefinenet4_auxrangeoutput_conv1_auxr   output_conv2_aux)r:   r#   r   r   r   r   r   r   r   r   r    r!   r"   Zuse_ln	__class__)r#   r9   r>   r?   r:   r/   rA   ,   sr   


	zDualDPT.__init__   featsHWpatch_start_idx
chunk_sizec                    s   |d d j \  fdd|D }|du s|kr7| ||||} fdd| D }t|S g td  |D ] t|   | fdd|D |||}| qAfddd  D } fd	d| D }t|S )
aD  
        Args:
            aggregated_tokens_list: List of 4 tensors [B, S, T, C] from transformer.
            images:                [B, S, 3, H, W], in [0, 1].
            patch_start_idx:       Patch-token start in the token sequence (to drop non-patch tokens).
            frames_chunk_size:     Optional chunking along S for memory.

        Returns:
            Dict[str, Tensor] with keys based on `head_names`, e.g.:
                self.head_main, f"{self.head_main}_conf",
                self.head_aux,  f"{self.head_aux}_conf"
            Shapes:
              main:    [B, S, out_dim, H/down_ratio, W/down_ratio]
              main_cf: [B, S, 1,       H/down_ratio, W/down_ratio]
              aux:     [B, S, 7,       H/down_ratio, W/down_ratio]
              aux_cf:  [B, S, 1,       H/down_ratio, W/down_ratio]
        r   c                    s"   g | ]}|d     qS )r   )reshaper,   feat)BCNSr.   r/   r0      s   " z#DualDPT.forward.<locals>.<listcomp>Nc                    0   i | ]\}}||j  g|jd d R  qS r   N)rd   shaper,   kvrg   rj   r.   r/   
<dictcomp>      0 z#DualDPT.forward.<locals>.<dictcomp>c                    s   g | ]}|  qS r.   r.   re   )s0s1r.   r/   r0      s    c                    s*   i | ]  t j fd dD ddqS )c                    s   g | ]}|  qS r.   r.   )r,   out_dictro   r.   r/   r0      s    z.DualDPT.forward.<locals>.<dictcomp>.<listcomp>r   )dim)torchcat)r,   )	out_dictsrw   r/   rr      s    c                    rk   rl   )viewrm   rn   rq   r.   r/   rr      rs   )rm   _forward_implitemsr   rY   minappendkeys)r:   r_   r`   ra   rb   rc   rv   r.   )rg   rh   ri   rj   r{   rt   ru   r/   forward   s*   

zDualDPT.forwardc                 C   s  |d j \}}}|| j || j }}	g }
t| jD ]?\}}|| d d |d f }| |}|ddd||||	}| j| |}| jrM| 	|||}| j
| |}|
| q| |
\}}t|| j | j }t|	| j | j }t|||fddd}| jr| 	|||}| j|}|dddd}| |dd d	f | j}| |d
 | j}|d	 }| jr| 	|||}| jjd	 |}|dddd}| |dd d	f d}| |d
 | j}| j|d	| j d|| j|| j d|iS )Nr   r   r   bilinearT)modealign_cornersr%   .).r   linear_conf)rm   r   	enumeraterE   rG   permuterd   rI   r   _add_pos_embedrL   r   _fuseintr   r
   rN   rT   _apply_activation_singler   r   r[   rC   squeezerD   )r:   r_   r`   ra   rb   rg   r8   rh   phpwZresized_featsZ	stage_idxZtake_idxxZ
fused_mainZfused_aux_pyrZh_outZw_outZmain_logitsfmapZ	main_predZ	main_confZlast_auxZlast_aux_logitsZ	fmap_lastZaux_predZaux_confr.   r.   r/   r}      sH   
zDualDPT._forward_implc                    sp  |\}}}} j |} j |} j |} j |}	 j j|	|jdd d}
 j j|	|jdd d}g } jdkrD|	|  j j
|
||jdd d}
 j j|||jdd d} jdkrj|	|  j j|
||jdd d}
 j j|||jdd d} jdkr|	|  j |
|}
 j ||}|	|  j |
}
 fddt|D }|
|fS )z
        Feature pyramid fusion.
        Returns:
            fused_main: Tensor at finest scale (after refinenet1)
            aux_pyr:    List of aux tensors at each level (pre out_conv1_aux)
        r   N)sizer   r%   c                    s    g | ]\}} j j| |qS r.   )rN   rZ   )r,   iauxr:   r.   r/   r0   5  r1   z!DualDPT._fuse.<locals>.<listcomp>)rN   Z	layer1_rnZ	layer2_rnZ	layer3_rnZ	layer4_rnrR   rm   rX   rB   r   rQ   rW   rP   rV   rO   rU   rS   r   )r:   r_   l1l2l3l4Zl1_rnZl2_rnZl3_rnZl4_rnoutZaux_outZaux_listr.   r   r/   r     s0   






zDualDPT._fuse皙?r   ratioc                 C   sp   |j d |j d }}t|||| |j|jd}t||j d | }|dddd |j d ddd}|| S )z5Simple UV positional embedding added to feature maps.r   )aspect_ratiodtypedevicer   r   r   N)rm   r	   r   r   r   r   r2   )r:   r   ra   r`   r   r   r   per.   r.   r/   r   9  s
   $zDualDPT._add_pos_embedin_chc                 C   s   | j dkr;tt||d dddt|d |dddt||d dddt|d |dddt||d dddS | j dkrbtt||d dddt|d |dddt||d dddS | j dkrutt||d dddS td| j  d)zCFactory for the aux pre-head stack before the final 1x1 projection.r   r   r%   r   zaux_out1_conv_num z not supported)r!   r*   r<   r+   
ValueError)r:   r   r.   r.   r/   r6   A  s"   


zDualDPT._make_aux_out1_blockr   c                 C   s   t |tr	| n|}|dkrt|S |dkrt|S |dkr(t|d S |dkr1t|S |dkr:t|S |dkrEtjj	
|S |dkrNt|S |S )	z
        Apply activation to single channel output, maintaining semantic consistency with value branch in multi-channel case.
        Supports: exp / relu / sigmoid / softplus / tanh / linear / expp1
        r   expm1r   r   relusigmoidsoftplustanh)
isinstancestrlowerry   r   r   r   r   r*   
functionalr   r   )r:   r   r   actr.   r.   r/   r   U  s    




z DualDPT._apply_activation_single)r^   )r   )r   )__name__
__module____qualname____doc__r   r   r   boolr   rA   r   ry   Tensorr   r   r}   r   floatr   r*   r<   r6   r   __classcell__r.   r.   r\   r/   r      s    	

v
4
(>$+r   )typingr   r   r   ry   torch.nnr*   addictr   Zdepth_anything_3.model.dptr   r   Z'depth_anything_3.model.utils.head_utilsr   r	   r
   r   Moduler   r.   r.   r.   r/   <module>   s   