o
    i	                     @   sR   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 G dd dej
ZdS )    N)MlpBlock)extri_intri_to_pose_encoding)affine_inversec                       s\   e Zd ZdZ							ddeded	ed
edededef fddZdefddZ  Z	S )	CameraEncz
    CameraHead predicts camera parameters from token representations using iterative refinement.

    It applies a series of transformer blocks (the "trunk") to dedicated camera tokens.
       	         {Gz?dim_outdim_intrunk_depth
target_dim	num_heads	mlp_ratioinit_valuesc           	         sl   t    || _|| _tj fddt|D  | _t | _	t | _
t| d  dd| _d S )Nc                    s   g | ]
}t  d qS ))dimr   r   r   r   ).0_r   r   r   r    </data/cameron/da3_repo/src/depth_anything_3/model/cam_enc.py
<listcomp>-   s    z&CameraEnc.__init__.<locals>.<listcomp>   r   )in_featureshidden_featuresout_featuresdrop)super__init__r   r   nn
Sequentialrangetrunk	LayerNorm
token_norm
trunk_normr   pose_branch)	selfr   r   r   r   r   r   r   kwargs	__class__r   r   r!      s    
zCameraEnc.__init__returnc                 C   s@   t |}t|||}| |}| |}| |}| |}|S )N)r   r   r)   r'   r%   r(   )r*   extixtZ
image_sizeZc2wspose_encodingZpose_tokensr   r   r   forward@   s   



zCameraEnc.forward)r   r	   r
   r	   r   r
   r   )
__name__
__module____qualname____doc__intfloatr!   tupler2   __classcell__r   r   r,   r   r      s6    "r   )torch.nnr"   Z&depth_anything_3.model.utils.attentionr   Z"depth_anything_3.model.utils.blockr   &depth_anything_3.model.utils.transformr   depth_anything_3.utils.geometryr   Moduler   r   r   r   r   <module>   s   