o
    {i                     @   s   d dl Z d dlmZmZ d dlZd dlm  mZ d dl	m
Z
mZ d dlmZmZ dedefddZded	ed
edefddZG dd dejZG dd dejZG dd dejZdS )    N)ListTuple)cat_keep_shapesuncat_with_shapes)Tensornnxreturnc                 C   s&   | j ddd\}}tj| |gddS )N   dim)chunktorchcat)r   x1x2 r   C/data/cameron/keygrip/volume_dino_tracks/dinov3/layers/attention.pyrope_rotate_half   s   r   sincosc                 C   s   | | t | |  S N)r   )r   r   r   r   r   r   
rope_apply   s   r   c                       s.   e Zd Z fddZdedefddZ  ZS )LinearKMaskedBiasc                    sT   t  j|i | | j}|d dksJ | jd ur(| dtj| jtjd d S d S )N   r   	bias_mask)
fill_value)	super__init__out_featuresbiasregister_bufferr   	full_likemathnan)selfargskwargso	__class__r   r   r      s   
zLinearKMaskedBias.__init__inputr	   c                 C   s4   | j d ur| j | j| j j nd }t|| j|S r   )r!   r   todtypeFlinearweight)r&   r,   Zmasked_biasr   r   r   forward&   s   $zLinearKMaskedBias.forward)__name__
__module____qualname__r   r   r2   __classcell__r   r   r*   r   r      s    r   c                       s   e Zd Z							ddededed	ed
edededdf fddZdededeeeef B deeef fddZ	ddededefddZ
ddee fddZddedefddZ  ZS )SelfAttention   FT        Nr   	num_headsqkv_bias	proj_bias	attn_drop	proj_dropmask_k_biasr	   c	                    sv   t    || _|| }	|	d | _|rtntj}
|
||d ||d| _t|| _	tj||||d| _
t|| _d S )N      r   )r!   device)r   r   r:   scaler   r   LinearqkvDropoutr=   projr>   )r&   r   r:   r;   r<   r=   r>   r?   rA   head_dimZlinear_classr*   r   r   r   ,   s   

zSelfAttention.__init__qkropec                 C   s*  |j }|j }|\}}|j }|j|d}|j|d}|jd }	|	|jd  }
|
dks+J |d d d d d |
d d f }t|d d d d |
d d d f ||}tj||fdd}|d d d d d |
d d f }t|d d d d |
d d d f ||}tj||fdd}|j|d}|j|d}||fS )N)r.   r   r   )r.   r-   shaper   r   r   )r&   rH   rI   rJ   Zq_dtypeZk_dtyper   r   Z
rope_dtypeNprefixZq_prefixZk_prefixr   r   r   
apply_ropeB   s$   
 ( (zSelfAttention.apply_roper   c                 C   s2   |  |}| j|||d}| |}| |}|S )N)rD   	attn_biasrJ   )rD   compute_attentionrF   r>   )r&   r   rP   rJ   rD   Zattn_vr   r   r   r2   W   s
   


zSelfAttention.forwardc                 C   s   t |t |ks
J t|\}}}| |}t|||}g }	tt|||D ]\}
\}}
}|	| j|||d q&t|	\}}}| |}t|||S )N)rP   rJ   )	lenr   rD   r   	enumeratezipappendrQ   rF   )r&   x_listrP   Z	rope_listZx_flatshapes
num_tokensZqkv_flatZqkv_listZatt_out_rD   rJ   r   r   r   forward_list^   s   

zSelfAttention.forward_listrD   c                 C   s   |d u sJ |j \}}}| jj}|||d| j|| j }t|d\}}	}
dd ||	|
fD \}}	}
|d ur@| ||	|\}}	tjj	
||	|
}|dd}||||gS )Nr   r
   c                 S      g | ]}| d dqS    r
   	transpose.0tr   r   r   
<listcomp>q       z3SelfAttention.compute_attention.<locals>.<listcomp>r]   )rL   rD   in_featuresreshaper:   r   unbindrO   r   
functionalscaled_dot_product_attentionr_   )r&   rD   rP   rJ   BrM   rY   CrH   rI   vr   r   r   r   rQ   j   s   zSelfAttention.compute_attention)r8   FTr9   r9   FN)NN)r3   r4   r5   intboolfloatr   r   r   rO   r2   r   rZ   rQ   r6   r   r   r*   r   r7   +   s:    
.r7   c                       s   e Zd Z					ddedededed	ed
eddf fddZ	ddedB dedB deddfddZddededefddZ	  Z
S )CausalSelfAttentionr8   FTr9   r   r:   r;   r<   r=   r>   r	   Nc                    sf   t    || _|| _|| }|d | _tj||d |d| _|| _tj|||d| _	t
|| _d S )Nr@   r   )r!   )r   r   r   r:   rB   r   rC   rD   r=   rF   rE   r>   )r&   r   r:   r;   r<   r=   r>   rG   r*   r   r   r   z   s   
	
zCausalSelfAttention.__init__      ?init_attn_stdinit_proj_stdfactorc                 C   s   |p| j d }|p|| }tjj| jj|d tjj| jj|d | jjd ur/tj| jj | jjd ur?tj| jj d S d S )Nr@   )std)	r   r   initnormal_rD   r1   rF   r!   zeros_)r&   rr   rs   rt   r   r   r   init_weights   s   z CausalSelfAttention.init_weightsr   	is_causalc           
      C   s   |j \}}}| |||d| j|| j }t|d\}}}	dd |||	fD \}}}	tjjj|||	d | j	r:| j
nd|d}|dd |||}| | |}|S )Nr   r
   c                 S   r[   r\   r^   r`   r   r   r   rc      rd   z/CausalSelfAttention.forward.<locals>.<listcomp>r   )	attn_mask	dropout_prz   r]   )rL   rD   rf   r:   r   rg   r   rh   ri   trainingr=   r_   
contiguousviewr>   rF   )
r&   r   rz   rj   rM   rk   rD   rH   rI   rl   r   r   r   r2      s    zCausalSelfAttention.forward)r8   FTr9   r9   )NNrq   )T)r3   r4   r5   rm   rn   ro   r   ry   r   r2   r6   r   r   r*   r   rp   y   s@    
 rp   )r$   typingr   r   r   torch.nn.functionalr   rh   r/   Zdinov3.utilsr   r   r   r   r   rC   r   Moduler7   rp   r   r   r   r   <module>   s   N