o
    i%                  	   @   s  U d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	Z	d dl	m
Z
mZ ddlmZmZ ddlmZ ddlmZ ddlmZ e d	Zejd
du Zzer^d dlmZmZmZ dZneW n eyl   dZY nw G dd de
j Z!	d#dedeegef de"defddZ#d#ddZ$d$ddZ%i Z&eeef e'd< d$ddZ(		d%dee deeegef de"defdd Z)G d!d" d"e!Z*dS )&    N)CallableListAnyTupleDict)nnTensor   )	AttentionMemEffAttention)DropPath)
LayerScale)Mlpdinov2ZXFORMERS_DISABLED)fmhascaled_index_addindex_select_catTFc                       s   e Zd Zddddddddejejeefdedede	d	e
d
e
de
de	de	de	dedejf dedejf dedejf dedejf ddf fddZdedefddZ  ZS )Blockg      @FT        Ndim	num_heads	mlp_ratioqkv_bias	proj_biasffn_biasdrop	attn_drop	drop_path	act_layer.
norm_layer
attn_class	ffn_layerreturnc                    s   t    ||| _|||||||d| _|	rt||	dnt | _|
dkr*t|
nt | _	||| _
t|| }||||||d| _|	rLt||	dnt | _|
dkrYt|
nt | _|
| _d S )N)r   r   r   r   Z	proj_drop)init_valuesr   )in_featureshidden_featuresr   r   bias)super__init__norm1attnr   r   Identityls1r   
drop_path1norm2intmlpls2Z
drop_path2sample_drop_ratio)selfr   r   r   r   r   r   r   r   r#   r   r   r   r    r!   Zmlp_hidden_dim	__class__ 9/data/cameron/moge_repo/moge/model/dinov2/layers/block.pyr(   +   s0   



zBlock.__init__xc                    s   dt dt f fdd}dt dt f fdd} jr0 jdkr0t|| jd}t|| jd}|S  jrL jd	krL| || }| || }|S ||| }||| }|S )
Nr8   r"   c                           | S Nr,   r*   r)   r8   r3   r6   r7   attn_residual_funcY      z)Block.forward.<locals>.attn_residual_funcc                    r9   r:   r1   r0   r.   r<   r=   r6   r7   ffn_residual_func\   r?   z(Block.forward.<locals>.ffn_residual_funcg?)residual_funcr2   r   )r   trainingr2   "drop_add_residual_stochastic_depthr-   )r3   r8   r>   rA   r6   r=   r7   forwardX   s*   zBlock.forward)__name__
__module____qualname__r   GELU	LayerNormr
   r   r/   floatboolr   Moduler(   r   rE   __classcell__r6   r6   r4   r7   r   *   sV    	-r   r   r8   rB   r2   r"   c                 C   s   | j \}}}tt|d|  d}tj|| jdd | }| | }||}	| d}
|	d}	|| }tj|
d||	j| j	d|d}|
| S )Nr	   devicer   dtypealpha)shapemaxr/   torchrandpermrP   flatten	index_addtorR   view_as)r8   rB   r2   bndsample_subset_sizebrangeZx_subsetresidualx_flatresidual_scale_factorx_plus_residualr6   r6   r7   rD   t   s   


rD   c                 C   sJ   | j \}}}tt|d|  d}tj|| jdd | }|| }||fS )Nr	   rO   )rU   rV   r/   rW   rX   rP   )r8   r2   r]   r^   r_   r`   ra   rd   r6   r6   r7   get_branges_scales   s
   rf   c                 C   s^   |d u r|  d}| d}tj|d||j| jd|d}|S t| ||j| jd||d}|S )Nr	   r   rQ   rS   )scalingrT   )rY   rW   rZ   r[   rR   r   )r8   ra   rb   rd   scaling_vectorrc   re   r6   r6   r7   add_residual   s   

ri   attn_bias_cachec                 C   s   |durdd |D ndd | D }t dd t|| D }|t vrLg }t|| D ]\}}t|D ]
}||jd  q3q+tj	|}||_
|t|< |duretdd | D |dd	| d
 jd	 }	nt dd | D }
tj|
dd}	t| |	fS )zc
    this will perform the index select, cat the tensors, and provide the attn_bias from cache
    Nc                 S      g | ]}|j d  qS r   rU   ).0r]   r6   r6   r7   
<listcomp>       z)get_attn_bias_and_cat.<locals>.<listcomp>c                 S   rk   rl   rm   rn   r8   r6   r6   r7   ro      rp   c                 s   s"    | ]\}}||j d  fV  qdS )r	   Nrm   )rn   r]   r8   r6   r6   r7   	<genexpr>   s     z(get_attn_bias_and_cat.<locals>.<genexpr>r	   c                 S   s   g | ]}| d qS r	   )rY   rq   r6   r6   r7   ro      rp   r   c                 s   s,    | ]}| d dg|jdd V  qdS )r	   rt      N)reshaperU   rq   r6   r6   r7   rr      s   * )r   )tupleziprj   keysrangeappendrU   r   ZBlockDiagonalMaskZfrom_seqlensZ_batch_sizesr   viewrW   cat)x_listbrangesbatch_sizesZ
all_shapesZseqlensr]   r8   _	attn_biasZcat_tensorsZtensors_bs1r6   r6   r7   get_attn_bias_and_cat   s    $*r   r~   c              	      s    fdd| D }dd |D }dd |D }t | |\}}||||d}	g }
t| ||	|D ]\}}}}|
t|||||| q0|
S )Nc                    s   g | ]}t | d qS )r2   )rf   rq   r   r6   r7   ro      s    z;drop_add_residual_stochastic_depth_list.<locals>.<listcomp>c                 S      g | ]}|d  qS rl   r6   rn   sr6   r6   r7   ro          c                 S   r   rs   r6   r   r6   r6   r7   ro      r   r   )r   splitrx   r{   ri   r\   )r~   rB   r2   rh   Zbranges_scalesr   Zresidual_scale_factorsr   Zx_catZresidual_listoutputsr8   ra   rb   rd   r6   r   r7   'drop_add_residual_stochastic_depth_list   s   r   c                       s6   e Zd Zdee dee fddZ fddZ  ZS )NestedTensorBlockr~   r"   c                    s  t  jtsJ  jrP jdkrPddtdtf fdd}ddtdtf fdd}t|| jt  jtr7 jj	ndd	}t|| jt  jtrJ j
j	ndd	}|S ddtdtf fd
d}ddtdtf fdd}t|\}}||||d }||| }||S )zL
        x_list contains a list of tensors to nest together and run
        r   Nr8   r"   c                    s    j  | |dS Nr   )r*   r)   r8   r   r=   r6   r7   r>      s   z<NestedTensorBlock.forward_nested.<locals>.attn_residual_funcc                    s      | S r:   )r0   r.   r   r=   r6   r7   rA      s   z;NestedTensorBlock.forward_nested.<locals>.ffn_residual_func)rB   r2   rh   c                    s      j | |dS r   r;   r   r=   r6   r7   r>      s   c                    r9   r:   r@   r   r=   r6   r7   rA      r?   r   r:   )
isinstancer*   r   rC   r2   r   r   r,   r   gammar1   r   r   )r3   r~   r>   rA   r   r8   r6   r=   r7   forward_nested   s.   
z NestedTensorBlock.forward_nestedc                    s:   t |trt |S t |trtstd| |S t)Nz-xFormers is required for using nested tensors)r   r   r'   rE   listXFORMERS_AVAILABLEAssertionErrorr   )r3   Zx_or_x_listr4   r6   r7   rE      s   


zNestedTensorBlock.forward)rF   rG   rH   r   r   r   rE   rN   r6   r6   r4   r7   r      s    (r   )r   r:   )r   N)+loggingostypingr   r   r   r   r   warningsrW   r   r   	attentionr
   r   r   r   Zlayer_scaler   r0   r   	getLoggerloggerenvirongetXFORMERS_ENABLEDZxformers.opsr   r   r   r   ImportErrorrM   r   rK   rD   rf   ri   rj   __annotations__r   r   r   r6   r6   r6   r7   <module>   s`   
	
M




