
    |2g%              	       Z   U d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	Z	d dl	m
Z
mZ ddlmZmZ ddlmZ ddlmZ ddlmZ  e j*                  d	      Zej.                  j1                  d
      du Z	 erd dlmZmZmZ dZ ej>                  d       n ej>                  d       e 	  G d de
jB                        Z"	 d dedeegef   de#defdZ$d dZ%d!dZ&i Z'eeef   e(d<   d!dZ)	 	 d"dee   deeegef   de#defdZ* G d de"      Z+y# e $ r dZ ej>                  d       Y w xY w)#    N)CallableListAnyTupleDict)nnTensor   )	AttentionMemEffAttention)DropPath)
LayerScale)Mlpdinov2XFORMERS_DISABLED)fmhascaled_index_addindex_select_catTzxFormers is available (Block)zxFormers is disabled (Block)Fz!xFormers is not available (Block)c                   "    e Zd Zddddddddej                  ej
                  eefdedede	d	e
d
e
de
de	de	de	dedej                  f   dedej                  f   dedej                  f   dedej                  f   ddf fdZdedefdZ xZS )Blockg      @FT        Ndim	num_heads	mlp_ratioqkv_bias	proj_biasffn_biasdrop	attn_drop	drop_path	act_layer.
norm_layer
attn_class	ffn_layerreturnc                    t         |            ||      | _         |||||||      | _        |	rt	        ||	      nt        j                         | _        |
dkD  rt        |
      nt        j                         | _	         ||      | _
        t        ||z        } ||||||      | _        |	rt	        ||	      nt        j                         | _        |
dkD  rt        |
      nt        j                         | _        |
| _        y )N)r   r   r   r   	proj_drop)init_valuesr   )in_featureshidden_featuresr!   r   bias)super__init__norm1attnr   r   Identityls1r   
drop_path1norm2intmlpls2
drop_path2sample_drop_ratio)selfr   r   r   r   r   r   r   r   r(   r    r!   r"   r#   r$   mlp_hidden_dim	__class__s                   I/home/cameronsmith/repos/FeatUp/featup/featurizers/dinov2/layers/block.pyr-   zBlock.__init__,   s    " 	_

	 @K:c{;PRP[P[P]1:S(9-bkkm_
S9_-*
 @K:c{;PRP[P[P]1:S(9-bkkm!*    xc                     dt         dt         f fd}dt         dt         f fd} j                  rA j                  dkD  r2t        || j                        }t        || j                        }|S  j                  rE j                  dkD  r6| j	                   ||            z   }| j	                   ||            z   }|S | ||      z   }| ||      z   }|S )Nr>   r%   c                 b    j                  j                  j                  |                   S Nr1   r/   r.   r>   r9   s    r<   attn_residual_funcz)Block.forward.<locals>.attn_residual_funcZ   s#    88DIIdjjm455r=   c                 b    j                  j                  j                  |                   S rA   r6   r5   r3   rC   s    r<   ffn_residual_funcz(Block.forward.<locals>.ffn_residual_func]   s#    88DHHTZZ]344r=   g?)residual_funcr8   r   )r	   trainingr8   "drop_add_residual_stochastic_depthr2   )r9   r>   rD   rG   s   `   r<   forwardzBlock.forwardY   s    	6& 	6V 	6	5 	5F 	5 ==T33c920"&"8"8A
 3/"&"8"8A  ]]t55;DOO$6q$9::ADOO$5a$899A  &q))A%a((Ar=   )__name__
__module____qualname__r   GELU	LayerNormr   r   r4   floatboolr   Moduler-   r	   rK   __classcell__r;   s   @r<   r   r   +   s   
 .0gg/1||/8.1++++ ++ 	++
 ++ ++ ++ ++ ++ ++ CN+++ S"))^,++ S"))^,++ CN+++  
!++Z F r=   r   r>   rH   r8   r%   c                    | j                   \  }}}t        t        |d|z
  z        d      }t        j                  || j
                        d | }| |   } ||      }	| j                  d      }
|	j                  d      }	||z  }t        j                  |
d||	j                  | j                        |      }|j                  |       S )Nr
   devicer   dtypealpha)shapemaxr4   torchrandpermrX   flatten	index_addtorZ   view_as)r>   rH   r8   bndsample_subset_sizebrangex_subsetresidualx_flatresidual_scale_factorx_plus_residuals                r<   rJ   rJ   u   s     ggGAq!Sa*;&;!<=qAnnQqxx02E3EFFyH X&HYYq\F"H 22 oofa1779S[pqO""1%%r=   c                     | j                   \  }}}t        t        |d|z
  z        d      }t        j                  || j
                        d | }||z  }||fS )Nr
   rW   )r]   r^   r4   r_   r`   rX   )r>   r8   re   rf   rg   rh   ri   rm   s           r<   get_branges_scalesrp      sc    ggGAq!Sa*;&;!<=qAnnQqxx02E3EFF 22(((r=   c                    |X| j                  d      }|j                  d      }t        j                  |d||j                  | j                        |      }|S t        | ||j                  | j                        ||      }|S )Nr
   r   rY   r[   )scalingr\   )ra   r_   rb   rc   rZ   r   )r>   ri   rk   rm   scaling_vectorrl   rn   s          r<   add_residualrt      s    1##A&//&!VX[[qww[=W_tu
  +vx{{{1>Qf
 r=   attn_bias_cachec                    ||D cg c]  }|j                   d    c}n| D cg c]  }|j                   d    c}}t        d t        ||       D              }|t        j	                         vrsg }t        ||       D ]3  \  }}t        |      D ]   }|j                  |j                   d          " 5 t        j                  j                  |      }||_
        |t        |<   |Kt        | D cg c]  }|j                  d       c}|      j                  dd| d   j                   d         }	n)t        d | D              }
t        j                  |
d      }	t        |   |	fS c c}w c c}w c c}w )zc
    this will perform the index select, cat the tensors, and provide the attn_bias from cache
    r   c              3   D   K   | ]  \  }}||j                   d    f  yw)r
   N)r]   ).0re   r>   s      r<   	<genexpr>z(get_attn_bias_and_cat.<locals>.<genexpr>   s      L41a1771:Ls    r
   c              3   `   K   | ]&  }|j                  d dg|j                  dd        ( yw)r
   rz      N)reshaper]   )rx   r>   s     r<   ry   z(get_attn_bias_and_cat.<locals>.<genexpr>   s-     MAIIq"&;qwwqr{&;<Ms   ,.)r   )r]   tuplezipru   keysrangeappendr   BlockDiagonalMaskfrom_seqlens_batch_sizesr   ra   viewr_   cat)x_listbrangesre   r>   batch_sizes
all_shapesseqlens_	attn_biascat_tensorstensors_bs1s              r<   get_attn_bias_and_catr      sX    4;3Fw/!1771:/agLh\]QWWUVZLhKL3{F3KLLJ--//V, 	+DAq1X +qwwqz*+	+ **77@	!,	&/
#&f'E		!'EwOTTUVXZ\bcd\e\k\kln\opMfMMii3:&33# 0Lh (Fs   E#E(-E-r   c           
      ~   | D cg c]  }t        ||       }}|D cg c]  }|d   	 }}|D cg c]  }|d   	 }}t        | |      \  }	}
|	j                   ||
|	            }g }t        | |||      D ]4  \  }}}}|j	                  t        |||||      j                  |             6 |S c c}w c c}w c c}w )N)r8   r   r
   r   )rp   r   splitr   r   rt   rd   )r   rH   r8   rs   r>   branges_scalessr   residual_scale_factorsr   x_catresidual_listoutputsri   rk   rm   s                   r<   'drop_add_residual_stochastic_depth_listr      s     [aaUV(>OPaNa+,qt,G,,:;qad;; -VW=Iu OOM%9$MNMG69&'=Zp6q l2682|Avx9NP^_gghijklN b,;s   B0B5B:c                   :     e Zd Zdee   dee   fdZ fdZ xZS )NestedTensorBlockr   r%   c           	          t         j                  t              sJ  j                  rɉ j                  dkD  rddt
        dt
        f fd}ddt
        dt
        f fd}t        || j                  t         j                  t              r j                  j                  nd      }t        || j                  t         j                  t              r j                  j                  nd      }|S ddt
        dt
        f fd}ddt
        dt
        f fd	}t        |      \  }}| |||
      z   }| ||      z   }|j                  |      S )zL
        x_list contains a list of tensors to nest together and run
        r   Nr>   r%   c                 H    j                  j                  |       |      S Nr   )r/   r.   r>   r   r9   s     r<   rD   z<NestedTensorBlock.forward_nested.<locals>.attn_residual_func   s    yyA)yDDr=   c                 D    j                  j                  |             S rA   )r5   r3   r   s     r<   rG   z;NestedTensorBlock.forward_nested.<locals>.ffn_residual_func   s    xx

1..r=   )rH   r8   rs   c                 f    j                  j                  j                  |       |            S r   rB   r   s     r<   rD   z<NestedTensorBlock.forward_nested.<locals>.attn_residual_func   s'    xx		$**Q-9	 MNNr=   c                 b    j                  j                  j                  |                   S rA   rF   r   s     r<   rG   z;NestedTensorBlock.forward_nested.<locals>.ffn_residual_func   s#    xxA 788r=   r   rA   )
isinstancer/   r   rI   r8   r	   r   r1   r   gammar6   r   r   )r9   r   rD   rG   r   r>   s   `     r<   forward_nestedz NestedTensorBlock.forward_nested   s0    $))_55==T33c9Ef E E/V / / =0"&"8"81;DHHj1Qtxx~~W[	F =/"&"8"81;DHHj1Qtxx~~W[	F MOf O O9V 9 9 18LIq&qI>>A%a((A??1%%r=   c                     t        |t              rt        |   |      S t        |t              r"t
        st        d      | j                  |      S t        )Nz-xFormers is required for using nested tensors)r   r	   r,   rK   listXFORMERS_AVAILABLEAssertionErrorr   )r9   x_or_x_listr;   s     r<   rK   zNestedTensorBlock.forward   sL    k6*7?;//T*%$%TUU&&{33  r=   )rL   rM   rN   r   r	   r   rK   rT   rU   s   @r<   r   r      s)    &&T&\ &&d6l &&P! !r=   r   )r   rA   )r   N),loggingostypingr   r   r   r   r   warningsr_   r   r	   	attentionr   r   r    r   layer_scaler   r5   r   	getLoggerloggerenvirongetXFORMERS_ENABLEDxformers.opsr   r   r   r   warnImportErrorrS   r   rQ   rJ   rp   rt   ru   __annotations__r   r   r    r=   r<   <module>r      s{    	 3 3    1  #  
		8	$ ::>>"56$> 7II!5645 	7GBII GZ  #&&VHf,-& & 	&0)	 %'eSj! &46  #	LVSM612 
 .1! 1!]  7HMM567s   25D D*)D*