
    |2grA                        d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d ZddZ	dde
defdZ G d d	ej                        Z G d
 dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        ZddZddZddZ G d dej                        Z G d dej                        Zy)    N)partialc                    d }||d|z  z
  k  s||d|z  z   kD  rt        j                  dd       t        j                         5   |||z
  |z        } |||z
  |z        }| j	                  d|z  dz
  d|z  dz
         | j                          | j                  |t        j                  d      z         | j                  |       | j                  ||       | cd d d        S # 1 sw Y   y xY w)Nc                 d    dt        j                  | t        j                  d      z        z   dz  S )N      ?       @)matherfsqrt)xs    :/home/cameronsmith/repos/FeatUp/featup/featurizers/DINO.pynorm_cdfz(_no_grad_trunc_normal_.<locals>.norm_cdf   s(    TXXa$))B-/00B66       zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.)
stacklevel   r   )minmax)warningswarntorchno_graduniform_erfinv_mul_r   r
   add_clamp_)tensormeanstdabr   lus           r   _no_grad_trunc_normal_r$   
   s    7 	q1s7{q1s7{ 2 E!"	$ 
  a$h#%&a$h#%& 	A	1q519- 	 	C$))B-'(D 	!#+  s   BC&&C/c                      t        | ||||      S N)r$   )r   r   r   r    r!   s        r   trunc_normal_r'   .   s    !&$Q::r   	drop_probtrainingc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )N        r   r   r   )dtypedevice)shapendimr   randr-   r.   floor_div)r   r(   r)   	keep_probr/   random_tensoroutputs          r   	drop_pathr7   4   s}    BhIIWWQZMDAFFQJ//E

5 QQMUU9-FMr   c                   *     e Zd ZdZd fd	Zd Z xZS )DropPathz^Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
    c                 8    t         t        |           || _        y r&   )superr9   __init__r(   )selfr(   	__class__s     r   r<   zDropPath.__init__C   s    h&("r   c                 D    t        || j                  | j                        S r&   )r7   r(   r)   r=   r   s     r   forwardzDropPath.forwardG   s    DNNDMM::r   r&   __name__
__module____qualname____doc__r<   rA   __classcell__r>   s   @r   r9   r9   ?   s    #;r   r9   c                   B     e Zd Zddej                  df fd	Zd Z xZS )MlpNr+   c                     t         |           |xs |}|xs |}t        j                  ||      | _         |       | _        t        j                  ||      | _        t        j                  |      | _        y r&   )	r;   r<   nnLinearfc1actfc2Dropoutdrop)r=   in_featureshidden_featuresout_features	act_layerrR   r>   s         r   r<   zMlp.__init__L   s_    #2{)8[99[/:;99_l;JJt$	r   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r&   )rN   rO   rR   rP   r@   s     r   rA   zMlp.forwardU   sH    HHQKHHQKIIaLHHQKIIaLr   )rC   rD   rE   rL   GELUr<   rA   rG   rH   s   @r   rJ   rJ   K   s    48tWYW^W^eg %r   rJ   c                   (     e Zd Zd fd	ZddZ xZS )	Attentionc                 4   t         |           || _        ||z  }|xs |dz  | _        t	        j
                  ||dz  |      | _        t	        j                  |      | _        t	        j
                  ||      | _	        t	        j                  |      | _
        y )Ng         bias)r;   r<   	num_headsscalerL   rM   qkvrQ   	attn_dropproj	proj_drop)	r=   dimr_   qkv_biasqk_scalerb   rd   head_dimr>   s	           r   r<   zAttention.__init___   sz    ")#1T!1
99S#'9I.IIc3'	I.r   c                     |j                   \  }}}| j                  |      j                  ||d| j                  || j                  z        j	                  ddddd      }|d   |d   |d   }	}}||j                  dd      z  | j                  z  }
|
j                  d      }
| j                  |
      }
|
|	z  j                  dd      j                  |||      }| j                  |      }| j                  |      }||
|fS )	Nr\   r   r   r      re   )r/   ra   reshaper_   permute	transposer`   softmaxrb   rc   rd   )r=   r   
return_qkvBNCra   qkvattns              r   rA   zAttention.forwardj   s    ''1ahhqk!!!Q4>>1;NOWWXY[\^_abdefa&#a&#a&a1AKKB''4::5|||#~~d#AX  A&..q!Q7IIaLNN1$|r   )   FNr+   r+   )F)rC   rD   rE   r<   rA   rG   rH   s   @r   rZ   rZ   ^   s    	/r   rZ   c                   `     e Zd Zddddddej                  ej
                  f fd	ZddZ xZS )Block      @FNr+   c                    t         |            |
|      | _        t        ||||||      | _        |dkD  rt        |      nt        j                         | _         |
|      | _	        t        ||z        }t        |||	|      | _        y )N)r_   rf   rg   rb   rd   r+   )rS   rT   rV   rR   )r;   r<   norm1rZ   ry   r9   rL   Identityr7   norm2intrJ   mlp)r=   re   r_   	mlp_ratiorf   rg   rR   rb   r7   rV   
norm_layermlp_hidden_dimr>   s               r   r<   zBlock.__init__z   s~     	_
9x(V_koq	09B),BKKM_
S9_-3R[bfgr   c                     | j                  | j                  |            \  }}}|r|S || j                  |      z   }|| j                  | j                  | j	                  |                  z   }|r|||fS |S r&   )ry   r   r7   r   r   )r=   r   return_attentionrr   yry   ra   s          r   rA   zBlock.forward   sq    yyA/4Kq!!txx

1677dC<r   )FF)	rC   rD   rE   rL   rX   	LayerNormr<   rA   rG   rH   s   @r   r|   r|   y   s0     77LLh&r   r|   c                   *     e Zd ZdZd fd	Zd Z xZS )
PatchEmbedz Image to Patch Embedding
    c                     t         |           ||z  ||z  z  }|| _        || _        || _        t        j                  ||||      | _        y )N)kernel_sizestride)r;   r<   img_size
patch_sizenum_patchesrL   Conv2drc   )r=   r   r   in_chans	embed_dimr   r>   s         r   r<   zPatchEmbed.__init__   sO    :-(j2HI $&IIh	zR\]	r   c                     |j                   \  }}}}| j                  |      }|j                   d   dz  dk(  r|d d d d d dd df   }|j                  d      j                  dd      S )Nrk   r   r   rl   )r/   rc   flattenrp   )r=   r   rs   ru   HWs         r   rA   zPatchEmbed.forward   si    WW
1aIIaL772;?a!QSbS.!Ayy|%%a++r   )      r\      rB   rH   s   @r   r   r      s    ^,r   r   c                        e Zd ZdZdgdddddddd	d
dddej
                  f fd	Zd Zd Zd Z	d Z
d ZddZd ZddZ xZS )VisionTransformerz Vision Transformer r   r   r\   r   r      r}   FNr+   c                    t         |           |x| _        | _        t	        |d   |||      | _        | j
                  j                  }t        j                  t        j                  dd|            | _        t        j                  t        j                  d|dz   |            | _        t        j                  |      | _        t        j                  d||      D cg c]  }|j!                          }}t        j"                  t%        |      D cg c]  }t'        ||||	|
||||   |	       c}      | _         ||      | _        |dkD  rt        j,                  ||      nt        j.                         | _        t3        | j                  d       t3        | j                  d       | j5                  | j6                         y c c}w c c}w )Nr   )r   r   r   r   r   )p)	re   r_   r   rf   rg   rR   rb   r7   r   {Gz?r   )r;   r<   num_featuresr   r   patch_embedr   rL   	Parameterr   zeros	cls_token	pos_embedrQ   pos_droplinspaceitem
ModuleListranger|   blocksnormrM   r   headr'   apply_init_weights)r=   r   r   r   num_classesr   depthr_   r   rf   rg   	drop_rateattn_drop_ratedrop_path_rater   kwargsr   r   dprir>   s                       r   r<   zVisionTransformer.__init__   sy     	-66DN%a[Z(V_a&&22ekk!Q	&BCekk![1_i&PQ

Y/!&>5!IJAqvvxJJmm 5\	%#  iRZem.CFWac%# $
 y)	 :EqBIIi5bkkm	dnn#.dnn#.

4%%& K%#s    G
Gc                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y t        |t        j                        rUt        j                  j                  |j
                  d       t        j                  j                  |j                  d       y y )Nr   r   r   r   )	
isinstancerL   rM   r'   weightr^   init	constant_r   r=   ms     r   r   zVisionTransformer._init_weights   s    a#!((,!RYY'AFF,>!!!&&!, -?'2<<(GGaffa(GGahh, )r   c                    |j                   d   dz
  }| j                  j                   d   dz
  }||k(  r||k(  r| j                  S | j                  d d df   }| j                  d d dd f   }|j                   d   }|| j                  j                  z  }	|| j                  j                  z  }
|	dz   |
dz   }
}	t        j
                  j                  |j                  dt        t        j                  |            t        t        j                  |            |      j                  dddd      |	t        j                  |      z  |
t        j                  |      z  fd      }t        |	      |j                   d	   k(  rt        |
      |j                   d   k(  sJ |j                  dddd      j                  dd|      }t        j                  |j                  d      |fd
      S )Nr   r   rl   g?r\   r   bicubic)scale_factormoderk   rm   )r/   r   r   r   rL   
functionalinterpolatern   r   r   r
   ro   r   cat	unsqueeze)r=   r   whnpatchrt   class_pos_embedpatch_pos_embedre   w0h0s              r   interpolate_pos_encodingz*VisionTransformer.interpolate_pos_encoding   s   aNN  #a'Q;16>>!..A...AB/ggbk$""---$""--- c28B--33##As499Q<'8#diil:KSQYYZ[]^`acdetyy|+R$))A,->? 4 

 2w///33B?CXCXY[C\8\\)11!Q1=EEaSQyy/33A6HaPPr   c                     |j                   \  }}}}| j                  |      }| j                  j                  |dd      }t	        j
                  ||fd      }|| j                  |||      z   }| j                  |      S )Nrl   r   rm   )r/   r   r   expandr   r   r   r   )r=   r   rs   ncr   r   
cls_tokenss          r   prepare_tokensz VisionTransformer.prepare_tokens   s{    gg2q!Q ^^**1b"5
IIz1o1- --aA66}}Qr   c                     | j                  |      }| j                  D ]
  } ||      } | j                  |      }|d d df   S )Nr   r   r   r   r=   r   blks      r   rA   zVisionTransformer.forward  sH    ";; 	CAA	IIaLAwr   c                 |    | j                  |      }| j                  D ]
  } ||      } | j                  |      }|S r&   r   r   s      r   forward_featszVisionTransformer.forward_feats  s@    ";; 	CAA	IIaLr   c                 t   | j                  |      }g }g }g }t        | j                        D ]  \  }} ||d      \  }}	}
t        | j                        |z
  |k  s0|r!|j	                  | j                  |             n|j	                  |       |j	                  |
       |j	                  |	        |||fS )NT)rr   r   	enumerater   lenappendr   )r=   r   nr   featattnsqkvsr   r   ry   ra   s              r   get_intermediate_featz'VisionTransformer.get_intermediate_feat  s    ", 	#FAsqT2LAtS4;;!#q(KK		!-KKNC T"	# UD  r   c                     | j                  |      }t        | j                        D ]4  \  }}|t        | j                        dz
  k  r	 ||      }* ||d      c S  y )Nr   T)r   )r   r   r   r   )r=   r   r   r   s       r   get_last_selfattentionz(VisionTransformer.get_last_selfattention&  s\    ", 	5FAs3t{{#a''F 1t44	5r   c                     | j                  |      }g }t        | j                        D ]I  \  }} ||      }t        | j                        |z
  |k  s*|j	                  | j                  |             K |S r&   r   )r=   r   r   r6   r   r   s         r   get_intermediate_layersz)VisionTransformer.get_intermediate_layers/  sm    ", 	,FAsAA4;;!#q(diil+	, r   )r   Tr,   )rC   rD   rE   rF   rL   r   r<   r   r   r   rA   r   r   r   r   rG   rH   s   @r   r   r      sg     % " "LL)'V-Q, !"5r   r   c                 ^    t        d	| dddddt        t        j                  d      d|}|S )
N   r   r\   rj   Tư>epsr   r   r   r_   r   rf   r    r   r   rL   r   r   r   models      r   vit_tinyr   :  >     MB!q'",,D"AMEKME Lr   c                 ^    t        d	| dddddt        t        j                  d      d|}|S )
N  r      rj   Tr   r   r   r   r   r   s      r   	vit_smallr   A  r   r   c                 ^    t        d| dddddt        t        j                  d      d|}|S )	Nr   r   rj   Tr   r   r   r   r   r   s      r   vit_baser   H  s?     MB"PQ'",,D"AMEKME Lr   c                   0     e Zd Z	 	 d fd	Zd Zd Z xZS )DINOHeadc                    t         
|           t        |d      }|dk(  rt        j                  ||      | _        nt        j                  ||      g}|r$|j                  t        j                  |             |j                  t        j                                t        |dz
        D ]p  }	|j                  t        j                  ||             |r$|j                  t        j                  |             |j                  t        j                                r |j                  t        j                  ||             t        j                  | | _        | j                  | j                         t        j                  j                  t        j                  ||d            | _        | j                  j                   j"                  j%                  d       |rd| j                  j                   _        y y )Nr   r   Fr]   )r;   r<   r   rL   rM   r   r   BatchNorm1drX   r   
Sequentialr   r   utilsweight_norm
last_layerweight_gdatafill_requires_grad)r=   in_dimout_dimuse_bnnorm_last_layernlayers
hidden_dimbottleneck_dimlayers_r>   s             r   r<   zDINOHead.__init__P  s_   gq/a<yy8DHii
34FbnnZ89MM"'')$7Q;' )bii
J?@MM".."<=bggi(	)
 MM"))J?@}}f-DH

4%%&((..ryyW\/]^  %%++A.5:DOO$$2 r   c                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y y )Nr   r   r   )r   rL   rM   r'   r   r^   r   r   r   s     r   r   zDINOHead._init_weightsh  sZ    a#!((,!RYY'AFF,>!!!&&!, -?' $r   c                     | j                  |      }t        j                  j                  |dd      }| j	                  |      }|S )Nrl   r   )re   r   )r   rL   r   	normalizer  r@   s     r   rA   zDINOHead.forwardn  s=    HHQKMM##A2#3OOAr   )FTr\   i      )rC   rD   rE   r<   r   rA   rG   rH   s   @r   r   r   O  s    bf #;0-r   r   c                   ,     e Zd Z fdZd ZddZ xZS )DINOFeaturizerc                    t         |           || _        || _        || _        t        |d      | _        d|v rt        j                  d      d   }|j                         D ci c](  \  }}|j                  dd      j                  dd      |* }}}|j                         D ci c]  \  }}d	|vs|| }}}nBd
|v rt        j                  d      d   }|j                         D ci c](  \  }}|j                  dd      j                  dd      |* }}}|j                         D ci c]  \  }}d	|vs|| }}}nd|v rt        j                  d      d   }|j                         D ci c](  \  }}|j                  dd      j                  dd      |* }}}|j                         D ci c]  \  }}d	|vs|| }}}n0d|v rdj                  |j                  d      d d       }t        j                  dj                  |            d   }|j                         D ci c](  \  }}|j                  dd      j                  dd      |* }}}|j                         D ci c]  \  }}d	|vs|| }}}nvd|v r9t        j                  j                  d| j                        j                         }n9|7t!        j"                  | j                  d      }|j                         }|d= |d= || j                  j%                  d       |dk(  rd| _        y d| _        y c c}}w c c}}w c c}}w c c}}w c c}}w c c}}w c c}}w c c}}w )Nr   )r   r   z3d-dinoz../models/3d-dino-co3d.pthteacherzmodule. z	backbone.zhead.z
iarpa-dinoz../models/dino_iarpa.pthzchk-dinoz7../models/dino_deitsmall16_pretrain_full_checkpoint.pthft_dinor  rl   z../models/{}.pthdinozfacebookresearch/dino:mainT)
pretrainedzhead.weightz	head.bias)strictr   r   r   )r;   r<   archr   	feat_typer   r   r   loaditemsreplacejoinsplitformathub
state_dicttimmcreate_modelload_state_dictn_feats)	r=   r   r   r!  r)  rw   rx   
temp_modelr>   s	           r   r<   zDINOFeaturizer.__init__x  s<   	$"!
 $@A)LJ[e[k[k[mnSWSTVW!))Ir2::;KQNnJn+5+;+;+=R41aPQAQ!Q$RJRT!$>?	JJ[e[k[k[mnSWSTVW!))Ir2::;KQNnJn+5+;+;+=R41aPQAQ!Q$RJR4$]^_hiJ[e[k[k[mnSWSTVW!))Ir2::;KQNnJn+5+;+;+=R41aPQAQ!Q$RJR$88DJJsOCR01D$6$=$=d$CDYOJ[e[k[k[mnSWSTVW!))Ir2::;KQNnJn+5+;+;+=R41aPQAQ!Q$RJR t^(DdiiP[[]J**499FJ#..0J=);'JJ&&z$&?;DLDL? oR oR oR oRsH   &-L6)L<6L</-M2M?M8-M;MM3-M6M M c                 8    | j                   j                  |      S r&   )r   rA   )r=   imgs     r   get_cls_tokenzDINOFeaturizer.get_cls_token  s    zz!!#&&r   c                 n   |j                   d   | j                  z  dk(  sJ |j                   d   | j                  z  dk(  sJ | j                  j                  ||      \  }}}|d   |d   |d   }}}|j                   d   | j                  z  }|j                   d   | j                  z  }| j                  dk(  r@|d d dd d d f   j                  |j                   d   ||d      j                  dddd      }	n| j                  dk(  rx|dd d d d dd d d f   }
|
j                  dddd      j                  d	d
      }|j                  |j                   d   |||j                   d         j                  dddd      }	n$t        dj                  | j                              |r|	|d d dd d f   fS |	S )Nr   r   r\   )r   tokenr   rl   keyrk   )	start_dimend_dimzUnknown feat type:{})
r/   r   r   r   r!  rn   ro   r   
ValueErrorr'  )r=   r0  r   include_clsr   ry   ra   feat_hfeat_w
image_featr   descs               r   rA   zDINOFeaturizer.forward  s   		!t.!34		!t.!34**::3!:DdCq'47CFCd1010>>W$aQh//

1vvrRZZ[\^_abdefJ^^u$Aq!QRN#A99Q1a(002r0JDdjjmVVTZZPQ]SAq!$  3::4>>JKKtAq!G},,r   )r   F)rC   rD   rE   r<   r1  rA   rG   rH   s   @r   r  r  v  s    +Z'r   r  )r+   r   g       r   )r+   F)r   )r   r   	functoolsr   r*  r   torch.nnrL   r$   r'   floatboolr7   Moduler9   rJ   rZ   r|   r   r   r   r   r   r   r  r   r   r   <module>rB     s         !H;E $ 	;ryy 	;")) &		 6BII >, ,*J		 JZ#ryy #NIRYY Ir   