import logging
import math
from inspect import isfunction
from typing import Any, Optional

import torch
import torch.nn.functional as F
from einops import rearrange, repeat
from packaging import version
from torch import nn
from torch.utils.checkpoint import checkpoint

logpy = logging.getLogger(__name__)

if version.parse(torch.__version__) >= version.parse("2.0.0"):
    SDP_IS_AVAILABLE = True
    from torch.backends.cuda import SDPBackend, sdp_kernel

    BACKEND_MAP = {
        SDPBackend.MATH: {
            "enable_math": True,
            "enable_flash": False,
            "enable_mem_efficient": False,
        },
        SDPBackend.FLASH_ATTENTION: {
            "enable_math": False,
            "enable_flash": True,
            "enable_mem_efficient": False,
        },
        SDPBackend.EFFICIENT_ATTENTION: {
            "enable_math": False,
            "enable_flash": False,
            "enable_mem_efficient": True,
        },
        None: {
            "enable_math": True,
            "enable_flash": True,
            "enable_mem_efficient": True,
        },
    }
else:
    from contextlib import nullcontext

    SDP_IS_AVAILABLE = False
    sdp_kernel = nullcontext
    BACKEND_MAP = {}
    logpy.warn(
        f"No SDP backend available, likely because you are running in pytorch "
        f"versions < 2.0. In fact, you are using PyTorch {torch.__version__}. "
        f"You might want to consider upgrading."
    )

try:
    import xformers
    import xformers.ops

    XFORMERS_IS_AVAILABLE = True
except:
    XFORMERS_IS_AVAILABLE = False
    logpy.warn("no module 'xformers'. Processing without...")


def exists(val):
    return val is not None


def uniq(arr):
    return {el: True for el in arr}.keys()


def default(val, d):
    if exists(val):
        return val
    return d() if isfunction(d) else d


def max_neg_value(t):
    return -torch.finfo(t.dtype).max


def init_(tensor):
    dim = tensor.shape[-1]
    std = 1 / math.sqrt(dim)
    tensor.uniform_(-std, std)
    return tensor

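# Editor's usage sketch (not part of the original module): BACKEND_MAP turns an
# SDPBackend value (or None for "let PyTorch choose") into keyword arguments
# for torch.backends.cuda.sdp_kernel, which CrossAttention below uses to pin
# scaled_dot_product_attention to a single kernel, e.g.:
#
#   with sdp_kernel(**BACKEND_MAP[SDPBackend.FLASH_ATTENTION]):
#       out = F.scaled_dot_product_attention(q, k, v)  # flash kernel only
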

# feedforward
class GEGLU(nn.Module):
    def __init__(self, dim_in, dim_out):
        super().__init__()
        # project to twice the output width; one half gates the other
        self.proj = nn.Linear(dim_in, dim_out * 2)

    def forward(self, x):
        x, gate = self.proj(x).chunk(2, dim=-1)
        return x * F.gelu(gate)

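# Editor's shape sketch: GEGLU(512, 2048) projects (B, N, 512) to (B, N, 4096)
# and gates one half with GELU of the other, yielding (B, N, 2048); see
# https://arxiv.org/abs/2002.05202 for the GLU-variant family.
#
#   y = GEGLU(512, 2048)(torch.randn(2, 77, 512))  # -> torch.Size([2, 77, 2048])
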

class FeedForward(nn.Module):
    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0):
        super().__init__()
        inner_dim = int(dim * mult)
        dim_out = default(dim_out, dim)
        project_in = (
            nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU())
            if not glu
            else GEGLU(dim, inner_dim)
        )

        self.net = nn.Sequential(
            project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)
        )

    def forward(self, x):
        return self.net(x)

zFeedForward.forward)NrH   FrI   rB   r   r   r9   r   rG   a   s    rG   c                 C   s   |   D ]}|   q| S )z<
    Zero out the parameters of a module and return it.
    )
parametersdetachzero_)modulepr   r   r   zero_modulet   s   rY   c                 C   s   t jjd| dddS )N    gư>T)

def Normalize(in_channels):
    return torch.nn.GroupNorm(
        num_groups=32, num_channels=in_channels, eps=1e-6, affine=True
    )


class LinearAttention(nn.Module):
    def __init__(self, dim, heads=4, dim_head=32):
        super().__init__()
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x)
        q, k, v = rearrange(
            qkv, "b (qkv heads c) h w -> qkv b heads c (h w)", heads=self.heads, qkv=3
        )
        k = k.softmax(dim=-1)
        context = torch.einsum("bhdn,bhen->bhde", k, v)
        out = torch.einsum("bhde,bhdn->bhen", context, q)
        out = rearrange(
            out, "b heads c (h w) -> b (heads c) h w", heads=self.heads, h=h, w=w
        )
        return self.to_out(out)

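# Editor's note: contracting k with v first ("bhdn,bhen->bhde") makes the cost
# linear in the number of positions h * w instead of quadratic — the standard
# linear-attention trade-off. A minimal sketch:
#
#   la = LinearAttention(dim=64, heads=4, dim_head=32)
#   y = la(torch.randn(1, 64, 32, 32))  # -> (1, 64, 32, 32)
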
zLinearAttention.forward)rH   rZ   rB   r   r   r9   r   rb      s    rb   c                       sf   e Zd ZdZ						ddeded	ed
ee dededef fddZ	de
jde
jfddZ  ZS )SelfAttention)xformersr   r(      FNrI   ry   r,   	num_headsqkv_biasqk_scale	attn_drop	proj_drop	attn_modec           	         sz   t    || _|| }|p|d | _tj||d |d| _t|| _t||| _	t|| _
|| jv s8J || _d S )N      rc   rd   )r2   r3   r{   scaler   r4   rl   rM   r~   r5   r   ATTENTION_MODESr   )	r6   r,   r{   r|   r}   r~   r   r   head_dimr9   r   r   r3      s   


zSelfAttention.__init__r?   returnc           
      C   sf  |j \}}}| |}| jdkr8t|dd| jd }|d |d |d }}}tjj	|||}t|d}no| jd	kret|d
d| jd}|d |d |d }}}t
j|||}t|d| jd}nB| jdkrt|dd| jd}|d |d |d }}}||dd | j }	|	jdd}	| |	}	|	| dd|||}nt| |}| |}|S )Nr   zB L (K H D) -> K B H L Drc   )KHr   r&   r1   zB H L D -> B L (H D)ry   zB L (K H D) -> K B L H DzB L H D -> B L (H D))r   r(   r%   r;   )r'   rl   r   r   r{   floatr   r   
functionalscaled_dot_product_attentionry   opsmemory_efficient_attention	transposer   ro   r~   reshapeNotImplementedr5   r   )
r6   r?   BLCrl   rs   rt   ru   attnr   r   r   rA      s6   







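# Editor's usage sketch: attn_mode selects the kernel at construction time;
# "torch" needs PyTorch >= 2.0, "xformers" needs the xformers package, and
# "math" is the explicit (slow but dependency-free) fallback.
#
#   sa = SelfAttention(dim=512, num_heads=8, attn_mode="math")
#   y = sa(torch.randn(2, 196, 512))  # -> (2, 196, 512)
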
zSelfAttention.forward)rz   FNrI   rI   ry   )rC   rD   rE   r   rJ   boolr   r   strr3   r   TensorrA   rF   r   r   r9   r   rx      s0    rx   c                       r/   )SpatialSelfAttentionc                    s~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S )Nr&   r   kernel_sizestridepadding)r2   r3   r`   ra   normr   r   rg   rs   rt   ru   proj_out)r6   r`   r9   r   r   r3      s   





zSpatialSelfAttention.__init__c                 C   s   |}|  |}| |}| |}| |}|j\}}}}	t|d}t|d}td||}
|
t|d  }
tj	j
j|
dd}
t|d}t|
d}
td||
}t|d|d	}| |}|| S )
Nb c h w -> b (h w) czb c h w -> b c (h w)zbij,bjk->bikr   r1   r;   zb i j -> b j izb c (h w) -> b c h wrm   )r   rs   rt   ru   r'   r   r   rp   rJ   r   r   ro   r   )r6   r?   h_rs   rt   ru   rq   rr   rm   rn   w_r   r   r   rA      s"   








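# Editor's note: this is the single-head attention block used on 2D feature
# maps (1x1 conv projections, softmax over all h * w positions, residual
# output), e.g.:
#
#   ssa = SpatialSelfAttention(in_channels=512)
#   y = ssa(torch.randn(1, 512, 16, 16))  # -> (1, 512, 16, 16)
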
zSpatialSelfAttention.forwardrB   r   r   r9   r   r      s    r   c                       s:   e Zd Z					d
 fdd	Z				ddd	Z  ZS )CrossAttentionNrz   @   rI   c                    s   t    || }t||}|d | _|| _tj||dd| _tj||dd| _tj||dd| _	t
t||t|| _|| _d S )Nr   Frd   )r2   r3   r   r   rf   r   r4   to_qto_kto_vrK   rM   ri   backend)r6   	query_dimcontext_dimrf   rj   rQ   r   rR   r9   r   r   r3      s   
	


zCrossAttention.__init__r   c                    sJ  | j  |d ur|jd }tj||gdd}| |}t||}| |}| |}	|rU|jd | dks6J |jd | }
t|d d | d|
d}t|	d d | d|
d}	t	 fdd|||	f\}}}		 t
di t| j  tj|||	|d}W d    n1 sw   Y  ~~~	t|d	 d
}|d ur|d d |d f }| |S )Nr&   r;   r   b ... -> (b n) ...nc                    s   t | d dS )Nzb n (h d) -> b h n dr   )r   r"   r   r   r   <lambda>9  s    z(CrossAttention.forward.<locals>.<lambda>)	attn_maskzb h n d -> b n (h d)r   r   )rf   r'   r   catr   r   r   r   r   mapr   BACKEND_MAPr   r=   r   r   ri   )r6   r?   rv   maskadditional_tokensn_times_crossframe_attn_in_selfn_tokens_to_maskrs   rt   ru   n_cprw   r   r   r   rA     s:   





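# Editor's usage sketch (PyTorch >= 2.0 path): with context=None the module
# degenerates to self-attention; with a context tensor it cross-attends to it.
# The sizes below are illustrative only.
#
#   ca = CrossAttention(query_dim=320, context_dim=1024, heads=8, dim_head=40)
#   x = torch.randn(2, 4096, 320)    # flattened 64x64 feature map
#   ctx = torch.randn(2, 77, 1024)   # e.g. text-encoder tokens
#   y = ca(x, context=ctx)           # -> (2, 4096, 320)
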
zCrossAttention.forward)Nrz   r   rI   NNNNr   rB   r   r   r9   r   r      s    r   c                       s2   e Zd Z	d
 fdd	Z				ddd	Z  ZS )MemoryEfficientCrossAttentionNrz   r   rI   c                    s   t    td| jj d| d| d| d| d || }t||}|| _|| _t	j
||dd| _t	j
||dd| _t	j
||dd| _t	t	
||t	|| _d | _d S )	NzSetting up z. Query dim is z, context_dim is z and using z heads with a dimension of .Frd   )r2   r3   logpydebugr:   rC   r   rf   rj   r   r4   r   r   r   rK   rM   ri   attention_op)r6   r   r   rf   rj   rQ   kwargsrR   r9   r   r   r3   ]  s*   


z&MemoryEfficientCrossAttention.__init__r   c              
      s8  |d ur|j d }tj||gdd}|}t||}|d urE|j d |j d krE|j d |j d  dkrE|j|j d |j d  dd}|}|}	|rt|j d | dks\J t|d d | d|d}t|	d d | d|d}	|j \ }
}
t	 fdd|||	f\}}}	t
tjt
dkrd	}|j d }t|| }t }t|D ]"}t|| |d | }|tjj|| || |	| d jd
 qt|d}ntjj|||	d jd
}t|rt|d j|j d jdddd |j d jj }|d ur|d d |d f }|S )Nr&   r;   r   r   r   c                    sH   |  d | jd jjdddd j | jd j S )Nrc   r&   r   r1   )	unsqueezer   r'   rf   rj   permute
contiguousr"   rq   r6   r   r   r     s
    z7MemoryEfficientCrossAttention.forward.<locals>.<lambda>z0.0.21i   )	attn_biasopr1   rc   )r'   r   r   r   r   repeat_interleaver   r   r   r   r   parsery   __version__r(   ceillistrangesliceappendr   r   r   r   NotImplementedErrorr   r   rf   rj   r   ri   )r6   r?   rv   r   r   r   r   rs   rt   ru   _max_bsN	n_batchesrw   i_batchbatchr   r   r   rA   u  sp   


4




	

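# Editor's usage sketch (requires xformers): same interface as CrossAttention,
# but q/k/v are flattened to (b * heads, n, dim_head) for
# xformers.ops.memory_efficient_attention, with the batch chunked to max_bs to
# dodge the xformers issue referenced above.
#
#   mea = MemoryEfficientCrossAttention(query_dim=320, context_dim=1024, heads=8, dim_head=40)
#   y = mea(torch.randn(2, 4096, 320), context=torch.randn(2, 77, 1024))  # -> (2, 4096, 320)
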

class BasicTransformerBlock(nn.Module):
    ATTENTION_MODES = {
        "softmax": CrossAttention,  # vanilla attention
        "softmax-xformers": MemoryEfficientCrossAttention,  # ampere
    }

    def __init__(
        self,
        dim,
        n_heads,
        d_head,
        dropout=0.0,
        context_dim=None,
        gated_ff=True,
        checkpoint=True,
        disable_self_attn=False,
        attn_mode="softmax",
        sdp_backend=None,
    ):
        super().__init__()
        assert attn_mode in self.ATTENTION_MODES
        if attn_mode != "softmax" and not XFORMERS_IS_AVAILABLE:
            logpy.warn(
                f"Attention mode '{attn_mode}' is not available. Falling "
                f"back to native attention. This is not a problem in "
                f"Pytorch >= 2.0. FYI, you are running with PyTorch "
                f"version {torch.__version__}."
            )
            attn_mode = "softmax"
        elif attn_mode == "softmax" and not SDP_IS_AVAILABLE:
            logpy.warn(
                "We do not support vanilla attention anymore, as it is too "
                "expensive. Sorry."
            )
            if not XFORMERS_IS_AVAILABLE:
                assert (
                    False
                ), "Please install xformers via e.g. 'pip install xformers==0.0.16'"
            else:
                logpy.info("Falling back to xformers efficient attention.")
                attn_mode = "softmax-xformers"
        attn_cls = self.ATTENTION_MODES[attn_mode]
        if version.parse(torch.__version__) >= version.parse("2.0.0"):
            assert sdp_backend is None or isinstance(sdp_backend, SDPBackend)
        else:
            assert sdp_backend is None
        self.disable_self_attn = disable_self_attn
        self.attn1 = attn_cls(
            query_dim=dim,
            heads=n_heads,
            dim_head=d_head,
            dropout=dropout,
            context_dim=context_dim if self.disable_self_attn else None,
            backend=sdp_backend,
        )  # is a self-attention if not self.disable_self_attn
        self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
        self.attn2 = attn_cls(
            query_dim=dim,
            context_dim=context_dim,
            heads=n_heads,
            dim_head=d_head,
            dropout=dropout,
            backend=sdp_backend,
        )  # is self-attn if context is none
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.norm3 = nn.LayerNorm(dim)
        self.checkpoint = checkpoint
        if self.checkpoint:
            logpy.debug(f"{self.__class__.__name__} is using checkpointing")

    def forward(
        self,
        x,
        context=None,
        additional_tokens=None,
        n_times_crossframe_attn_in_self=0,
    ):
        kwargs = {"x": x}
        if context is not None:
            kwargs.update({"context": context})
        if additional_tokens is not None:
            kwargs.update({"additional_tokens": additional_tokens})
        if n_times_crossframe_attn_in_self:
            kwargs.update(
                {"n_times_crossframe_attn_in_self": n_times_crossframe_attn_in_self}
            )
        if self.checkpoint:
            # NOTE: the checkpointed path only forwards x and context
            return checkpoint(self._forward, x, context)
        else:
            return self._forward(**kwargs)

    def _forward(
        self,
        x,
        context=None,
        additional_tokens=None,
        n_times_crossframe_attn_in_self=0,
    ):
        x = (
            self.attn1(
                self.norm1(x),
                context=context if self.disable_self_attn else None,
                additional_tokens=additional_tokens,
                n_times_crossframe_attn_in_self=n_times_crossframe_attn_in_self
                if not self.disable_self_attn
                else 0,
            )
            + x
        )
        x = (
            self.attn2(
                self.norm2(x), context=context, additional_tokens=additional_tokens
            )
            + x
        )
        x = self.ff(self.norm3(x)) + x
        return x

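# Editor's usage sketch (PyTorch >= 2.0 path): the canonical pre-norm block —
# self-attention, cross-attention against `context`, then the feed-forward,
# each with a residual connection.
#
#   blk = BasicTransformerBlock(dim=320, n_heads=8, d_head=40,
#                               context_dim=1024, checkpoint=False)
#   y = blk(torch.randn(2, 4096, 320), context=torch.randn(2, 77, 1024))  # -> (2, 4096, 320)
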

class BasicTransformerSingleLayerBlock(nn.Module):
    ATTENTION_MODES = {
        "softmax": CrossAttention,  # vanilla attention
        "softmax-xformers": MemoryEfficientCrossAttention,
    }

    def __init__(
        self,
        dim,
        n_heads,
        d_head,
        dropout=0.0,
        context_dim=None,
        gated_ff=True,
        checkpoint=True,
        attn_mode="softmax",
    ):
        super().__init__()
        assert attn_mode in self.ATTENTION_MODES
        attn_cls = self.ATTENTION_MODES[attn_mode]
        self.attn1 = attn_cls(
            query_dim=dim,
            heads=n_heads,
            dim_head=d_head,
            dropout=dropout,
            context_dim=context_dim,
        )
        self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.checkpoint = checkpoint

    def forward(self, x, context=None):
        return checkpoint(self._forward, x, context)

    def _forward(self, x, context=None):
        x = self.attn1(self.norm1(x), context=context) + x
        x = self.ff(self.norm2(x)) + x
        return x


class SpatialTransformer(nn.Module):
    """
    Transformer block for image-like data.
    First, project the input (aka embedding)
    and reshape to b, t, d.
    Then apply standard transformer action.
    Finally, reshape to image
    NEW: use_linear for more efficiency instead of the 1x1 convs
    """

    def __init__(
        self,
        in_channels,
        n_heads,
        d_head,
        depth=1,
        dropout=0.0,
        context_dim=None,
        disable_self_attn=False,
        use_linear=False,
        attn_type="softmax",
        use_checkpoint=True,
        sdp_backend=None,
    ):
        super().__init__()
        logpy.debug(
            f"constructing {self.__class__.__name__} of depth {depth} w/ "
            f"{in_channels} channels and {n_heads} heads."
        )

        if exists(context_dim) and not isinstance(context_dim, list):
            context_dim = [context_dim]
        if exists(context_dim) and isinstance(context_dim, list):
            if depth != len(context_dim):
                logpy.warn(
                    f"{self.__class__.__name__}: Found context dims "
                    f"{context_dim} of depth {len(context_dim)}, which does not "
                    f"match the specified 'depth' of {depth}. Setting context_dim "
                    f"to {depth * [context_dim[0]]} now."
                )
                # depth does not match context dims
                assert all(
                    map(lambda x: x == context_dim[0], context_dim)
                ), "need homogenous context_dim to match depth automatically"
                context_dim = depth * [context_dim[0]]
        elif context_dim is None:
            context_dim = [None] * depth
        self.in_channels = in_channels
        inner_dim = n_heads * d_head
        self.norm = Normalize(in_channels)
        if not use_linear:
            self.proj_in = nn.Conv2d(
                in_channels, inner_dim, kernel_size=1, stride=1, padding=0
            )
        else:
            self.proj_in = nn.Linear(in_channels, inner_dim)

        self.transformer_blocks = nn.ModuleList(
            [
                BasicTransformerBlock(
                    inner_dim,
                    n_heads,
                    d_head,
                    dropout=dropout,
                    context_dim=context_dim[d],
                    disable_self_attn=disable_self_attn,
                    attn_mode=attn_type,
                    checkpoint=use_checkpoint,
                    sdp_backend=sdp_backend,
                )
                for d in range(depth)
            ]
        )
        if not use_linear:
            self.proj_out = zero_module(
                nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)
            )
        else:
            self.proj_out = zero_module(nn.Linear(inner_dim, in_channels))
        self.use_linear = use_linear

    def forward(self, x, context=None):
        # note: if no context is given, cross-attention defaults to self-attention
        if not isinstance(context, list):
            context = [context]
        b, c, h, w = x.shape
        x_in = x
        x = self.norm(x)
        if not self.use_linear:
            x = self.proj_in(x)
        x = rearrange(x, "b c h w -> b (h w) c").contiguous()
        if self.use_linear:
            x = self.proj_in(x)
        for i, block in enumerate(self.transformer_blocks):
            if i > 0 and len(context) == 1:
                i = 0  # use same context for each block
            x = block(x, context=context[i])
        if self.use_linear:
            x = self.proj_out(x)
        x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w).contiguous()
        if not self.use_linear:
            x = self.proj_out(x)
        return x + x_in

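# Editor's usage sketch (assuming PyTorch >= 2.0 for the native-attention
# path): this is the block a UNet inserts between conv stages; it flattens the
# (b, c, h, w) map to tokens, runs `depth` transformer blocks, and restores
# the spatial layout with a residual connection.
#
#   st = SpatialTransformer(320, n_heads=8, d_head=40, depth=1,
#                           context_dim=1024, use_linear=True, attn_type="softmax")
#   y = st(torch.randn(2, 320, 32, 32), context=torch.randn(2, 77, 1024))  # -> (2, 320, 32, 32)
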
zSpatialTransformer.forward)r&   rI   NFFro   TNr   )rC   rD   rE   __doc__r3   rA   rF   r   r   r9   r   r   n  s    Ir   c                       sj   e Zd Z			ddededededee d	ed
ef fddZ	ddej	deej	 dej	fddZ
  ZS )SimpleTransformerNrI   Tr,   r   rf   rj   r   rQ   r	   c           	         sF   t    tg | _t|D ]}| jt|||||d|d qd S )Nr   )rQ   r   r   r	   )r2   r3   r   r   layersr   r   r   )	r6   r,   r   rf   rj   r   rQ   r	   r   r9   r   r   r3     s   

zSimpleTransformer.__init__r?   rv   r   c                 C   s   | j D ]}|||}q|S r   )r  )r6   r?   rv   layerr   r   r   rA     s   
zSimpleTransformer.forward)NrI   Tr   )rC   rD   rE   rJ   r   r   r   r3   r   r   rA   rF   r   r   r9   r   r    s6    r  )9loggingr(   inspectr   typingr   r   r   torch.nn.functionalr   r   r=   einopsr   r   	packagingr   torch.utils.checkpointr	   	getLoggerrC   r   r   r   r   torch.backends.cudar   r   MATHFLASH_ATTENTIONEFFICIENT_ATTENTIONr   
contextlibr   r   ry   xformers.opsr   r   r   r   r$   r.   Moduler0   rG   rY   ra   rb   rx   r   r   r   r   r   r   r  r   r   r   r   <module>   s    

	8-\pw,k