o
    [i>                     @   s   d dl Z d dlmZ d dlZd dlZdd ZdddZG dd dejZ	G d	d
 d
ejZ
G dd dejZG dd dejZG dd dejZG dd dejZG dd deZG dd dejZdS )    Nc                 C   s   | t |  S N)torchsigmoid)x r   =/data/cameron/vidgen/unified_video_action/./simple_uva/vae.pynonlinearity	   s   r       c                 C   s   t jj|| dddS )Ngư>T)
num_groupsnum_channelsepsaffine)r   nn	GroupNorm)in_channelsr
   r   r   r   	Normalize   s   r   c                       $   e Zd Z fddZdd Z  ZS )Upsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )N      kernel_sizestridepaddingsuper__init__	with_convr   r   Conv2dconvselfr   r   	__class__r   r   r      s   

zUpsample.__init__c                 C   s(   t jjj|ddd}| jr| |}|S )N       @nearest)scale_factormode)r   r   
functionalinterpolater   r   r!   r   r   r   r   forward   s   
zUpsample.forward__name__
__module____qualname__r   r+   __classcell__r   r   r"   r   r      s    r   c                       r   )
Downsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr      r   r   r   r    r"   r   r   r   %   s   

zDownsample.__init__c                 C   sF   | j rd}tjjj||ddd}| |}|S tjjj|ddd}|S )N)r   r   r   r   constantr   )r'   valuer2   )r   r   )r   r   r   r(   padr   
avg_pool2d)r!   r   r5   r   r   r   r+   .   s   
zDownsample.forwardr,   r   r   r"   r   r1   $   s    	r1   c                       s.   e Zd Zdddd fdd
Zdd Z  ZS )	ResnetBlockNFi   )out_channelsconv_shortcuttemb_channelsc                   s   t    || _|d u r|n|}|| _|| _t|| _tjj	||dddd| _
|dkr3tj||| _t|| _tj|| _tjj	||dddd| _| j| jkrp| jrbtjj	||dddd| _d S tjj	||dddd| _d S d S )Nr   r   r   r   )r   r   r   r8   use_conv_shortcutr   norm1r   r   r   conv1Linear	temb_projnorm2Dropoutdropoutconv2r9   nin_shortcut)r!   r   r8   r9   rB   r:   r"   r   r   r   9   s2   
	





zResnetBlock.__init__c                 C   s   |}|  |}t|}| |}|d ur'|| t|d d d d d d f  }| |}t|}| |}| |}| j| jkrQ| j	rL| 
|}|| S | |}|| S r   )r<   r   r=   r?   r@   rB   rC   r   r8   r;   r9   rD   )r!   r   tembhr   r   r   r+   ]   s    

&




zResnetBlock.forwardr,   r   r   r"   r   r7   8   s    $r7   c                       r   )	AttnBlockc                    s~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S )Nr   r   r   )r   r   r   r   normr   r   r   qkvproj_out)r!   r   r"   r   r   r   u   s   





zAttnBlock.__init__c                 C   s   |}|  |}| |}| |}| |}|j\}}}}	|||||	 }|ddd}|||||	 }t||}
|
t	|d  }
tj
jj|
dd}
|||||	 }|
ddd}
t||
}|||||	}| |}|| S )Nr   r2   r   g      ࿩dim)rH   rI   rJ   rK   shapereshapepermuter   bmmintr   r(   softmaxrL   )r!   r   Zh_rI   rJ   rK   bcrF   ww_r   r   r   r+      s$   




zAttnBlock.forwardr,   r   r   r"   r   rG   t   s    rG   c                       s>   e Zd Zdddddddddd	dd
 fdd
Zdd Z  ZS )Encoder   r   r   r   r2   r2      r2   )           T   r]   )chout_chch_multnum_res_blocksattn_resolutionsrB   resamp_with_convr   
resolution
z_channelsdouble_zc             
      s  t    || _d| _t|| _|| _|	| _|| _t	j
j|| jdddd| _|	}dt| }t
 | _t| jD ]X}t
 }t
 }|||  }|||  }t| jD ]}|t||| j|d |}||v rn|t| qSt
 }||_||_|| jd krt|||_|d }| j| q8t
 | _t||| j|d| j_t|| j_t||| j|d| j_t|| _t	j
j||rd|
 n|
dddd| _d S )Nr   r   r   r   r   r   r8   r:   rB   r2   ) r   r   r`   temb_chlennum_resolutionsrc   rf   r   r   r   r   conv_intuple
ModuleListdownrangeappendr7   rG   Moduleblockattnr1   
downsamplemidblock_1attn_1block_2r   norm_outconv_out)r!   r`   ra   rb   rc   rd   rB   re   r   rf   rg   rh   ignore_kwargscurr_res
in_ch_multi_levelru   rv   block_in	block_outi_blockrq   r"   r   r   r      sx   






zEncoder.__init__c                 C   s   d }|  |g}t| jD ]D}t| jD ](}| j| j| |d |}t| j| jdkr7| j| j| |}|| q|| jd krQ|| j| 	|d  q|d }| j
||}| j
|}| j
||}| |}t|}| |}|S )Nr   r   )rn   rr   rm   rc   rq   ru   rl   rv   rs   rw   rx   ry   rz   r{   r|   r   r}   )r!   r   rE   hsr   r   rF   r   r   r   r+      s&   

zEncoder.forwardr,   r   r   r"   r   rY      s    SrY   c                       s>   e Zd Zdddddddddd	d
d fdd
Zdd Z  ZS )DecoderrZ   r   r[   r2   r   r^   Tr_   r]   F)r`   ra   rb   rc   rd   rB   re   r   rf   rg   give_pre_endc             
      s  t    || _d| _t|| _|| _|	| _|| _|| _	dt
| }||| jd   }|	d| jd   }d|
||f| _td| jt| j tjj|
|dddd| _t | _t||| j|d| j_t|| j_t||| j|d| j_t | _tt| jD ]R}t }t }|||  }t| jd D ]}|t||| j|d |}||v r|t| qt }||_ ||_!|dkrt"|||_#|d }| j$d| qt%|| _&tjj||dddd| _'d S )	Nr   ri   r   r2   z+Working with z of shape {} = {} dimensions.r   r   rj   )(r   r   r`   rk   rl   rm   rc   rf   r   r   ro   Zz_shapeprintformatnpprodr   r   r   rn   rt   rx   r7   ry   rG   rz   r{   rp   upreversedrr   rs   ru   rv   r   upsampleinsertr   r|   r}   )r!   r`   ra   rb   rc   rd   rB   re   r   rf   rg   r   r~   r   r   r   r   ru   rv   r   r   r   r"   r   r   r     s~   








zDecoder.__init__c                 C   s   |j | _d }| |}| j||}| j|}| j||}tt| j	D ]7}t| j
d D ]!}| j| j| ||}t| j| jdkrP| j| j| |}q/|dkr]| j| |}q&| jrc|S | |}t|}| |}|S )Nr   r   )rO   Zlast_z_shapern   rx   ry   rz   r{   r   rr   rm   rc   r   ru   rl   rv   r   r   r|   r   r}   )r!   zrE   rF   r   r   r   r   r   r+   n  s*   


zDecoder.forwardr,   r   r   r"   r   r     s    Yr   c                   @   s@   e Zd ZdddZdd ZdddZg d	fd
dZdd ZdS )DiagonalGaussianDistributionFc                 C   s   || _ tj|ddd\| _| _t| jdd| _|| _td| j | _t| j| _	| jr@t
| jj| j jd | _	| _d S d S )Nr2   r   rM   g      >g      4@      ?device)
parametersr   chunkmeanlogvarclampdeterministicexpstdvar
zeros_liketor   )r!   r   r   r   r   r   r     s   z%DiagonalGaussianDistribution.__init__c                 C   s*   | j | jt| j jj| jjd  }|S )Nr   )r   r   r   randnrO   r   r   r   r*   r   r   r   sample  s   
z#DiagonalGaussianDistribution.sampleNc                 C   s   | j r	tdgS |d u r%dtjt| jd| j d | j g dd S dtjt| j|j d|j | j|j  d | j |j g dd S )Nr^   r   r2   g      ?r   r2   r   rM   )r   r   Tensorsumpowr   r   r   )r!   otherr   r   r   kl  s&   
zDiagonalGaussianDistribution.klr   c                 C   sR   | j r	tdgS tdtj }dtj|| j t|| j	 d| j
  |d S )Nr^   r$   r   r2   rM   )r   r   r   r   logpir   r   r   r   r   )r!   r   dimsZlogtwopir   r   r   nll  s    z DiagonalGaussianDistribution.nllc                 C   s   | j S r   )r   )r!   r   r   r   r'     s   z!DiagonalGaussianDistribution.mode)Fr   )r-   r.   r/   r   r   r   r   r'   r   r   r   r   r     s    

	r   c                       sD   e Zd Z		d fdd	Zdd Zdd Zd	d
 ZdddZ  ZS )AutoencoderKLTNc           	         s   t    |j}|j}t||d| _t||d| _|| _| jr!dnd}t	j
d| || d| _t	j
||d| _|| _|d urMtj|rO| | d S d S d S )N)rb   rg   r2   r   )r   r   vae_embed_dimrb   rY   encoderr   decoderuse_variationalr   r   r   
quant_convpost_quant_conv	embed_dimospathexistsinit_from_ckpt)	r!   autoencoder_pathddconfigr   Z
output_dirkwargsr   rb   multr"   r   r   r     s   
zAutoencoderKL.__init__c                 C   sn   t j|ddd }| j|dd}td td td t|j td	 t|j td
|  td d S )Ncpu)map_locationmodelF)strictzC-------------------------------------------------------------------zLoading pre-trained KL-VAEzMissing keys:zUnexpected keys:zRestored from )r   loadload_state_dictr   missing_keysunexpected_keys)r!   r   sdmsgr   r   r   r     s   

zAutoencoderKL.init_from_ckptc                 C   s<   |  |}| |}| jst|t|fd}t|}|S )Nr   )r   r   r   r   cat	ones_liker   )r!   r   rF   ZmomentsZ	posteriorr   r   r   encode  s   

zAutoencoderKL.encodec                 C   s   |  |}| |}|S r   )r   r   )r!   r   decr   r   r   decode  s   

zAutoencoderKL.decoder   c                 C   s   |r	|  |||S | ||S r   )Ztraining_stepZvalidation_step)r!   inputsdisabletrainZoptimizer_idxr   r   r   r+     s   zAutoencoderKL.forward)TN)TTr   )	r-   r.   r/   r   r   r   r   r+   r0   r   r   r"   r   r     s    r   )r	   )r   torch.nnr   numpyr   r   r   r   rt   r   r1   r7   rG   rY   r   objectr   r   r   r   r   r   <module>   s   
<.r|3