o
    9iX                     @   sF  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ e  e!Z"G dd dej#Z$G dd de$Z%G dd de%Z&G dd de&Z'G dd de&Z(G dd de$Z)G dd dej*Z+G dd de&Z,dS )    N)abstractmethod)contextmanager)AnyDictListOptionalTupleUnion)	rearrange)version   )AbstractRegularizer)LitEma)defaultget_nested_attributeget_obj_from_strinstantiate_from_configc                       s   e Zd ZdZ			ddedef dedef def fddZd	edeef fd
dZ	e
defddZdd ZedddZe
dejfddZe
dejfddZdd ZdefddZ  ZS )AbstractAutoencodera   
    This is the base class for all autoencoders, including image autoencoders, image autoencoders with discriminators,
    unCLIP models, etc. Hence, it is fairly general, and specific features
    (e.g. discriminator training, encoding, decoding) must be implemented in subclasses.
    Njpg	ema_decaymonitor	input_keyc                    s   t    || _|d u| _|d ur|| _| jr.t| |d| _tdt	t
| j  d ttjtdkr>d| _d S d S )N)decayzKeeping EMAs of .z2.0.0F)super__init__r   use_emar   r   	model_emalogpyinfolenlistbuffersr   parsetorch__version__automatic_optimization)selfr   r   r   	__class__ @/data/cameron/vidgen/generative-models/sgm/models/autoencoder.pyr      s   

 
zAbstractAutoencoder.__init__ckptc                 C   s8   |d u rd S t |trdd|id}t|}||  d S )Nz'sgm.modules.checkpoint.CheckpointEngine	ckpt_pathtargetparams)
isinstancestrr   )r'   r,   enginer*   r*   r+   
apply_ckpt1   s   
zAbstractAutoencoder.apply_ckptreturnc                 C      t  NNotImplementedErrorr'   batchr*   r*   r+   	get_input<   s   zAbstractAutoencoder.get_inputc                 O   s   | j r
| |  d S d S r7   )r   r   r'   argskwargsr*   r*   r+   on_train_batch_end@   s   z&AbstractAutoencoder.on_train_batch_endc              
   c   s    | j r| j|   | j|  |d urt| d z!d V  W | j r<| j|   |d ur>t| d d S d S d S | j rX| j|   |d urYt| d w w w )Nz: Switched to EMA weightsz: Restored training weights)r   r   store
parameterscopy_tor   r   restore)r'   contextr*   r*   r+   	ema_scopeE   s(   zAbstractAutoencoder.ema_scopec                 O      t d)Nz-encode()-method of abstract base class calledr8   r=   r*   r*   r+   encodeT      zAbstractAutoencoder.encodec                 O   rG   )Nz-decode()-method of abstract base class calledr8   r=   r*   r*   r+   decodeX   rI   zAbstractAutoencoder.decodec                 C   s<   t d|d  d t|d |fd|i|dt S )Nzloading >>> r/   z <<< optimizer from configlrr0   )r   r   r   getdict)r'   r0   rK   cfgr*   r*   r+   !instantiate_optimizer_from_config\   s   
z5AbstractAutoencoder.instantiate_optimizer_from_configc                 C   r6   r7   r8   r'   r*   r*   r+   configure_optimizersb   s   z(AbstractAutoencoder.configure_optimizers)NNr   r7   )__name__
__module____qualname____doc__r	   floatr2   r   rM   r4   r   r   r<   r@   r   rF   r$   TensorrH   rJ   rO   rQ   __classcell__r*   r*   r(   r+   r      s0    

r   c                !       s:  e Zd ZdZdddddddddddddeded	ed
edeedf dedeeee	   deee
  deeee	   deee
  dededede	e
f dee	 deee	  f fddZdedejfddZdefddZdefddZd d! Z	"	"dBd#ejd$ed%edeejeeje
f f fd&d'Zd(ejdejfd)d*Zd#ejdeejeje
f fd+d,Z	dCde
d-ed.edejfd/d0Zde
d-efd1d2Zde
d-edefd3d4ZdDde
d-ed6e	defd7d8Zd9eee	  d:ee
 deeee	ef  ef fd;d<Zdeejj  fd=d>Z!e" 	dEde
d?ee de
fd@dAZ#  Z$S )FAutoencodingEnginez
    Base class for all image autoencoders that we train, like VQGAN or AutoencoderKL
    (we also restore them explicitly as special cases for legacy reasons).
    Regularizations such as KL or VQ are moved to the regularizer class.
    N      ?r   g      @)optimizer_configlr_g_factortrainable_ae_paramsae_optimizer_argstrainable_disc_paramsdisc_optimizer_argsdisc_start_iterdiff_boost_factorckpt_enginer-   additional_decode_keysencoder_configdecoder_configloss_configregularizer_configr[   r\   r]   r^   r_   r`   ra   rb   rc   r-   rd   c                   sL  t  j|i | d| _t|| _t|| _t|| _t|| _t|ddi| _	|| _
|| _|| _|| _| jd urVt|dd tt| jD | _t| jt| jksUJ ni g| _|	| _| jd urt|
dd tt| jD | _t| jt| jks~J ni g| _|d ur|d u sJ dtd | t|| tt|g | _d S )	NFr/   ztorch.optim.Adamc                 S      g | ]}i qS r*   r*   .0_r*   r*   r+   
<listcomp>       z/AutoencodingEngine.__init__.<locals>.<listcomp>c                 S   ri   r*   r*   rj   r*   r*   r+   rm      rn   z#Can't set ckpt_engine and ckpt_pathz>Checkpoint path is deprecated, use `checkpoint_egnine` instead)r   r   r&   r   encoderdecoderlossregularizationr   r[   rb   ra   r\   r]   ranger    r^   r_   r`   r   warnr4   setrd   )r'   re   rf   rg   rh   r[   r\   r]   r^   r_   r`   ra   rb   rc   r-   rd   r>   r?   r(   r*   r+   r   m   sF   





zAutoencodingEngine.__init__r;   r5   c                 C   s
   || j  S r7   )r   r:   r*   r*   r+   r<      s   
zAutoencodingEngine.get_inputc                 C   sh   g }t | jdr|t| j 7 }t | jdr |t| j 7 }|t| j  }|t| j  }|S )N$get_trainable_autoencoder_parametersget_trainable_parameters)	hasattrrq   r!   rv   rr   rw   ro   rB   rp   r'   r0   r*   r*   r+   get_autoencoder_params   s   z)AutoencodingEngine.get_autoencoder_paramsc                 C   s&   t | jdrt| j }|S g }|S )Nrw   )rx   rq   r!   rw   ry   r*   r*   r+   get_discriminator_params   s
   z+AutoencodingEngine.get_discriminator_paramsc                 C   s
   | j  S r7   )rp   get_last_layerrP   r*   r*   r+   r|      s   
z!AutoencodingEngine.get_last_layerFxreturn_reg_logunregularizedc                 C   s6   |  |}|r|t fS | |\}}|r||fS |S r7   )ro   rM   rr   )r'   r}   r~   r   zreg_logr*   r*   r+   rH      s   

zAutoencodingEngine.encoder   c                 K   s   | j |fi |}|S r7   )rp   )r'   r   r?   r}   r*   r*   r+   rJ      s   zAutoencodingEngine.decodec                 K   s.   | j |dd\}}| j|fi |}|||fS )NT)r~   )rH   rJ   )r'   r}   additional_decode_kwargsr   r   decr*   r*   r+   forward   s   
zAutoencodingEngine.forward	batch_idxoptimizer_idxc                    s<  |   } fdd| j D }| |fi |\}}}t| jdr<||| j|  d|| dfdd| jjD nt |dkr{| j||fi }	t	|	t
rW|	\}
}n|	}
d|
 i}| j|d	d
d
d
d	d | jd|
  d
d	d	d
d |
S |dkr| j||fi \}}| j|d	d
d
d
d |S td| )Nc                       i | ]}| | qS r*   r*   rk   keyr;   r*   r+   
<dictcomp>   s    
z:AutoencodingEngine.inner_training_step.<locals>.<dictcomp>forward_keystrainr   r   global_step
last_layersplitregularization_logautoencoderc                    r   r*   r*   rk   k
extra_infor*   r+   r          r   ztrain/loss/recFT)prog_barloggeron_stepon_epoch	sync_distrq   )r   r   r   r      )r   r   r   r   zUnknown optimizer )r<   rd   intersectionrx   rq   r   r|   r   rM   r1   tupledetachlog_dictlogmeanr9   )r'   r;   r   r   r}   r   r   xrecr   out_lossaelosslog_dict_aedisclosslog_dict_discr*   )r;   r   r+   inner_training_step   s\   


	



z&AutoencodingEngine.inner_training_stepc                 C   s   |   }t|ts|g}|t| }| j| jk rd}|| }|  |  | j|||d}| 	| W d    n1 s>w   Y  |
  d S )Nr   )r   )
optimizersr1   r!   r    r   ra   	zero_gradtoggle_modelr   manual_backwardstep)r'   r;   r   optsr   optrq   r*   r*   r+   training_step  s   

z AutoencodingEngine.training_stepc                 C   sT   |  ||}|   | j ||dd}|| W d    |S 1 s#w   Y  |S )N_ema)postfix)_validation_steprF   update)r'   r;   r   r   log_dict_emar*   r*   r+   validation_step*  s   

z"AutoencodingEngine.validation_step r   c                    s  |  |}| |\}}}t| jdr-|d| j|  d| || d  fdd| jjD  nt  | j||fi  }t|trD|\}	}
n|}	d| d|		 i}
|
}d v rkd	 d< | j||fi  \}}|
| | jd| d|
d| d d
d | j|d
d |S )Nr   r   valr   c                    r   r*   r*   r   r   r*   r+   r   ?  r   z7AutoencodingEngine._validation_step.<locals>.<dictcomp>z	/loss/recr   r   T)r   )r<   rx   rq   r   r|   r   rM   r1   r   r   r   r   r   )r'   r;   r   r   r}   r   r   r   r   r   r   full_log_dictr   r   r*   r   r+   r   1  s<   
	



z#AutoencodingEngine._validation_stepparameter_namesoptimizer_argsc                 C   s   g }d}t ||D ]H\}}g }|D ]6}g }	t|}
|  D ]\}}t|
|r3|	| || 7 }qt|	dkrBt	d|  |
|	 q|d|i| q	||fS )Nr   z$Did not find parameters for pattern r0   )ziprecompilenamed_parametersmatchappendnumelr    r   rt   extend)r'   r   r   groups
num_paramsnamesr>   r0   pattern_pattern_paramspatternp_nameparamr*   r*   r+   get_param_groupsW  s"   

z#AutoencodingEngine.get_param_groupsc                 C   s   | j d u r
|  }n| | j | j\}}td|d | jd u r'|  }n| | j| j\}}td|d | 	|t
| jd| j | j}|g}t|dkr`| 	|| j| j}|| |S )Nz,Number of trainable autoencoder parameters: ,z.Number of trainable discriminator parameters: rZ   r   )r]   rz   r   r^   r   r   r_   r{   r`   rO   r   r\   learning_rater[   r    r   )r'   	ae_paramsnum_ae_paramsdisc_paramsnum_disc_paramsopt_aer   opt_discr*   r*   r+   rQ   k  s4   






z'AutoencodingEngine.configure_optimizersadditional_log_kwargsc              	      s  t  }i }| }|fdd| jD  | |fi |\}}}||d< ||d< dtt|dd|  }	|	dd d	|	 d |d
< d	t| j	|	 dd d |d< t
| jdri|| j|| |  C | |fi |\}}
}|
|d< dtt|
dd|  }|dd d	| d |d< d	t| j	| dd d |d< W d    n1 sw   Y   r|  | |fi |\}}}dd fdd D  }|||< |S )Nc                    r   r*   r*   r   r   r*   r+   r     r   z1AutoencodingEngine.log_images.<locals>.<dictcomp>inputsreconstructionsg      ?g      rZ   r   g       @diffg        r   
diff_boost
log_imagesreconstructions_emadiff_emadiff_boost_emazreconstructions--c                    s   g | ]}| d  |  qS )=r*   r   )r   r*   r+   rm     s    z1AutoencodingEngine.log_images.<locals>.<listcomp>)rM   r<   r   rd   r   r$   absclampclamp_rb   rx   rq   r   rF   join)r'   r;   r   r?   r   r   r}   rl   r   r   xrec_emar   xrec_addlog_strr*   )r   r;   r+   r     sB   

	
zAutoencodingEngine.log_images)FF)r   )r   r7   )%rR   rS   rT   rU   r   r	   rV   r   r   r2   rM   intr   r$   rW   r<   r!   rz   r{   r|   boolr   rH   rJ   r   r   r   r   r   r   r   optim	OptimizerrQ   no_gradr   rX   r*   r*   r(   r+   rY   f   s    
	



=



<&

rY   c                       sz   e Zd Zdef fddZdef fddZ	ddejd	e	de
ejeejef f fd
dZdejdejfddZ  ZS )AutoencodingEngineLegacy	embed_dimc                    s   | dd | _| d}| dd }| dd }t jdd|dd|dd| tjd	|d
  |d  d	|d
  | d	| _tj||d d	| _|| _	| 
t|| d S )Nmax_batch_sizeddconfigr-   rc   z*sgm.modules.diffusionmodules.model.Encoderr.   z*sgm.modules.diffusionmodules.model.Decoder)re   rf   r   double_z
z_channelsr*   )popr   r   r   r$   nnConv2d
quant_convpost_quant_convr   r4   r   )r'   r   r?   r   r-   rc   r(   r*   r+   r     s,   
	z!AutoencodingEngineLegacy.__init__r5   c                    s   t   }|S r7   )r   rz   ry   r(   r*   r+   rz     s   
z/AutoencodingEngineLegacy.get_autoencoder_paramsFr}   r~   c           
      C   s   | j d u r| |}| |}n:|jd }| j }tt|| }t }t|D ]}| ||| |d |  }| |}|	| q(t
|d}| |\}}	|rW||	fS |S Nr   r   )r   ro   r   shaper   mathceilr!   rs   r   r$   catrr   )
r'   r}   r~   r   Nbs	n_batchesi_batchz_batchr   r*   r*   r+   rH     s    



zAutoencodingEngineLegacy.encoder   c           	      K   s   | j d u r| |}| j|fi |}|S |jd }| j }tt|| }t }t|D ]}| ||| |d |  }| j|fi |}|	| q-t
|d}|S r   )r   r   rp   r   r   r   r   r!   rs   r   r$   r   )	r'   r   decoder_kwargsr   r   r   r  r  	dec_batchr*   r*   r+   rJ     s   


zAutoencodingEngineLegacy.decodeF)rR   rS   rT   r   r   r!   rz   r$   rW   r   r	   r   rM   rH   rJ   rX   r*   r*   r(   r+   r     s    
r   c                          e Zd Z fddZ  ZS )AutoencoderKLc                    s4   d|v r| d|d< t jddddii| d S )N
lossconfigrg   rh   r/   Asgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizerr*   r   r   r   r'   r?   r(   r*   r+   r     s   

zAutoencoderKL.__init__rR   rS   rT   r   rX   r*   r*   r(   r+   r        r  c                       s.   e Zd Z	ddededef fddZ  ZS )AutoencoderLegacyVQFr   n_embedsane_index_shapec                    sH   d|v rt d |d|d< t jddd|||ddi| d S )	Nr	  z8Parameter `lossconfig` is deprecated, use `loss_config`.rg   rh   z>sgm.modules.autoencoding.regularizers.quantize.VectorQuantizer)n_ee_dimr  r.   r*   )r   rt   r   r   r   )r'   r   r  r  r?   r(   r*   r+   r     s   


zAutoencoderLegacyVQ.__init__r  )rR   rS   rT   r   r   r   rX   r*   r*   r(   r+   r    s    r  c                       sR   e Zd Z fddZdedefddZdedefddZdedefd	d
Z  ZS )IdentityFirstStagec                    s   t  j|i | d S r7   )r   r   r=   r(   r*   r+   r   &  s   zIdentityFirstStage.__init__r}   r5   c                 C      |S r7   r*   )r'   r}   r*   r*   r+   r<   )     zIdentityFirstStage.get_inputc                 O   r  r7   r*   r'   r}   r>   r?   r*   r*   r+   rH   ,  r  zIdentityFirstStage.encodec                 O   r  r7   r*   r  r*   r*   r+   rJ   /  r  zIdentityFirstStage.decode)	rR   rS   rT   r   r   r<   rH   rJ   rX   r*   r*   r(   r+   r  %  s
    r  c                       s   e Zd Z			ddejdedeeef ee f de	de
ee	ef  f fdd	Zd
ejfddZ	ddejdedeef d
ejfddZ  ZS )AEIntegerWrapper   r  rr   Nmodelr   regularization_keyencoder_kwargsc                    sR   t    || _t|drt|dsJ dt||| _|| _t|ddi| _d S )NrH   rJ   zNeed AE interfacer~   T)	r   r   r  rx   r   rr   r   r   r  )r'   r  r   r  r  r(   r*   r+   r   4  s   
zAEIntegerWrapper.__init__r5   c                 C   sR   | j rJ | jj d| jj|fi | j\}}t|ts J |d }t|dS )Nz" only supports inference currentlymin_encoding_indiceszb ... -> b (...))	trainingr)   rR   r  rH   r  r1   rM   r
   )r'   r}   rl   r   indsr*   r*   r+   rH   D  s   
zAEIntegerWrapper.encoder   c                 C   sh   t || j}|d ur#t|dksJ d| t|d|d |d d}| j|}t|d}| j|S )Nr   zUnhandeled shape zb (h w) -> b h wr   r   )hwzb h w c -> b c h w)r   r   r    r
   rr   get_codebook_entryr  rJ   )r'   r   r   r!  r*   r*   r+   rJ   M  s   
zAEIntegerWrapper.decode)r  rr   Nr7   )rR   rS   rT   r   Moduler	   r   r   r   r2   r   r   r   r   r$   rW   rH   r   r!   rJ   rX   r*   r*   r(   r+   r  3  s,    
r  c                       r  )AutoencoderKLModeOnlyc                    s:   d|v r| d|d< t jdddddidi| d S )	Nr	  rg   rh   r
  sampleFr.   r*   r  r  r(   r*   r+   r   [  s   

zAutoencoderKLModeOnly.__init__r  r*   r*   r(   r+   r%  Z  r  r%  )-loggingr   r   abcr   
contextlibr   typingr   r   r   r   r   r	   pytorch_lightningplr$   torch.nnr   einopsr
   	packagingr   !modules.autoencoding.regularizersr   modules.emar   utilr   r   r   r   	getLoggerrR   r   LightningModuler   rY   r   r  r  r  r$  r  r%  r*   r*   r*   r+   <module>   s2     
P  QG'