o
    9i0                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Zd dlZd dlmZmZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZm Z m!Z!m"Z" G dd dej#Z$dS )    N)contextmanager)AnyDictListOptionalTupleUnion)
ListConfig	OmegaConf)	load_file)LambdaLR   )UNCONDITIONAL_CONFIG)VideoDecoder)OPENAIUNETWRAPPER)LitEma)defaultdisabled_trainget_obj_from_strinstantiate_from_configlog_txt_as_imgc                        s  e Zd Z																dEdedeeef dedeeef dedeeef d	edeeef d
edeeef dedef dedef dede	de	dedee
df dededee f fddZdeddfddZdd Zdd Ze dd  Ze d!d" Zd#d$ Zd%edefd&d'Zd(d) Zd*d+ Zd,d- ZedFd.d/Zd0d1 Zd2d3 Ze 		4	dGd5ed6eedf d7ed8edee
f fd9d:Z e d%ed;edefd<d=Z!e 	>	?	dHd%ed@edAedBe
e def
dCdDZ"  Z#S )IDiffusionEngineNFH.?      ?jpgconditioner_configsampler_configoptimizer_configscheduler_configloss_fn_confignetwork_wrapper	ckpt_pathuse_emaema_decay_ratescale_factor	input_keylog_keysno_cond_logcompile_modelen_and_decode_n_samples_a_timec                    s  t    || _|| _t|ddi| _t|}tt|	t||d| _	t|| _
|d ur0t|nd | _tt|t| _|| _| | |d urKt|nd | _|| _| jrkt| j	|d| _tdtt| j  d || _|| _|| _|
d ur}| |
 || _d S )Ntargetztorch.optim.AdamW)r(   )decayzKeeping EMAs of .)super__init__r&   r%   r   r   r   r   r   modeldenoisersamplerr   conditionerr   _init_first_stageloss_fnr"   r   	model_emaprintlenlistbuffersr$   disable_first_stage_autocastr'   init_from_ckptr)   )selfnetwork_configdenoiser_configfirst_stage_configr   r   r   r   r   r    r!   r"   r#   r$   r:   r%   r&   r'   r(   r)   r/   	__class__ >/data/cameron/vidgen/generative-models/sgm/models/diffusion.pyr.      sD   




zDiffusionEngine.__init__pathreturnc                 C   s   | drtj|ddd }n| drt|}nt| j|dd\}}td| d	t| d
t| d t|dkrCtd|  t|dkrRtd|  d S d S )Nckptcpu)map_location
state_dictsafetensorsF)strictzRestored from z with z missing and z unexpected keysr   zMissing Keys: zUnexpected Keys: )endswithtorchloadload_safetensorsNotImplementedErrorload_state_dictr6   r7   )r<   rD   sdmissing
unexpectedrB   rB   rC   r;   U   s   


zDiffusionEngine.init_from_ckptc                 C   s0   t | }t|_| D ]}d|_q|| _d S )NF)r   evalr   train
parametersrequires_gradfirst_stage_model)r<   configr/   paramrB   rB   rC   r3   i   s
   
z!DiffusionEngine._init_first_stagec                 C   s
   || j  S Nr%   )r<   batchrB   rB   rC   	get_inputp   s   
zDiffusionEngine.get_inputc                 C   s   d| j  | }t| j|jd }t|jd | }g }tjd| j dB t	|D ]5}t
| jjtrDdt||| |d |  i}ni }| jj||| |d |  fi |}|| q*W d    n1 sjw   Y  tj|dd}|S )Nr   r   cudaenabled	timesteps   dim)r$   r   r)   shapemathceilrM   autocastr:   range
isinstancerY   decoderr   r7   decodeappendcat)r<   z	n_samplesn_roundsall_outnkwargsoutrB   rB   rC   decode_first_stageu   s&   "
z"DiffusionEngine.decode_first_stagec                 C   s   t | j|jd }t|jd | }g }tjd| j d$ t|D ]}| j	
||| |d |  }|| q#W d    n1 sEw   Y  tj|dd}| j| }|S )Nr   r`   ra   rd   re   )r   r)   rg   rh   ri   rM   rj   r:   rk   rY   encodero   rp   r$   )r<   xrr   rs   rt   ru   rw   rq   rB   rB   rC   encode_first_stage   s   
z"DiffusionEngine.encode_first_stagec                 C   s0   |  | j| j| j||}| }d|i}||fS )Nloss)r4   r/   r0   r2   mean)r<   rz   r^   r|   	loss_mean	loss_dictrB   rB   rC   forward   s   zDiffusionEngine.forwardr^   c                 C   s4   |  |}| |}| j|d< | ||\}}||fS )Nglobal_step)r_   r{   r   )r<   r^   rz   r|   r   rB   rB   rC   shared_step   s
   


zDiffusionEngine.shared_stepc                 C   sp   |  |\}}| j|ddddd | jd| jddddd | jd ur6|  jd d }| jd|ddddd |S )NTF)prog_barloggeron_stepon_epochr   r   lrlr_abs)r   log_dictlogr   r   
optimizersparam_groups)r<   r^   	batch_idxr|   r   r   rB   rB   rC   training_step   s$   

	zDiffusionEngine.training_stepc                 O   s    | j d u s
| jd u rtdd S )Nz6Sampler and loss function need to be set for training.)r1   r4   
ValueErrorr<   argsrv   rB   rB   rC   on_train_start   s   zDiffusionEngine.on_train_startc                 O   s   | j r| | j d S d S r\   )r"   r5   r/   r   rB   rB   rC   on_train_batch_end   s   z"DiffusionEngine.on_train_batch_endc              
   c   s    | j r| j| j  | j| j |d urt| d z!d V  W | j r=| j| j  |d ur?t| d d S d S d S | j rY| j| j  |d urZt| d w w w )Nz: Switched to EMA weightsz: Restored training weights)r"   r5   storer/   rW   copy_tor6   restore)r<   contextrB   rB   rC   	ema_scope   s(   zDiffusionEngine.ema_scopec                 C   s&   t |d |fd|i|dt S )Nr*   r   params)r   getdict)r<   r   r   cfgrB   rB   rC   !instantiate_optimizer_from_config   s   
z1DiffusionEngine.instantiate_optimizer_from_configc                 C   s   | j }t| j }| jjD ]}|jr|t|  }q| ||| j}| j	d urCt
| j	}td t||jddddg}|g|fS |S )Nz Setting up LambdaLR scheduler...)	lr_lambdasteprd   )	schedulerinterval	frequency)learning_rater8   r/   rW   r2   	embeddersis_trainabler   r   r   r   r6   r   schedule)r<   r   r   embedderoptr   rB   rB   rC   configure_optimizers   s"   


z$DiffusionEngine.configure_optimizers   conduc
batch_sizerg   c           	         s>   t j|g|R  j} fdd}j||||d}|S )Nc                    s   j j| ||fi  S r\   )r0   r/   )inputsigmacrv   r<   rB   rC   <lambda>   s
    
z(DiffusionEngine.sample.<locals>.<lambda>)r   )rM   randntodevicer1   )	r<   r   r   r   rg   rv   r   r0   samplesrB   r   rC   sample   s   	zDiffusionEngine.sampleru   c                    s:  || j  jdd \}}t }| jjD ]}| jdu s |j | jv r| js||j  d|  t tj	ru 
 dkrR fddt jd D  t||f |d d}nC 
 dkrr fd	dt jd D  t||f |d
 d}n#t t ttfrt d trt||f |d
 d}nt t |||j < q|S )z
        Defines heuristics to log different conditionings.
        These can be lists of strings (text-to-image), tensors, ints, ...
        r   Nrd   c                    s   g | ]
}t  |  qS rB   )stritem.0irz   rB   rC   
<listcomp>  s    z5DiffusionEngine.log_conditionings.<locals>.<listcomp>r      )sizec                    s(   g | ]}d  dd  |  D qS )rz   c                 S   s   g | ]}t |qS rB   )r   )r   xxrB   rB   rC   r     s    z@DiffusionEngine.log_conditionings.<locals>.<listcomp>.<listcomp>)jointolistr   r   rB   rC   r     s       )r%   rg   r   r2   r   r&   r'   rl   rM   Tensorrf   rk   r   rP   r   r	   r   )r<   r^   ru   image_himage_wr   r   xcrB   r   rC   log_conditionings   s2   

z!DiffusionEngine.log_conditionings   TNr   ucg_keysc                    s  dd j jD |r!ttfdd|s J d| d n}t }|}j j|tj jdkr9|ng d\}}	i }
t|j	d   |
jd   }||d	< |}||d
< ||  |D ]t| tjrt fdd||	f\|< |	< qn|rd j|f|j	dd  |	 d|
}W d    n1 sw   Y  |}||d< |S )Nc                 S   s   g | ]}|j qS rB   r]   )r   erB   rB   rC   r   .  s    z.DiffusionEngine.log_images.<locals>.<listcomp>c                    s   |  v S r\   rB   r   )conditioner_input_keysrB   rC   r   0  s    z,DiffusionEngine.log_images.<locals>.<lambda>z]Each defined ucg key for sampling must be in the provided conditioner input keys,but we have z vs. r   )force_uc_zero_embeddingsinputsreconstructionsc                    s   |  d    jS r\   )r   r   )y)r   kr<   rB   rC   r   L  s    Plottingrd   )rg   r   r   r   )r2   r   allmapr   r_   get_unconditional_conditioningr7   minrg   r   r   r{   rx   updater   rl   rM   r   r   r   )r<   r^   r   r   r   rv   r   rz   r   r   sampling_kwargsrq   r   rB   )r   r   r   r<   rC   
log_images%  sT   	


&
zDiffusionEngine.log_images)NNNNNNNFr   r   Fr   NFFNr\   )Nr   N)r   TN)$__name__
__module____qualname__r   r   r	   r
   r   boolfloatr   r   intr.   r;   r3   r_   rM   no_gradrx   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__rB   rB   r@   rC   r      s    	



A



&r   )%rh   
contextlibr   typingr   r   r   r   r   r   pytorch_lightningplrM   	omegaconfr	   r
   safetensors.torchr   rO   torch.optim.lr_schedulerr   modulesr    modules.autoencoding.temporal_aer   !modules.diffusionmodules.wrappersr   modules.emar   utilr   r   r   r   r   LightningModuler   rB   rB   rB   rC   <module>   s     