o
    Ó#¯i})  ã                   @   sÊ   d dl mZmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 ddlmZmZmZ ddlmZ ddlmZmZ dd	„ Zd
d„ Zddd„Zdd„ Zdd„ Zdd„ ZG dd„ deƒZG dd„ dejƒZdS )é    )ÚAnyÚUnionN©Ú	rearrangeé   )ÚNLayerDiscriminatorÚNLayerDiscriminator3DÚweights_init)ÚLPIPS)ÚdefaultÚprint0c                 C   s8   t  t d|  ¡¡}t  t d| ¡¡}d||  }|S )Nç      ð?ç      à?)ÚtorchÚmeanÚFÚrelu)Úlogits_realÚlogits_fakeZ	loss_realZ	loss_fakeÚd_loss© r   ú4/data/cameron/vidgen/VidTok/vidtok/modules/losses.pyÚhinge_d_loss   s   r   c                 C   s*   dt  t |  ¡¡t  t |¡¡  }|S )Nr   )r   r   r   Úsoftplus)r   r   r   r   r   r   Úvanilla_d_loss   s   &r   ç        c                 C   s   ||k r|} | S ©Nr   )ÚweightÚglobal_stepÚ	thresholdÚvaluer   r   r   Úadopt_weight   s   r!   c                 C   sR   t j||jd}||k}t  |||¡}t  || |¡}|||   t  t  |¡¡ S )z
    non-saturating loss
    )Údtype)r   Ú
zeros_liker"   ÚwhereÚlog1pÚexp)ÚlabelsÚlogitsÚzerosÚ	conditionZrelu_logitsZneg_abs_logitsr   r   r   Ú"_sigmoid_cross_entropy_with_logits    s
   r+   c                 C   s@   | j d }|  |d¡} tj| dd} t tt | ¡| d¡}|S )z 
    logits_fake: [B 1 H W]
    r   éÿÿÿÿ)Údim)r'   r(   )ÚshapeÚreshaper   r   r+   Ú	ones_like)r   ÚBZgen_lossr   r   r   Únon_saturate_gen_loss+   s
   
r2   c                 C   s<   t  t | |j ¡ d¡¡t  t |j| ¡ d¡¡ }|S )Né   )r   r   r   r   Úlogits_fake_emaÚpowÚlogits_real_ema)Z	real_predZ	fake_predÚ	lecam_emaÚregr   r   r   Ú	lecam_reg6   s   ÿr9   c                   @   s   e Zd Zddd„Zdd„ ZdS )	Ú	LeCAM_EMAr   ç+‡ÙÎ÷ï?c                 C   s   || _ || _|| _d S r   )r6   r4   Údecay)ÚselfÚinitr<   r   r   r   Ú__init__?   s   
zLeCAM_EMA.__init__c                 C   sP   | j | j t |¡ ¡ d| j   | _ | j| j t |¡ ¡ d| j   | _d S )Nr   )r6   r<   r   r   Úitemr4   )r=   r   r   r   r   r   ÚupdateD   s   &*zLeCAM_EMA.updateN)r   r;   )Ú__name__Ú
__module__Ú__qualname__r?   rA   r   r   r   r   r:   =   s    
r:   c                       s¸   e Zd Z															d$d	ed
edededededededededededededef def‡ fdd„Z	de
fdd„Zde
fdd„Zd%dd „Z		!	d&d"d#„Z‡  ZS )'ÚGeneralLPIPSWithDiscriminatorr   r   é   Ú3dÚhingeFr3   NÚ
disc_startÚlogvar_initÚdisc_num_layersÚdisc_in_channelsÚdisc_factorÚdisc_weightÚ	disc_typeÚperceptual_weightÚlecam_loss_weightÚ	disc_lossÚscale_input_to_tgt_sizeÚdimsÚlearn_logvarÚregularization_weightsÚgen_loss_cross_entropyc                    s  t ƒ  ¡  || _| jdkrtd|› dƒ || _|dv sJ ‚|| _tƒ  ¡ | _|	| _	t
 tjdd| ¡| _|| _|| _| jdv sCJ ‚| jdkrTt||d	d
 t¡| _nt||d	d
 t¡| _|| _|dkrhtnt| _|| _|| _t|i ƒ| _|| _|
| _| jdkrˆt ƒ | _!d S d S )Nr3   za[bold cyan]\[vidtok.modules.losses][GeneralLPIPSWithDiscriminator][/bold cyan] running with dims=zo. This means that for perceptual loss calculation, the LPIPS loss will be applied to each frame independently. )rH   Zvanillar   )Úsize)Ú2drG   rY   F)Zinput_ncÚn_layersZuse_actnormrH   r   )"Úsuperr?   rT   r   rS   Zpixel_weightr
   ÚevalÚperceptual_lossrP   ÚnnÚ	Parameterr   ÚonesÚlogvarrU   rO   r   Úapplyr	   Údiscriminatorr   Údiscriminator_iter_startr   r   rR   rM   Údiscriminator_weightr   rV   rW   rQ   r:   r7   )r=   rI   rJ   Zpixelloss_weightrK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   ©Ú	__class__r   r   r?   J   sH   


ÿ
ÿþÿþ
ÿz&GeneralLPIPSWithDiscriminator.__init__Úreturnc                 C   s
   | j  ¡ S r   )rc   Ú
parameters©r=   r   r   r   Úget_trainable_parameters   s   
z6GeneralLPIPSWithDiscriminator.get_trainable_parametersc                 c   s    | j r| jV  dE d H  d S )Nr   )rU   ra   rj   r   r   r   Ú$get_trainable_autoencoder_parameters„   s   €zBGeneralLPIPSWithDiscriminator.get_trainable_autoencoder_parametersc                 C   s¦   |d urt jj||ddd }t jj||ddd }nt jj|| jd ddd }t jj|| jd ddd }t  |¡t  |¡d  }t  |dd¡ ¡ }|| j }|S )NT)Úretain_graphr   g-Cëâ6?r   g     ˆÃ@)r   ÚautogradÚgradÚ
last_layerÚnormÚclampÚdetachre   )r=   Únll_lossÚg_lossrp   Z	nll_gradsZg_gradsÚd_weightr   r   r   Úcalculate_adaptive_weight‰   s   
z7GeneralLPIPSWithDiscriminator.calculate_adaptive_weightÚtrainc	                 C   sò  | j rtjjj||jdd … ddd}|dkrT|jd }	|jd }
| jdkr2tdd„ ||fƒ\}}t | 	¡ | 	¡  ¡}| j
dkrT|  | 	¡ | 	¡ ¡}|| j
|  }nt dg¡}|t | j¡ | j }|}|d uro|| }t |¡|jd  }t |¡|jd  }| jd	kr‘t|d
|
d 	¡ }|  |¡}| js t |¡ }nt|ƒ}| jdkrÇz
| j|||d}W n tyÆ   | jr¿J ‚t d¡}Y nw t d¡}t| j|| jd}||| |  }tƒ }|D ]"}|| jv r|| j| ||   }||  ¡  ¡ ||› d|› < qâ|  d !|¡| "¡  ¡  ¡ d !|¡| j ¡ d !|¡| ¡  ¡ d !|¡| ¡  ¡ d !|¡| ¡  ¡ d !|¡| ¡ d !|¡t |¡d !|¡| ¡  ¡ i¡ ||fS |dkr÷| jdkrp| jdkrptdd„ ||fƒ\}}|  | 	¡  ¡ ¡}|  | 	¡  ¡ ¡}t| j|| jd}|  #||¡}| j$dkr¯| j%  ||¡ t&||| j%ƒ}||| j$ |  }n|| }d !|¡| "¡  ¡  ¡ d !|¡| ¡  ¡ d !|¡| ¡  ¡ d !|¡t |¡d !|¡| ¡ i}| j$dkró|  d !|¡| ¡ i¡ ||fS d S )Nr3   ÚbicubicT)ÚmodeÚ	antialiasr   c                 S   ó
   t | dƒS ©Nzb c t h w -> (b t) c h wr   ©Úxr   r   r   Ú<lambda>©   ó   
 z7GeneralLPIPSWithDiscriminator.forward.<locals>.<lambda>r   rG   z(b t) c h w -> b c t h w)Út)rp   )r   ú/z{}/total_lossz	{}/logvarz{}/nll_lossz{}/rec_lossz	{}/p_lossz{}/d_weightz{}/disc_factorz	{}/g_lossr   rY   c                 S   r|   r}   r   r~   r   r   r   r€   é   r   z{}/disc_lossz{}/logits_realz{}/logits_fakez{}/non_saturated_d_lossz{}/lecam_loss)'rS   r   r^   Ú
functionalÚinterpolater.   rT   ÚmapÚabsÚ
contiguousrP   r]   ÚTensorr&   ra   ÚsumrO   r   rc   rW   r   r2   rM   rw   ÚRuntimeErrorÚtrainingÚtensorr!   rd   ÚdictrV   rs   rA   ÚformatÚclonerR   rQ   r7   r9   )r=   Úregularization_logÚinputsZreconstructionsÚoptimizer_idxr   rp   ÚsplitÚweightsÚbsr‚   Zrec_lossZp_lossrt   Zweighted_nll_lossr   ru   rv   rM   ÚlossÚlogÚkr   Znon_saturate_d_lossZ
lecam_lossr   r   r   r   Úforward–   s     



þ




þ
€øÿ
þûàz%GeneralLPIPSWithDiscriminator.forward)r   r   rF   rF   r   r   rG   r   r   rH   Fr3   FNFr   )Nrx   N)rB   rC   rD   ÚintÚfloatÚstrÚboolr   rŽ   r?   r   rk   rl   rw   rš   Ú__classcell__r   r   rf   r   rE   I   sl    ïþýûúùø	÷
öõôóòñ
ðï7
÷rE   )r   r   )Útypingr   r   r   Útorch.nnr^   Ztorch.nn.functionalr„   r   Úeinopsr   rc   r   r   r	   Úlpipsr
   Úutilr   r   r   r   r!   r+   r2   r9   Úobjectr:   ÚModulerE   r   r   r   r   Ú<module>   s     
