# simple_uva/diffusion/gaussian_diffusion.py
#
# Gaussian diffusion utilities: beta schedules, the forward/reverse processes,
# DDPM/DDIM sampling loops, and training losses.

import enum
import math

import numpy as np
import torch as th

from .diffusion_utils import discretized_gaussian_log_likelihood, normal_kl


def mean_flat(tensor):
    """
    Take the mean over all non-batch dimensions.
    """
    return tensor.mean(dim=list(range(1, len(tensor.shape))))


class ModelMeanType(enum.Enum):
    """
    Which type of output the model predicts.
    """

    PREVIOUS_X = enum.auto()  # the model predicts x_{t-1}
    START_X = enum.auto()  # the model predicts x_0
    EPSILON = enum.auto()  # the model predicts epsilon

class ModelVarType(enum.Enum):
    """
    What is used as the model's output variance.
    The LEARNED_RANGE option has been added to allow the model to predict
    values between FIXED_SMALL and FIXED_LARGE, making its job easier.
    """

    LEARNED = enum.auto()
    FIXED_SMALL = enum.auto()
    FIXED_LARGE = enum.auto()
    LEARNED_RANGE = enum.auto()


class LossType(enum.Enum):
    MSE = enum.auto()  # use raw MSE loss (and KL when learning variances)
    RESCALED_MSE = enum.auto()  # use raw MSE loss (with RESCALED_KL when learning variances)
    KL = enum.auto()  # use the variational lower-bound
    RESCALED_KL = enum.auto()  # like KL, but rescale to estimate the full VLB

    def is_vb(self):
        return self == LossType.KL or self == LossType.RESCALED_KL


def _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, warmup_frac):
    betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64)
    warmup_time = int(num_diffusion_timesteps * warmup_frac)
    betas[:warmup_time] = np.linspace(beta_start, beta_end, warmup_time, dtype=np.float64)
    return betas

def get_beta_schedule(beta_schedule, *, beta_start, beta_end, num_diffusion_timesteps):
    """
    This is the deprecated API for creating beta schedules.
    See get_named_beta_schedule() for the new library of schedules.
    """
    if beta_schedule == "quad":
        betas = (
            np.linspace(
                beta_start ** 0.5, beta_end ** 0.5, num_diffusion_timesteps, dtype=np.float64
            )
            ** 2
        )
    elif beta_schedule == "linear":
        betas = np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64)
    elif beta_schedule == "warmup10":
        betas = _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, 0.1)
    elif beta_schedule == "warmup50":
        betas = _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, 0.5)
    elif beta_schedule == "const":
        betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64)
    elif beta_schedule == "jsd":  # 1/T, 1/(T-1), 1/(T-2), ..., 1
        betas = 1.0 / np.linspace(
            num_diffusion_timesteps, 1, num_diffusion_timesteps, dtype=np.float64
        )
    else:
        raise NotImplementedError(beta_schedule)
    assert betas.shape == (num_diffusion_timesteps,)
    return betas

def get_named_beta_schedule(schedule_name, num_diffusion_timesteps):
    """
    Get a pre-defined beta schedule for the given name.
    The beta schedule library consists of beta schedules which remain similar
    in the limit of num_diffusion_timesteps.
    Beta schedules may be added, but should not be removed or changed once
    they are committed to maintain backwards compatibility.
    """
    if schedule_name == "linear":
        # Linear schedule from Ho et al, extended to work for any number of
        # diffusion steps.
        scale = 1000 / num_diffusion_timesteps
        return get_beta_schedule(
            "linear",
            beta_start=scale * 0.0001,
            beta_end=scale * 0.02,
            num_diffusion_timesteps=num_diffusion_timesteps,
        )
    elif schedule_name == "cosine":
        return betas_for_alpha_bar(
            num_diffusion_timesteps,
            lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2,
        )
    else:
        raise NotImplementedError(f"unknown beta schedule: {schedule_name}")

def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].
    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return np.array(betas)

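
# A quick illustration (a sketch added here, not part of the original module):
# both named schedules return a 1-D float64 array of length T whose entries lie
# in (0, 1], suitable for passing directly to GaussianDiffusion below, e.g.
#
#   betas = get_named_beta_schedule("cosine", 1000)
#   assert betas.shape == (1000,) and (betas > 0).all() and (betas <= 0.999).all()
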
class GaussianDiffusion:
    """
    Utilities for training and sampling diffusion models.
    Original ported from this codebase:
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42
    :param betas: a 1-D numpy array of betas for each diffusion timestep,
                  starting at T and going to 1.
    """

    def __init__(self, *, betas, model_mean_type, model_var_type, loss_type):
        self.model_mean_type = model_mean_type
        self.model_var_type = model_var_type
        self.loss_type = loss_type

        # Use float64 for accuracy.
        betas = np.array(betas, dtype=np.float64)
        self.betas = betas
        assert len(betas.shape) == 1, "betas must be 1-D"
        assert (betas > 0).all() and (betas <= 1).all()

        self.num_timesteps = int(betas.shape[0])

        alphas = 1.0 - betas
        self.alphas_cumprod = np.cumprod(alphas, axis=0)
        self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1])
        self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0)
        assert self.alphas_cumprod_prev.shape == (self.num_timesteps,)

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod)
        self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod)
        self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod)
        self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod)
        self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1)

        # calculations for posterior q(x_{t-1} | x_t, x_0)
        self.posterior_variance = (
            betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
        )
        # log calculation clipped because the posterior variance is 0 at the
        # beginning of the diffusion chain.
        self.posterior_log_variance_clipped = (
            np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:]))
            if len(self.posterior_variance) > 1
            else np.array([])
        )
        self.posterior_mean_coef1 = (
            betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
        )
        self.posterior_mean_coef2 = (
            (1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)
        )

    def q_mean_variance(self, x_start, t):
        """
        Get the distribution q(x_t | x_0).
        :param x_start: the [N x C x ...] tensor of noiseless inputs.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :return: A tuple (mean, variance, log_variance), all of x_start's shape.
        """
        mean = _extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
        variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape)
        log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape)
        return mean, variance, log_variance

    def q_sample(self, x_start, t, noise=None):
        """
        Diffuse the data for a given number of diffusion steps.
        In other words, sample from q(x_t | x_0).
        :param x_start: the initial data batch.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :param noise: if specified, the split-out normal noise.
        :return: A noisy version of x_start.
        """
        if noise is None:
            noise = th.randn_like(x_start)
        assert noise.shape == x_start.shape
        return (
            _extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
            + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise
        )

    def q_posterior_mean_variance(self, x_start, x_t, t):
        """
        Compute the mean and variance of the diffusion posterior:
            q(x_{t-1} | x_t, x_0)
        """
        assert x_start.shape == x_t.shape
        posterior_mean = (
            _extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start
            + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
        )
        posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape)
        posterior_log_variance_clipped = _extract_into_tensor(
            self.posterior_log_variance_clipped, t, x_t.shape
        )
        assert (
            posterior_mean.shape[0]
            == posterior_variance.shape[0]
            == posterior_log_variance_clipped.shape[0]
            == x_start.shape[0]
        )
        return posterior_mean, posterior_variance, posterior_log_variance_clipped

    def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None):
        """
        Apply the model to get p(x_{t-1} | x_t), as well as a prediction of
        the initial x, x_0.
        :param model: the model, which takes a signal and a batch of timesteps
                      as input.
        :param x: the [N x C x ...] tensor at time t.
        :param t: a 1-D Tensor of timesteps.
        :param clip_denoised: if True, clip the denoised signal into [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample. Applies before
            clip_denoised.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :return: a dict with the following keys:
                 - 'mean': the model mean output.
                 - 'variance': the model variance output.
                 - 'log_variance': the log of 'variance'.
                 - 'pred_xstart': the prediction for x_0.
        """
        if model_kwargs is None:
            model_kwargs = {}

        B, C = x.shape[:2]
        assert t.shape == (B,)
        model_output = model(x, t, **model_kwargs)
        if isinstance(model_output, tuple):
            model_output, extra = model_output
        else:
            extra = None

        if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]:
            assert model_output.shape == (B, C * 2, *x.shape[2:])
            model_output, model_var_values = th.split(model_output, C, dim=1)
            min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape)
            max_log = _extract_into_tensor(np.log(self.betas), t, x.shape)
            # The model_var_values is [-1, 1] for [min_var, max_var].
            frac = (model_var_values + 1) / 2
            model_log_variance = frac * max_log + (1 - frac) * min_log
            model_variance = th.exp(model_log_variance)
        else:
            model_variance, model_log_variance = {
                # for fixedlarge, we set the initial (log-)variance like so
                # to get a better decoder log likelihood.
                ModelVarType.FIXED_LARGE: (
                    np.append(self.posterior_variance[1], self.betas[1:]),
                    np.log(np.append(self.posterior_variance[1], self.betas[1:])),
                ),
                ModelVarType.FIXED_SMALL: (
                    self.posterior_variance,
                    self.posterior_log_variance_clipped,
                ),
            }[self.model_var_type]
            model_variance = _extract_into_tensor(model_variance, t, x.shape)
            model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape)

        def process_xstart(x):
            if denoised_fn is not None:
                x = denoised_fn(x)
            if clip_denoised:
                return x.clamp(-1, 1)
            return x

        if self.model_mean_type == ModelMeanType.START_X:
            pred_xstart = process_xstart(model_output)
        else:
            pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output))
        model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t)

        assert model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape
        return {
            "mean": model_mean,
            "variance": model_variance,
            "log_variance": model_log_variance,
            "pred_xstart": pred_xstart,
            "extra": extra,
        }

    def _predict_xstart_from_eps(self, x_t, t, eps):
        assert x_t.shape == eps.shape
        return (
            _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t
            - _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps
        )

    def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
        return (
            _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart
        ) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)

    def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None):
        """
        Compute the mean for the previous step, given a function cond_fn that
        computes the gradient of a conditional log probability with respect to
        x. In particular, cond_fn computes grad(log(p(y|x))), and we want to
        condition on y.
        This uses the conditioning strategy from Sohl-Dickstein et al. (2015).
        """
        gradient = cond_fn(x, t, **model_kwargs)
        new_mean = p_mean_var["mean"].float() + p_mean_var["variance"] * gradient.float()
        return new_mean

    def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None):
        """
        Compute what the p_mean_variance output would have been, should the
        model's score function be conditioned by cond_fn.
        See condition_mean() for details on cond_fn.
        Unlike condition_mean(), this instead uses the conditioning strategy
        from Song et al (2020).
        """
        alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape)

        eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"])
        eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, **model_kwargs)

        out = p_mean_var.copy()
        out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps)
        out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t)
        return out

    def p_sample(
        self,
        model,
        x,
        t,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        temperature=1.0,
    ):
        """
        Sample x_{t-1} from the model at the given timestep.
        :param model: the model to sample from.
        :param x: the current tensor at x_{t-1}.
        :param t: the value of t, starting at 0 for the first diffusion step.
        :param clip_denoised: if True, clip the x_start prediction to [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample.
        :param cond_fn: if not None, this is a gradient function that acts
                        similarly to the model.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :param temperature: temperature scaling during Diff Loss sampling.
        :return: a dict containing the following keys:
                 - 'sample': a random sample from the model.
                 - 'pred_xstart': a prediction of x_0.
        """
        out = self.p_mean_variance(
            model, x, t, clip_denoised=clip_denoised, denoised_fn=denoised_fn, model_kwargs=model_kwargs
        )
        noise = th.randn_like(x)
        nonzero_mask = (
            (t != 0).float().view(-1, *([1] * (len(x.shape) - 1)))
        )  # no noise when t == 0
        if cond_fn is not None:
            out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs)
        # scale the noise by temperature
        sample = out["mean"] + nonzero_mask * th.exp(0.5 * out["log_variance"]) * noise * temperature
        return {"sample": sample, "pred_xstart": out["pred_xstart"]}

    def p_sample_loop(
        self,
        model,
        shape,
        noise=None,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        device=None,
        progress=False,
        temperature=1.0,
    ):
        """
        Generate samples from the model.
        :param model: the model module.
        :param shape: the shape of the samples, (N, C, H, W).
        :param noise: if specified, the noise from the encoder to sample.
                      Should be of the same shape as `shape`.
        :param clip_denoised: if True, clip x_start predictions to [-1, 1].
        :param denoised_fn: if not None, a function which applies to the
            x_start prediction before it is used to sample.
        :param cond_fn: if not None, this is a gradient function that acts
                        similarly to the model.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :param device: if specified, the device to create the samples on.
                       If not specified, use a model parameter's device.
        :param progress: if True, show a tqdm progress bar.
        :param temperature: temperature scaling during Diff Loss sampling.
        :return: a non-differentiable batch of samples.
        """
        final = None
        for sample in self.p_sample_loop_progressive(
            model,
            shape,
            noise=noise,
            clip_denoised=clip_denoised,
            denoised_fn=denoised_fn,
            cond_fn=cond_fn,
            model_kwargs=model_kwargs,
            device=device,
            progress=progress,
            temperature=temperature,
        ):
            final = sample
        return final["sample"]

    def p_sample_loop_progressive(
        self,
        model,
        shape,
        noise=None,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        device=None,
        progress=False,
        temperature=1.0,
    ):
        """
        Generate samples from the model and yield intermediate samples from
        each timestep of diffusion.
        Arguments are the same as p_sample_loop().
        Returns a generator over dicts, where each dict is the return value of
        p_sample().
        """
        assert isinstance(shape, (tuple, list))
        if noise is not None:
            img = noise
        else:
            img = th.randn(*shape).cuda()
        indices = list(range(self.num_timesteps))[::-1]

        if progress:
            # Lazy import so that we don't depend on tqdm.
            from tqdm.auto import tqdm

            indices = tqdm(indices)

        for i in indices:
            t = th.tensor([i] * shape[0], device=img.device)
            with th.no_grad():
                out = self.p_sample(
                    model,
                    img,
                    t,
                    clip_denoised=clip_denoised,
                    denoised_fn=denoised_fn,
                    cond_fn=cond_fn,
                    model_kwargs=model_kwargs,
                    temperature=temperature,
                )
                yield out
                img = out["sample"]

    def ddim_sample(
        self,
        model,
        x,
        t,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        eta=0.0,
    ):
        """
        Sample x_{t-1} from the model using DDIM.
        Same usage as p_sample().
        """
        out = self.p_mean_variance(
            model, x, t, clip_denoised=clip_denoised, denoised_fn=denoised_fn, model_kwargs=model_kwargs
        )
        if cond_fn is not None:
            out = self.condition_score(cond_fn, out, x, t, model_kwargs=model_kwargs)

        # Usually our model outputs epsilon, but we re-derive it
        # in case we used x_start or x_prev prediction.
        eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"])

        alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape)
        alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape)
        sigma = (
            eta
            * th.sqrt((1 - alpha_bar_prev) / (1 - alpha_bar))
            * th.sqrt(1 - alpha_bar / alpha_bar_prev)
        )
        # Equation 12.
        noise = th.randn_like(x)
        mean_pred = (
            out["pred_xstart"] * th.sqrt(alpha_bar_prev)
            + th.sqrt(1 - alpha_bar_prev - sigma ** 2) * eps
        )
        nonzero_mask = (
            (t != 0).float().view(-1, *([1] * (len(x.shape) - 1)))
        )  # no noise when t == 0
        sample = mean_pred + nonzero_mask * sigma * noise
        return {"sample": sample, "pred_xstart": out["pred_xstart"]}

    def ddim_reverse_sample(
        self,
        model,
        x,
        t,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        eta=0.0,
    ):
        """
        Sample x_{t+1} from the model using DDIM reverse ODE.
        """
        assert eta == 0.0, "Reverse ODE only for deterministic path"
        out = self.p_mean_variance(
            model, x, t, clip_denoised=clip_denoised, denoised_fn=denoised_fn, model_kwargs=model_kwargs
        )
        if cond_fn is not None:
            out = self.condition_score(cond_fn, out, x, t, model_kwargs=model_kwargs)
        # Usually our model outputs epsilon, but we re-derive it
        # in case we used x_start or x_prev prediction.
        eps = (
            _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x
            - out["pred_xstart"]
        ) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape)
        alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape)

        # Equation 12. reversed
        mean_pred = out["pred_xstart"] * th.sqrt(alpha_bar_next) + th.sqrt(1 - alpha_bar_next) * eps

        return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]}

    def ddim_sample_loop(
        self,
        model,
        shape,
        noise=None,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        device=None,
        progress=False,
        eta=0.0,
    ):
        """
        Generate samples from the model using DDIM.
        Same usage as p_sample_loop().
        """
        final = None
        for sample in self.ddim_sample_loop_progressive(
            model,
            shape,
            noise=noise,
            clip_denoised=clip_denoised,
            denoised_fn=denoised_fn,
            cond_fn=cond_fn,
            model_kwargs=model_kwargs,
            device=device,
            progress=progress,
            eta=eta,
        ):
            final = sample
        return final["sample"]

    def ddim_sample_loop_progressive(
        self,
        model,
        shape,
        noise=None,
        clip_denoised=True,
        denoised_fn=None,
        cond_fn=None,
        model_kwargs=None,
        device=None,
        progress=False,
        eta=0.0,
    ):
        """
        Use DDIM to sample from the model and yield intermediate samples from
        each timestep of DDIM.
        Same usage as p_sample_loop_progressive().
        """
        assert isinstance(shape, (tuple, list))
        if noise is not None:
            img = noise
        else:
            img = th.randn(*shape).cuda()
        indices = list(range(self.num_timesteps))[::-1]

        if progress:
            # Lazy import so that we don't depend on tqdm.
            from tqdm.auto import tqdm

            indices = tqdm(indices)

        for i in indices:
            t = th.tensor([i] * shape[0], device=img.device)
            with th.no_grad():
                out = self.ddim_sample(
                    model,
                    img,
                    t,
                    clip_denoised=clip_denoised,
                    denoised_fn=denoised_fn,
                    cond_fn=cond_fn,
                    model_kwargs=model_kwargs,
                    eta=eta,
                )
                yield out
                img = out["sample"]

    def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None):
        """
        Get a term for the variational lower-bound.
        The resulting units are bits (rather than nats, as one might expect).
        This allows for comparison to other papers.
        :return: a dict with the following keys:
                 - 'output': a shape [N] tensor of NLLs or KLs.
                 - 'pred_xstart': the x_0 predictions.
        """
        true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(
            x_start=x_start, x_t=x_t, t=t
        )
        out = self.p_mean_variance(
            model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs
        )
        kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"])
        kl = mean_flat(kl) / np.log(2.0)

        decoder_nll = -discretized_gaussian_log_likelihood(
            x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]
        )
        assert decoder_nll.shape == x_start.shape
        decoder_nll = mean_flat(decoder_nll) / np.log(2.0)

        # At the first timestep return the decoder NLL,
        # otherwise return KL(q(x_{t-1} | x_t, x_0) || p(x_{t-1} | x_t))
        output = th.where((t == 0), decoder_nll, kl)
        return {"output": output, "pred_xstart": out["pred_xstart"]}

    def training_losses(self, model, x_start, t, model_kwargs=None, noise=None):
        """
        Compute training losses for a single timestep.
        :param model: the model to evaluate loss on.
        :param x_start: the [N x C x ...] tensor of inputs.
        :param t: a batch of timestep indices.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :param noise: if specified, the specific Gaussian noise to try to remove.
        :return: a dict with the key "loss" containing a tensor of shape [N].
                 Some mean or variance settings may also have other keys.
        """
        if model_kwargs is None:
            model_kwargs = {}
        if noise is None:
            noise = th.randn_like(x_start)
        x_t = self.q_sample(x_start, t, noise=noise)

        terms = {}

        if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL:
            terms["loss"] = self._vb_terms_bpd(
                model=model,
                x_start=x_start,
                x_t=x_t,
                t=t,
                clip_denoised=False,
                model_kwargs=model_kwargs,
            )["output"]
            if self.loss_type == LossType.RESCALED_KL:
                terms["loss"] *= self.num_timesteps
        elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE:
            model_output = model(x_t, t, **model_kwargs)

            if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]:
                B, C = x_t.shape[:2]
                assert model_output.shape == (B, C * 2, *x_t.shape[2:])
                model_output, model_var_values = th.split(model_output, C, dim=1)
                # Learn the variance using the variational bound, but don't let
                # it affect our mean prediction.
                frozen_out = th.cat([model_output.detach(), model_var_values], dim=1)
                terms["vb"] = self._vb_terms_bpd(
                    model=lambda *args, r=frozen_out: r,
                    x_start=x_start,
                    x_t=x_t,
                    t=t,
                    clip_denoised=False,
                )["output"]
                if self.loss_type == LossType.RESCALED_MSE:
                    # Divide by 1000 for equivalence with initial implementation.
                    # Without a factor of 1/1000, the VB term hurts the MSE term.
                    terms["vb"] *= self.num_timesteps / 1000.0

            target = {
                ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(
                    x_start=x_start, x_t=x_t, t=t
                )[0],
                ModelMeanType.START_X: x_start,
                ModelMeanType.EPSILON: noise,
            }[self.model_mean_type]
            assert model_output.shape == target.shape == x_start.shape
            terms["mse"] = mean_flat((target - model_output) ** 2)
            if "vb" in terms:
                terms["loss"] = terms["mse"] + terms["vb"]
            else:
                terms["loss"] = terms["mse"]
        else:
            raise NotImplementedError(self.loss_type)

        return terms

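    # Typical training step (a sketch under assumed names; `diffusion`, `net`,
    # `x0`, and `opt` are not defined in this module):
    #
    #   t = th.randint(0, diffusion.num_timesteps, (x0.shape[0],), device=x0.device)
    #   loss = diffusion.training_losses(net, x0, t)["loss"].mean()
    #   loss.backward(); opt.step(); opt.zero_grad()
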
    def _prior_bpd(self, x_start):
        """
        Get the prior KL term for the variational lower-bound, measured in
        bits-per-dim.
        This term can't be optimized, as it only depends on the encoder.
        :param x_start: the [N x C x ...] tensor of inputs.
        :return: a batch of [N] KL values (in bits), one per batch element.
        """
        batch_size = x_start.shape[0]
        t = th.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device)
        qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
        kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0)
        return mean_flat(kl_prior) / np.log(2.0)

    def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None):
        """
        Compute the entire variational lower-bound, measured in bits-per-dim,
        as well as other related quantities.
        :param model: the model to evaluate loss on.
        :param x_start: the [N x C x ...] tensor of inputs.
        :param clip_denoised: if True, clip denoised samples.
        :param model_kwargs: if not None, a dict of extra keyword arguments to
            pass to the model. This can be used for conditioning.
        :return: a dict containing the following keys:
                 - total_bpd: the total variational lower-bound, per batch element.
                 - prior_bpd: the prior term in the lower-bound.
                 - vb: an [N x T] tensor of terms in the lower-bound.
                 - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep.
                 - mse: an [N x T] tensor of epsilon MSEs for each timestep.
        """
        device = x_start.device
        batch_size = x_start.shape[0]

        vb = []
        xstart_mse = []
        mse = []
        for t in list(range(self.num_timesteps))[::-1]:
            t_batch = th.tensor([t] * batch_size, device=device)
            noise = th.randn_like(x_start)
            x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise)
            # Calculate VLB term at the current timestep
            with th.no_grad():
                out = self._vb_terms_bpd(
                    model,
                    x_start=x_start,
                    x_t=x_t,
                    t=t_batch,
                    clip_denoised=clip_denoised,
                    model_kwargs=model_kwargs,
                )
            vb.append(out["output"])
            xstart_mse.append(mean_flat((out["pred_xstart"] - x_start) ** 2))
            eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"])
            mse.append(mean_flat((eps - noise) ** 2))

        vb = th.stack(vb, dim=1)
        xstart_mse = th.stack(xstart_mse, dim=1)
        mse = th.stack(mse, dim=1)

        prior_bpd = self._prior_bpd(x_start)
        total_bpd = vb.sum(dim=1) + prior_bpd
        return {
            "total_bpd": total_bpd,
            "prior_bpd": prior_bpd,
            "vb": vb,
            "xstart_mse": xstart_mse,
            "mse": mse,
        }

def _extract_into_tensor(arr, timesteps, broadcast_shape):
    """
    Extract values from a 1-D numpy array for a batch of indices.
    :param arr: the 1-D numpy array.
    :param timesteps: a tensor of indices into the array to extract.
    :param broadcast_shape: a larger shape of K dimensions with the batch
                            dimension equal to the length of timesteps.
    :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims.
    """
    res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float()
    while len(res.shape) < len(broadcast_shape):
        res = res[..., None]
    return res + th.zeros(broadcast_shape, device=timesteps.device)
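

# Minimal smoke test (an illustrative sketch, not part of the original module).
# The toy epsilon-predictor below stands in for a real denoising network; it
# only has to map (x, t) -> a tensor shaped like x.
if __name__ == "__main__":
    class _ToyEps(th.nn.Module):
        def __init__(self, dim):
            super().__init__()
            self.net = th.nn.Linear(dim + 1, dim)

        def forward(self, x, t, **kwargs):
            # Append a normalized timestep feature to x and regress epsilon.
            t_feat = t.float()[:, None] / 1000.0
            return self.net(th.cat([x, t_feat], dim=1))

    T, D = 100, 8
    diffusion = GaussianDiffusion(
        betas=get_named_beta_schedule("linear", T),
        model_mean_type=ModelMeanType.EPSILON,
        model_var_type=ModelVarType.FIXED_SMALL,
        loss_type=LossType.MSE,
    )
    net = _ToyEps(D)
    x0 = th.randn(4, D)
    t = th.randint(0, T, (4,))
    losses = diffusion.training_losses(net, x0, t)
    print("loss:", losses["loss"].mean().item())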