
    ܱi                        d dl mZmZmZmZmZ d dlZd dlmZm	Z	 d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ ddej        fdZ G d de          Zd Z G d de          Z G d de          ZdS )    )CallableDictListOptionalUnionN)	rearrangerepeat)TextToVideoSDPipeline)StableVideoDiffusionPipeline)TextToVideoSDPipelineOutput)"StableVideoDiffusionPipelineOutput)LoraLoaderMixinTextualInversionLoaderMixin)randn_tensor)_resize_with_antialiasingnpvideoc                     | j         \  }}}}}g }t          |          D ]K}	| |	                             dddd          }
|                    |
|          }|                    |           L|S )N   r         )shaperangepermutepostprocessappend)r   	processoroutput_type
batch_sizechannels
num_framesheightwidthoutputs	batch_idx	batch_vidbatch_outputs               =/data/cameron/vidgen/Ctrl-World/models/pipeline_ctrl_world.pysvd_tensor2vidr)      s     7<k3J*feG:&& % %	)$,,Q1a88	 ,,YDD|$$$$N    c                       e Zd Z ej                    	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	ed
edededef
d            ZdS )LatentToVideoPipelineN   2         "@        r   Tr   r!   num_inference_stepsetareturn_dictcallback_stepsc                    |p| j         j        j        | j        z  }|p| j         j        j        | j        z  }d}|                     |||||||           |t          |t                    rd}n4|%t          |t                    rt          |          }n|j	        d         }|
j
        }|dk    }||                    dd          nd}|                     ||||||||          }| j                            ||           || j        j        }nt          |          }|                     |	|          }t          |          || j        j        z  z
  }|}|rt%          j        ||g          n|}|                     |          5 }t+          |          D ]\  }} |rt%          j        |
gd	z            n|
}!| j                            |!|           }!|t%          j        ||          }|                      |!| |||||
          j        }"|r#|"                    d	          \  }#}$|#||$|#z
  z  z   }"|
j	        \  }%}&}'}}|
                    dd	ddd                              |%|'z  |&||          }
|"                    dd	ddd                              |%|'z  |&||          }" | j        j        |"| |
fi |j        }
|
dddf                             |%|'|&||                              dd	ddd          }
|t          |          dz
  k    s|dz   |k    rB|dz   | j        j        z  dk    r,|                                 |||z  dk    r ||| |
           	 ddd           n# 1 swxY w Y   |                     |
          }(|dk    r|(})ntA          |(          })tC          | d          r | j"        | j"        #                                 |s|)|
fS tI          |)          S )u  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the video generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated video.
            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated video.
            num_frames (`int`, *optional*, defaults to 16):
                The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
                amounts to 2 seconds of video.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality videos at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 7.5):
                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
                `guidance_scale` is defined as `w` of equation 2. of [Imagen
                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
                1`. Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`,
                usually at the expense of lower video quality.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the video generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
                [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for video
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will ge generated by sampling using the supplied random `generator`. Latents should be of shape
                `(batch_size, num_channel, num_frames, height, width)`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            output_type (`str`, *optional*, defaults to `"np"`):
                The output format of the generate video. Choose between `torch.FloatTensor` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.TextToVideoSDPipelineOutput`] instead of a
                plain tuple.
            callback (`Callable`, *optional*):
                A function that will be called every `callback_steps` steps during inference. The function will be
                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function will be called. If not specified, the callback will be
                called at every step.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).

        Examples:

        Returns:
            [`~pipelines.stable_diffusion.TextToVideoSDPipelineOutput`] or `tuple`:
            [`~pipelines.stable_diffusion.TextToVideoSDPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
            When returning a tuple, the first element is a list with the generated frames.
        r   Nr         ?scale)prompt_embedsnegative_prompt_embeds
lora_scaledevicetotalr   )encoder_hidden_statescross_attention_kwargscondition_latentmaskmotionr      ptfinal_offload_hookframes)%unetconfigsample_sizevae_scale_factorcheck_inputs
isinstancestrlistlenr   r<   get_encode_prompt	schedulerset_timesteps	timestepsprepare_extra_step_kwargsordertorchcatprogress_bar	enumeratescale_model_inputtensorsamplechunkr   reshapestepprev_sampleupdatedecode_latents
tensor2vidhasattrrF   offloadr   )*selfpromptr"   r#   r!   r1   guidance_scalenegative_promptr2   	generatorlatentsr8   r9   r   r3   callbackr4   r@   rA   rB   rV   rC   num_images_per_promptr   r<   do_classifier_free_guidancetext_encoder_lora_scaleextra_step_kwargsnum_warmup_stepsuncondition_latentr[   itlatent_model_input
noise_prednoise_pred_uncondnoise_pred_textbszchannelrH   video_tensorr   s*                                             r(   __call__zLatentToVideoPipeline.__call__!   s	   | O49+7$:OOM)58MM ! 	FE>?MSi	
 	
 	

 *VS"9"9JJJvt$<$<VJJ&,Q/J  '5s&:# :P9["&&w555ae 	  ++!''#9. , 	
 	
 	$$%8$HHH0II"%i.. !::9cJJ y>>,?$.BV,VV-Pk  B59&8:J%KLLL  rB%899 $	0\!),, #0 #01A\%iUYy1}%=%=%=bi"%)^%E%EFXZ[%\%\"%"\&@@@F!YY&*7+A%5! '     / l9C9I9I!9L9L6%!2^YjGj5k!kJ 7>m3WfeV!//!Q1a88@@vwX]_eff'//1aA>>FFsV|U\^cekll
 .$.-j!WZZHYZZf "$'*223PVWW__`acdfgijlmnn I***A9I/I/IqSTuX\XfXlNlpqNqNq '')))+N0Ba0G0G Aw///G#0$	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0 $	0L **733$ EE|,,E 4-.. 	.43J3V#++--- 	$7##*%8888s   GM''M+.M+)NNNr-   r.   r/   Nr0   NNNNr   TNr   NNNNN)	__name__
__module____qualname__rY   no_gradintfloatboolr    r*   r(   r,   r,       s        U]__ #% $  $-H9 H9
 H9 !H9 H9 H9" #H9 H9 H9 _H9 H9 H9r*   r,   c                 t    || j         z
  }|dk     rt          d| j          d| d          | dd|z  z            S )zNAppends dimensions to the end of a tensor until it has target_dims dimensions.r   z
input has z dims but target_dims is z, which is less).)N)ndim
ValueError)xtarget_dimsdims_to_appends      r(   _append_dimsr      sR     16)Ncafcc{cccdddVg..//r*   c            $       d   e Zd Z ej                    ddddddddd	dd
dddddgddddddfdededee         dededededededee         dee         deeej	        e
ej	                 f                  deej                 dee         deeeeegdf                  de
e         def"d            ZdS ) CtrlWorldDiffusionPipeline@     N   r6         @      {Gz?r   pilrn   TFr"   r#   r!   r1   min_guidance_scalemax_guidance_scalefpsmotion_bucket_idnoise_aug_strengthdecode_chunk_sizenum_videos_per_promptrm   r   callback_on_step_end"callback_on_step_end_tensor_inputsr3   c                    |p| j         j        j        | j        z  }|p| j         j        j        | j        z  }||n| j         j        j        }||n|}| j         j        }|dk    }|}|j        d         }|r*t          j        |          }t          j	        ||g          }| j
        j        t          j        k    o| j
        j        j        }|j        d         dk    r| j                            |||          }t!          |j        ||j        |j                  }|r%| j
                            t          j                   |                     ||||          } |                     |j                  } |r%| j
                            t          j                   nH|| j
        j        j        z  } |rt          j	        | gd	z            } |                     |j                  } |L|j        \  }!}"}#}$}%||"z   }&|                     d
                              d
|&d
d
d
          } |rd| ddd|"f<   n,|                     d
                              d
|d
d
d
          } |                     |	|
||j        |||          }'|'                    |          }'| j                            ||           | j        j        }(| j         j        j        })|                     ||z  ||)|||j        |||	  	        }t          j        |||                              d          }*|*                    ||j                  }*|*                    ||z  d
          }*t=          |*|j                  }*|*| _         tC          |(          || j        j"        z  z
  }+tC          |(          | _#        |A|j        \  }!},}#}$}%tI          j        |dd|          }|rt          j	        |gd	z            n|}||rt          j	        |gd	z            n|}| %                    |          5 }-tM          |(          D ]\  }.}/|rt          j	        |gd	z            n|}0| j        '                    |0|/          }0|t          j	        ||0gd
          }0t          j	        |0| gd	          }0||.dk    rt          j	        |0|gd          }0|0                    | j         j                  }0|                    | j         j                  }|                      |0|/||'d|          d         }1||1ddddddd|$d|%f         }1||1dd|"dddddddf         }1|r(|1(                    d	          \  }2}3|2| j)        |3|2z
  z  z   }1| j        *                    |1|/|          j+        }|Bi }4|D ]}5tY                      |5         |4|5<    || |.|/|4          }6|6-                    d|          }|.tC          |(          d
z
  k    s|.d
z   |+k    r*|.d
z   | j        j"        z  dk    r|-.                                 	 ddd           n# 1 swxY w Y   |dk    su|r%| j
                            t          j                   |                    | j
        j                  }| /                    |||          }7ta          |7| j        |          }7n|}7| 1                                 |s|7|fS te          |7          S )   
        The call function to the pipeline for generation.

        Args:
            image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.FloatTensor`):
                Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
                [`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
            height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The width in pixels of the generated image.
            num_frames (`int`, *optional*):
                The number of video frames to generate. Defaults to 14 for `stable-video-diffusion-img2vid` and to 25 for `stable-video-diffusion-img2vid-xt`
            num_inference_steps (`int`, *optional*, defaults to 25):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference. This parameter is modulated by `strength`.
            min_guidance_scale (`float`, *optional*, defaults to 1.0):
                The minimum guidance scale. Used for the classifier free guidance with first frame.
            max_guidance_scale (`float`, *optional*, defaults to 3.0):
                The maximum guidance scale. Used for the classifier free guidance with last frame.
            fps (`int`, *optional*, defaults to 7):
                Frames per second. The rate at which the generated images shall be exported to a video after generation.
                Note that Stable Diffusion Video's UNet was micro-conditioned on fps-1 during training.
            motion_bucket_id (`int`, *optional*, defaults to 127):
                The motion bucket ID. Used as conditioning for the generation. The higher the number the more motion will be in the video.
            noise_aug_strength (`int`, *optional*, defaults to 0.02):
                The amount of noise added to the init image, the higher it is the less the video will look like the init image. Increase it for more motion.
            decode_chunk_size (`int`, *optional*):
                The number of frames to decode at a time. The higher the chunk size, the higher the temporal consistency
                between frames, but also the higher the memory consumption. By default, the decoder will decode all frames at once
                for maximal quality. Reduce `decode_chunk_size` to reduce memory usage.
            num_videos_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor is generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
                plain tuple.

        Returns:
            [`~pipelines.stable_diffusion.StableVideoDiffusionPipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableVideoDiffusionPipelineOutput`] is returned,
                otherwise a `tuple` is returned where the first element is a list of list with the generated frames.

        Examples:

        ```py
        from diffusers import StableVideoDiffusionPipeline
        from diffusers.utils import load_image, export_to_video

        pipe = StableVideoDiffusionPipeline.from_pretrained("stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16")
        pipe.to("cuda")

        image = load_image("https://lh3.googleusercontent.com/y-iFOHfLTwkuQSUegpwDdgKmOjRSTvPxat63dQLB25xkTs4lhIbRUFeNBWZzYf370g=s1200")
        image = image.resize((1024, 576))

        frames = pipe(image, num_frames=25, decode_chunk_size=8).frames[0]
        export_to_video(frames, "generated.mp4", fps=7)
        ```
        Nr6   r   r   r"   r#   rm   r<   dtyper   r   r   r0   r;   zb l c h w -> b (f l) (n c) h w)nfr=   dimF)r?   added_time_idsr3   frame_level_condrn   latentr   rG   )3rI   rJ   rK   rL   r!   r<   r   rY   
zeros_likerZ   vaer   float16force_upcastvideo_processor
preprocessr   tofloat32_encode_vae_imagescaling_factor	unsqueezer	   _get_add_time_idsrT   rU   rV   in_channelsprepare_latentslinspacer   r   _guidance_scalerQ   rX   _num_timestepseinopsr[   r\   r]   r`   rk   rb   rc   localspoprd   re   r)   maybe_free_model_hooksr   )8ri   imagetextr"   r#   r!   r1   r   r   r   r   r   r   r   rm   rn   r   r   r   r3   rB   
cond_wristhistoryr   his_cond_zeror<   rq   image_embeddingsr   negative_image_embeddingsneeds_upcastingnoiseimage_latentsBnum_hisCHWnum_frames_allr   rV   num_channels_latentsrk   rt   Fr[   rv   rw   rx   ry   rz   noise_pred_condcallback_kwargskcallback_outputsrH   s8                                                           r(   r   z#CtrlWorldDiffusionPipeline.__call__   s   R O49+7$:OOM)58MM#-#9ZZty?O?Z
1B1N--T^! '93&>#   %+A.
& 	X(-(89I(J(J%$y*CEU)VWW (.EM9Zdho>Z;r?a(33E&PU3VVE 	%,^c^ijjjE  1%-000 225&BWYtuuM),,-=-CDDM  1%-000!$(/"@@M* = !&	=/!*; < <),,-=-CDDM ")-Aw1a''1N)33A66==aQRTUWXYYM 0,/aaaj))33A66==aQPQSTUUM //"!'
 
 (**622 	$$%8$HHHN,	  $y/;&&.. "

 

 (:<NPZ[[eefghh'**67=AA'..z<Q/QSTUU%nglCC- y>>,?$.BV,VV!)nn!"=LAaAqz3SWX[efffJ6QaJ<>222WaJ2MZei	A...SZG%899 F	*\!),, E* E*1A\%iUYy1}%=%=%=bi"%)^%E%EFXZ[%\%\"&).G=O3PVW)X)X)X& &+Y0BM/RXY%Z%Z%Z")add).4F
3SYZ)[)[)[& &8%:%:49?%K%K"#3#6#6ty#G#G 
 "YY&*:#1 %%5 '   
 )!+AAAqqq2A2rrM!:J& ",AAAwxxAAAqqq,@!AJ / q9C9I9I!9L9L6%!2T5HO^oLo5p!pJ  .--j!WEEQ'3&(O? 9 9-3XXa[**';';D!Q'X'X$.229gFFGI***A9I/I/IqSTuX\XfXlNlpqNqNq '')))KE*F	* F	* F	* F	* F	* F	* F	* F	* F	* F	* F	* F	* F	* F	* F	*P h&& 1%-000jj00G((*>OPPF#FD,@kZZZFFF##%%% 	"'>!1@@@@s   H
X**X.1X.r   r   r   rY   r   r   r   r   r   	Generatorr   FloatTensorrO   r   r   r   r   r   r*   r(   r   r      s       U]__
 $(#%$'$' #"&+//0MQ/3%*KO9B 3{A {A 	{A
 {A SM{A !{A "{A "{A {A {A  {A $C={A  (}{A E%/43H"HIJ{A  %+,!{A" c]#{A$ 'xc40@$0F'GH%{A& -1I'{A( ){A {A {A _{A {A {Ar*   r   c            $       d   e Zd Z ej                    ddddddddddd	dd
dddddgddddfdededee         dededededededee         dee         deeej	        e
ej	                 f                  deej                 dee         deeeeegdf                  de
e         def"d            ZdS )  TextStableVideoDiffusionPipelineNr   r   r   r6   r   r   r   r   r   r   rn   Tr   r"   r#   r!   r1   r   r   r   r   r   r   r   rm   r   r   r   r3   c                 ^   |p| j         j        j        | j        z  }|p| j         j        j        | j        z  }||n| j         j        j        }||n|}|                     |||           t          |t          j        j                  rd}n2t          |t                    rt          |          }n|j        d         }| j        }|	dk    }|dk    r|                     ||||          }ni|dk    r|rt          j        ||g          }|}nH|                     ||||          }|rt          j        ||g          }t          j        ||gd          }| j         j        j        dk    }|rt          j        |gd	z            }|
dz
  }
| j                            |||
          }t'          |j        ||j        |j                  }|||z  z   }| j        j        t          j        k    o| j        j        j        }|r%| j                            t          j                   |_|                     ||||          }|                    |j                  }|                    d                              d|ddd          }n|rt          j        |gd	z            }|r%| j                            t          j                   |                     |
|||j        |||          } |                     |          } | j                             ||           | j        j!        }!| j         j        j        }"| "                    ||z  ||"|||j        |||	  	        }t          j#        ||	|                              d          }#|#                    ||j                  }#|#                    ||z  d          }#tI          |#|j%                  }#|#| _&        t          |!          || j        j'        z  z
  }$t          |!          | _(        | )                    |          5 }%tU          |!          D ]i\  }&}'|rt          j        |gd	z            n|}(| j        +                    |(|'          }(|rt          j        ||(|gd	          }(nt          j        |(|gd	          }(|                      |(|'|| d          d         })|r(|),                    d	          \  }*}+|*| j-        |+|*z
  z  z   })| j        .                    |)|'|          j/        }|Bi },|D ]}-ta                      |-         |,|-<    || |&|'|,          }.|.1                    d|          }|&t          |!          dz
  k    s|&dz   |$k    r*|&dz   | j        j'        z  dk    r|%2                                 k	 ddd           n# 1 swxY w Y   |dk    sV|r%| j                            t          j                   | 3                    |||          }/ti          |/| j        |          }/n|}/| 5                                 |s|/S tm          |/          S )r   Nr   r   r6   r   r   r   	   r   r   r   r   r;   r=   F)r?   r   r3   rn   r   r   rG   )7rI   rJ   rK   rL   r!   rM   rN   PILImagerP   rQ   r   _execution_device_encode_imagerY   rZ   r   r   r   r   r<   r   r   r   r   r   r   r   r   r	   r   rT   rU   rV   r   r   r   r   r   rX   r   r[   r\   r]   r`   rk   rb   rc   r   r   rd   re   r)   r   r   )0ri   r   r8   r9   r"   r#   r!   r1   r   r   r   r   r   r   r   rm   rn   r   r   r   r3   rB   condition_typerA   r   r<   rq   r   motion_maskr   r   r   r   rV   r   rk   rt   r[   rv   rw   rx   ry   rz   r   r   r   r   rH   s0                                                   r(   r   z)TextStableVideoDiffusionPipeline.__call__3  s*   P O49+7$:OOM)58MM#-#9ZZty?O?Z
1B1N--T^ 	%/// eSY_-- 	(JJt$$ 	(UJJQJ' '93&># 7""#11%AVXsttV##* S %	+A=*Q R R,#11%AVXstt* S %	+A=*Q R R$y*:M)JPQRRRi&2a7& 	'9dVAX&&D Ag $//fE/RRU[IelZ_Zefff*U22(.EM9Zdho>Z 	-HKKemK,,,# 225&BWYtuuM),,-=-CDDM  -66q99@@JPQSTVWXX* E#(9.>-?!-C#D#D   	-HKKemK,,,//"!'
 
 (**622 	$$%8$HHHN,	  $y/;&&.. "

 

 (:<NPZ[[eefghh'**67=AA'..z<Q/QSTUU%nglCC- y>>,?$.BV,VV!)nn%899 #	*\!),, "* "*1A\%iUYy1}%=%=%=bi"%)^%E%EFXZ[%\%\" b).D:LN^3_ef)g)g)g&&).4FHX3Y_`)a)a)a&!YY&*:#1 % '   
 / q9C9I9I!9L9L6%!2T5HO^oLo5p!pJ.--j!WEEQ'3&(O? 9 9-3XXa[**';';D!Q'X'X$.229gFFGI***A9I/I/IqSTuX\XfXlNlpqNqNq '')))E"*#	* #	* #	* #	* #	* #	* #	* #	* #	* #	* #	* #	* #	* #	* #	*J h&& 1%-000((*>OPPF#FD,@kZZZFFF##%%% 	M1@@@@s   E;VV"%V"r   r   r*   r(   r   r   2  s       U]__ !%$(#%$'$' #"&+//0MQ/3%*KO9B  1EA EA
 EA EA SMEA !EA "EA "EA EA EA  EA $C=EA  (}EA  E%/43H"HIJ!EA" %+,#EA$ c]%EA& 'xc40@$0F'GH'EA( -1I)EA* +EA EA EA _EA EA EAr*   r   )r   )typingr   r   r   r   r   rY   r   r   r	   r   	diffusersr
   &models.pipeline_stable_video_diffusionr   Hdiffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synthr   Jdiffusers.pipelines.stable_video_diffusion.pipeline_stable_video_diffusionr   diffusers.loadersr   r   diffusers.utils.torch_utilsr   r   Tensorr)   r,   r   r   r   r   r*   r(   <module>r      s   8 8 8 8 8 8 8 8 8 8 8 8 8 8  $ $ $ $ $ $ $ $ 



  , + + + + + O O O O O O q p p p p p y y y y y y J J J J J J J J 4 4 4 4 4 4 p p p p p p %,    J9 J9 J9 J9 J91 J9 J9 J9X0 0 0}A }A }A }A }A!= }A }A }A~	GA GA GA GA GA'C GA GA GA GA GAr*   