o
     ݱi                     @   s6   d dl Z d dlmZ d dlmZ G dd dejZdS )    N)	CLIPModelc                       s8   e Zd Zdef fddZdejdejfddZ  ZS )CLIPTextEncoder	embed_dimc                    s>   t    td| _| j D ]}d|_qtd|| _	d S )Nzopenai/clip-vit-base-patch32Fi   )
super__init__r   from_pretrainedlanguage_model
parametersrequires_gradnnLinearhead)selfr   p	__class__ B/data/cameron/vidgen/unified-world-model/models/common/language.pyr      s
   
zCLIPTextEncoder.__init__	input_idsattention_maskc                 C   s   | j j||d}| |}|S )N)r   r   )r   Zget_text_featuresr   )r   r   r   Zfeatsr   r   r   forward   s
   
zCLIPTextEncoder.forward)	__name__
__module____qualname__intr   torchTensorr   __classcell__r   r   r   r   r      s    r   )r   torch.nnr   transformersr   Moduler   r   r   r   r   <module>   s    