from typing import Optional

import numpy as np
import torch

try:
    from qwen_vl_utils import extract_vision_info, process_vision_info
except ImportError as e:
    print("qwen_vl_utils is not available. Reason1 model will not work.")

from transformers.models.auto.processing_auto import AutoProcessor

from cosmos_predict2._src.imaginaire.utils import log

_LOCK_TIMEOUT_SECONDS = 60


def build_tokenizer(tokenizer_type: str, cache_dir: Optional[str] = None):
    return Processor(tokenizer_type, cache_dir)


def flatten_content_list(messages):
    new_messages = []
    for message in messages:
        if "content" in message and isinstance(message["content"], list):
            text_list = [item["text"] for item in message["content"]]
            message["content"] = " ".join(text_list)
        new_messages.append(message)
    return new_messages
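
# Example (illustrative): flatten_content_list collapses structured text content into a
# plain string so that text-only tokenizers can consume the messages, e.g.
#   [{"role": "user", "content": [{"type": "text", "text": "Describe"}, {"type": "text", "text": "the scene."}]}]
# becomes
#   [{"role": "user", "content": "Describe the scene."}]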
d Zdd Zdd ZdS )r   Qwen/Qwen2.5-VL-3B-InstructNc                 C   s   || _ |dvrtd| dd|vrd| _nd| _d| d}d	d
lm} ||}t|| _t	d t
| jdrF| jj| jj| _nd | _t
| jdrZ| jj| jj| _nd | _t
| jdrq| jjj| _| jjj| _d S | jj| _| jj| _d S )N)zQwen/Qwen2.5-VL-7B-Instructr   zQwen/Qwen2-VL-2B-InstructzQwen/Qwen2.5-VL-32B-InstructzQwen/Qwen2.5-VL-72B-InstructzQwen/Qwen2.5-0.5BzError loading processor z,, please check if the tokenizer is availableVLFTz8s3://bucket/cosmos_reasoning1/pretrained/Qwen_tokenizer//r   )get_checkpoint_pathz.Successfully loaded processor from local cacheimage_tokenvideo_token	tokenizer)name
ValueErroris_vision_tokenizer3cosmos_predict2._src.imaginaire.utils.checkpoint_dbr    r   from_pretrained	processorr   infohasattrr#   convert_tokens_to_idsr!   image_token_idr"   video_token_ideos_token_ideos_idpad_token_idpad_id)selfr$   r	   s3_urir    r   r   r   __init__4   s,   

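
    # Note (illustrative): on Qwen2/Qwen2.5-VL checkpoints `processor.image_token` and
    # `processor.video_token` are typically "<|image_pad|>" and "<|video_pad|>", so the
    # corresponding ids are resolved above; text-only checkpoints (e.g. Qwen/Qwen2.5-0.5B)
    # leave image_token_id / video_token_id as None.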
zProcessor.__init__FptTc                 C   s   |sJ d| j drk| jst|}| jj|d||d}t|dd\}}}	t|}
g }|
D ]}d|v r;||d  q.| jrL| j|g||d||d	}n	| j|gd|d
}||d< |d d |d< |d d |d< |S t	d| j  )Nztokenize must be True
Qwen/Qwen2F)tokenizeadd_generation_promptadd_vision_idT)Zreturn_video_kwargsvideofps)r   imagesvideospaddingreturn_tensorsr<   )r   r?   r@   r   	input_idsr   attention_maskz:apply_chat_template is not implemented for tokenizer_type )
r$   
startswithr&   r   r)   apply_chat_templater   r   r   r%   )r3   r   r9   r@   r8   r:   r   Zimage_inputsZvideo_inputs_Zvision_infosZfps_listvision_infoinputsr   r   r   rD   ]   sH   	zProcessor.apply_chat_templatec                    s  t tjr'jdkr't fddtjd D }|jjks%J |S t tjr3  nt	
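
    # Example input (illustrative) for apply_chat_template with one video turn:
    #   messages = [
    #       {"role": "user", "content": [
    #           {"type": "video", "video": "/path/to/clip.mp4", "fps": 2},
    #           {"type": "text", "text": "Describe the video."},
    #       ]},
    #   ]
    # The returned dict holds 1-D "input_ids" / "attention_mask", the processor's vision
    # tensors, and the per-video "fps" list.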
}|jdks?J  jdrd}d}d	}d
}d} jj|}	 jj|}
 jj|}t	||	kd }t	||
kd }t	j|td}t|t|ksJ t||D ]\}}||d  |krd||| || < qntd j |j|jksJ t tjrt|S | S )a  
        Add a mask to the assistant tokens.
        This is used to mask out tokens that are not generated by the assistant (e.g.,  system prompts, user prompts, chat templates), such that in the loss computation, only the tokens generated by the assistant are used.
        If there are multiple turns in the conversation, the mask will mask all the assistant tokens in each turn.

        Args:
            tokens (Union[List[int], torch.Tensor]): The tokens to add the mask to.
        Returns:
            Union[List[bool], torch.Tensor]: The mask. True for tokens generated by the assistant (i.e. should apply loss on), False for tokens not generated by the assistant.
           c                    s   g | ]	}  | qS r   )add_assistant_tokens_mask)r   ir3   tokensr   r   r      s    z7Processor.add_assistant_tokens_mask.<locals>.<listcomp>r      r7   z<|im_start|>z
<|im_end|>Z	assistant   )dtypeTz@add_assistant_tokens_mask is not implemented for tokenizer_type )r   torchTensorndimstackrangeshapecpunumpynparrayr$   rC   r)   r#   r,   where
zeros_likeboollenzipr%   
from_numpytolist)r3   rL   maskZ	np_tokensZ	BOS_TOKENZ	EOS_TOKENROLEZSTART_OFFSETZ
END_OFFSETbos_token_idr/   Zrole_idstart_indicesend_indicesmasksstartendr   rK   r   rI      s:   $"
z#Processor.add_assistant_tokens_maskc                 O      | j j|i |S r
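
    # Illustration: with the Qwen chat format
    #   <|im_start|>user ... <|im_end|> <|im_start|>assistant ... <|im_end|>
    # only the spans inside assistant turns are marked True, so the training loss is
    # applied to assistant replies in every turn of the conversation.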

    def encode(self, *args, **kwargs):
        return self.processor.encode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        return self.processor.decode(*args, **kwargs)


if __name__ == "__main__":
    processor = Processor("Qwen/Qwen2.5-VL-3B-Instruct")
    print("done")
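

# Illustrative end-to-end sketch (assumes the Qwen checkpoint can be resolved locally):
#   tokenizer = build_tokenizer("Qwen/Qwen2.5-VL-7B-Instruct")
#   inputs = tokenizer.apply_chat_template(messages, tokenize=True)
#   loss_mask = tokenizer.add_assistant_tokens_mask(inputs["input_ids"])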