o
    ?߱i                     @   s   d Z ddlZddlmZ ddlmZ 		ddededededB d	edB d
eddeef eeeeef B fddZ				ddededej	dej
dededB dedB dedB deeB d
efddZdS )zo
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

Varlen utilities
    N)Tensor)is_torch_compilingquerykeyvalue	seqlens_Q
seqlens_KVreturnc           
      C   s  t  rtd| jd |jd ks| jd |jd kr2td| jd d|jd d|jd d|d u |d u A rEtd|d|d|d u rO|d u rOd	S |d usUJ |d us[J t|tret|tsitd
|j| jksu|j| jkrtd|jd|jd| jd|jtj	ks|jtj	krtd|jd|jd|
 dks|
 dkrtd|
 d|
 d|jd |jd krtd|jd|jd|jd dk rtd|jd|jd| jd dkrtd| jd d|
 |
   krdksJ  J |jd |jd   krdks!J  J |j|j  kr1tj	ks4J  J |  }|  }tjdgtj	|jd}tj||dtj	gdd}tj||dtj	gdd}	t|tssJ t|ts{J ||	||fS )NzRunning 'generate_varlen_parameters' in a torch-compiled region is disallowed as it results in graph breaks. Please consider calling ahead of time and pass 'cumulative_seqlen_{Q,KV}' and 'max_seqlen_{Q,KV}' instead of 'seqlens_{Q,KV}' to 'attention'. r   z9Q, K, and V must match in batch size, got query.shape[0]=z, key.shape[0]=z, value.shape[0]=.z]Variable length Attention requires both of seqlens_Q and seqlens_KV to be set, got seqlens_Q=z, seqlens_KV=)NNr   r   z.seqlens_Q and seqlens_KV must both be tensors.zQseqlens_Q and seqlens_KV must be on the same device as QKV, but seqlens_Q.device=z, seqlens_KV.device=z, query.device=zOseqlens_Q and seqlens_KV must both be torch.int32 tensors, got seqlens_Q.dtype=z, seqlens_KV.dtype=   zGseqlens_Q and seqlens_KV must both be 1-D tensors, got seqlens_Q.dim()=z, seqlens_KV.dim()=zAseqlens_Q and seqlens_KV must match in size, got seqlens_Q.shape=z, seqlens_KV.shape=zPseqlens_Q and seqlens_KV must contain at least one element, got seqlens_Q.shape=zfVariable length attention only supports sequence-packed memory layout (batch = 1), got query.shape[0]=)dtypedevice)dim)r   RuntimeErrorshape
ValueError
isinstancer   r   r   torchint32r   maxitemtensorcatcumsumtoint)
r   r   r   r   r   max_seqlen_Qmax_seqlen_KVzcumulative_seqlen_Qcumulative_seqlen_KV r!   Z/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/attention/varlen.pygenerate_varlen_parameters   s   (((,&  r#   Ftoken_layout_listhead_dimr   r   requires_gradwindow_size_liststride_listdilation_list	is_causalc	                 O   s   ddl m}m} | std|dstdddlm} |du r)dd	 | D }ng }t|| D ]\}}td
d t||D }|| q0||	| |||||ddd|||d|
}|S )a  
    Configures metadata for variable-length multi-dimensional attention operations.

    This function prepares the metadata needed for varlen/varsized sparse attention,
    including backend selection and tile configurations. The metadata should be generated
    ahead of time (outside of torch.compile regions) and reused across forward/backward passes.

    **Requires NATTEN >= 0.21.6.dev1**

    Parameters:
        token_layout_list (list): List of token layout tuples describing the spatial arrangement
            of tokens for each sequence. For example, for 2D attention with two sequences of
            sizes (H1, W1) and (H2, W2), pass [(H1, W1), (H2, W2)].

        head_dim (int): Attention head dimension.

        device (torch.device): Target device for runtime.

        dtype (torch.dtype): Tensor element type.

        requires_grad (bool): Whether tensors will require backward pass.

        window_size_list (list | None): Per-sequence window sizes for variable kernel sizes.

        stride_list (list | None): Per-sequence stride values for variable strides.

        dilation_list (list | None): Per-sequence dilation values for variable dilations.

        is_causal (tuple | bool): Toggle causal masking. Default is False.

    Returns:
        dict: Runtime metadata for varlen operations. This dict should be passed to
            `natten_multi_dimensional_attention_varlen` as the `metadata` parameter.
    r   )natten_supportednatten_version_satisfiesz5generate_multi_dim_varlen_parameters requires NATTEN.z0.21.6.dev1zgenerate_multi_dim_varlen_parameters requires NATTEN >= 0.21.6.dev1. Please upgrade NATTEN to use varlen/varsized attention features.)configure_varlenNc                 S   s   g | ]}|qS r!   r!   ).0token_layoutr!   r!   r"   
<listcomp>   s    z8generate_multi_dim_varlen_parameters.<locals>.<listcomp>c                 s   s$    | ]\}}|d kr|n|V  qdS )r   Nr!   )r.   kxr!   r!   r"   	<genexpr>   s   " z7generate_multi_dim_varlen_parameters.<locals>.<genexpr>)r$   r%   r   r   r&   r*   kernel_sizestridedilationZkernel_size_listr(   r)   )	0cosmos_predict2._src.imaginaire.attention.nattenr+   r,   r   Znatten.varlenr-   ziptupleappend)r$   r%   r   r   r&   r'   r(   r)   r*   argskwargsr+   r,   r-   Zwindow_size_list_filteredwindow_sizer/   Zwindow_size_filteredmetadatar!   r!   r"   $generate_multi_dim_varlen_parameters{   s@   1r?   )NN)NNNF)__doc__r   r   /cosmos_predict2._src.imaginaire.attention.utilsr   r9   r   r#   listr   r   booldictr?   r!   r!   r!   r"   <module>   sX   
d	