o
    ?߱i'                     @   s   d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZmZ 		dd
edededededededefddZdS )z
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

Flash Attention v3 (flash3) backend checks
    )partial)Tensor)attention_param_checksattention_tensor_checks)FLASH3_SUPPORTED)get_bwd_dtypesget_fwd_dtypes)
CausalType)get_arch_taglog_or_raise_errorFquerykeyvalue	is_causalcausal_type	is_varlenraise_errorreturnc                 C   s"  t t|d}ts|dtd dS t| j}t|}	t|}
t| |||	|
dd|dd	s2|dtd dS | j	d	 |j	d	 krt| j	d	 }|j	d	 }|d
krN|dkstd|  krXdkren nd|  krddkstn |d|d|dt
d dS t| ||||d |r|tjtjfvr|dt
d dS dS )a  
    Input validation function for the flash3 backend.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with flash3 backend.

    )r   zFlash Attention v3 (flash3) is not supported in this environment. Run with debug logs to find out why, or choose another backend.)	exceptionFTzFlash Attention v3 (flash3))	r   r   r   supported_dtypes_forwardsupported_dtypes_backwardsupports_mlasupports_gqa_mqar   backend_namez>Flash Attention v3 (flash3) does not support the given inputs.@   i         `   zFlash Attention v3 (flash3) does not support this head dim combination. Expected either head_dim_qk <= 64 and head_dim_v <= 512, or 128 <= head_dim_qk <= 192 and 96 <= head_dim_v <= 128, got head_dim_q=z, head_dim_v=.)r   r   r   r   r   z=Flash Attention v3 only supports bottom-right causal masking.)r   r   r   RuntimeErrorr
   devicer   r   r   shape
ValueErrorr   r	   BottomRightDontCare)r   r   r   r   r   r   r   	target_fnarch_tag
fwd_dtypes
bwd_dtypesZ
head_dim_q
head_dim_v r+   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/attention/flash3/checks.pyflash3_attention_check"   s^   &


>r-   N)F)__doc__	functoolsr   torchr   0cosmos_predict2._src.imaginaire.attention.checksr   r   0cosmos_predict2._src.imaginaire.attention.flash3r   Z5cosmos_predict2._src.imaginaire.attention.flash3.metar   r   /cosmos_predict2._src.imaginaire.attention.masksr	   /cosmos_predict2._src.imaginaire.attention.utilsr
   r   boolr-   r+   r+   r+   r,   <module>   s4   
