o
    iq                     @   sZ   d dl mZmZ d dlZd dlmZ d dlm  mZ G dd dZ	G dd dej
ZdS )    )DictTupleNc                
   @   s:   e Zd ZdZdd Zdedededejdejf
d	d
Z	dS )PositionGetterab  Generates and caches 2D spatial positions for patches in a grid.

    This class efficiently manages the generation of spatial coordinates for patches
    in a 2D grid, caching results to avoid redundant computations.

    Attributes:
        position_cache: Dictionary storing precomputed position tensors for different
            grid dimensions.
    c                 C   s
   i | _ dS )z7Initializes the position generator with an empty cache.N)position_cache)self r   G/data/cameron/da3_repo/src/depth_anything_3/model/dinov2/layers/rope.py__init__"   s   
zPositionGetter.__init__
batch_sizeheightwidthdevicereturnc           	      C   sr   ||f| j vr"tj||d}tj||d}t||}|| j ||f< | j ||f }|d|| d|dd S )a  Generates spatial positions for a batch of patches.

        Args:
            batch_size: Number of samples in the batch.
            height: Height of the grid in patches.
            width: Width of the grid in patches.
            device: Target device for the position tensor.

        Returns:
            Tensor of shape (batch_size, height*width, 2) containing y,x coordinates
            for each position in the grid, repeated for each batch item.
        r         )r   torcharangecartesian_prodviewexpandclone)	r   r
   r   r   r   y_coordsx_coords	positionsZcached_positionsr   r   r   __call__&   s    zPositionGetter.__call__N)
__name__
__module____qualname____doc__r	   intr   r   Tensorr   r   r   r   r   r      s    
r   c                       s   e Zd ZdZddedef fddZded	ed
ejdej	de
ejejf f
ddZedejdejfddZdejdejdejdejdejf
ddZdejdejdejfddZ  ZS )RotaryPositionEmbedding2Da  2D Rotary Position Embedding implementation.

    This module applies rotary position embeddings to input tokens based on their
    2D spatial positions. It handles the position-dependent rotation of features
    separately for vertical and horizontal dimensions.

    Args:
        frequency: Base frequency for the position embeddings. Default: 100.0
        scaling_factor: Scaling factor for frequency computation. Default: 1.0

    Attributes:
        base_frequency: Base frequency for computing position embeddings.
        scaling_factor: Factor to scale the computed frequencies.
        frequency_cache: Cache for storing precomputed frequency components.
          Y@      ?	frequencyscaling_factorc                    s    t    || _|| _i | _dS )zInitializes the 2D RoPE module.N)superr	   base_frequencyr'   frequency_cache)r   r&   r'   	__class__r   r   r	   P   s   

z"RotaryPositionEmbedding2D.__init__dimseq_lenr   dtyper   c                 C   s   ||||f}|| j vrRtjd|d|d | }d| j|  }tj|||jd}td||}	|	|}	tj|	|	fdd}	|		 |}
|	
 |}|
|f| j |< | j | S )	ap  Computes frequency components for rotary embeddings.

        Args:
            dim: Feature dimension (must be even).
            seq_len: Maximum sequence length.
            device: Target device for computations.
            dtype: Data type for the computed tensors.

        Returns:
            Tuple of (cosine, sine) tensors for frequency components.
        r   r   r   r%   )r   r/   zi,j->ijr   r-   )r*   r   r   floatr)   r/   einsumtocatcossin)r   r-   r.   r   r/   	cache_key	exponentsZinv_freqr   anglesZcos_componentsZsin_componentsr   r   r   _compute_frequency_componentsW   s   


z7RotaryPositionEmbedding2D._compute_frequency_componentsxc                 C   sH   | j d }| dd|d f | d|d df }}tj| |fddS )zPerforms feature rotation by splitting and recombining feature dimensions.

        Args:
            x: Input tensor to rotate.

        Returns:
            Rotated feature tensor.
        r   .Nr   r0   )shaper   r4   )r;   feature_dimx1x2r   r   r   _rotate_featuresx   s   

*z*RotaryPositionEmbedding2D._rotate_featurestokensr   cos_compsin_compc                 C   s^   t ||dddddddf }t ||dddddddf }|| | ||  S )a]  Applies 1D rotary position embeddings along one dimension.

        Args:
            tokens: Input token features.
            positions: Position indices.
            cos_comp: Cosine components for rotation.
            sin_comp: Sine components for rotation.

        Returns:
            Tokens with applied rotary position embeddings.
        N)F	embeddingr@   )r   rA   r   rB   rC   r5   r6   r   r   r   _apply_1d_rope   s   $$z(RotaryPositionEmbedding2D._apply_1d_ropec           	      C   s   | dd dksJ d|jdkr|jd dksJ d| dd }t| d }| |||j|j\}}|jddd\}}| 	||d	 ||}| 	||d
 ||}t
j||fddS )aI  Applies 2D rotary position embeddings to input tokens.

        Args:
            tokens: Input tensor of shape (batch_size, n_heads, n_tokens, dim).
                   The feature dimension (dim) must be divisible by 4.
            positions: Position tensor of shape (batch_size, n_tokens, 2) containing
                      the y and x coordinates for each token.

        Returns:
            Tensor of same shape as input with applied 2D rotary position embeddings.

        Raises:
            AssertionError: If input dimensions are invalid or positions are malformed.
        r   r   r   zFeature dimension must be even   z3Positions must have shape (batch_size, n_tokens, 2)r   r0   ).r   ).r   )sizendimr<   r!   maxr:   r   r/   chunkrF   r   r4   )	r   rA   r   r=   Zmax_positionrB   rC   Zvertical_featuresZhorizontal_featuresr   r   r   forward   s"   z!RotaryPositionEmbedding2D.forward)r$   r%   )r   r   r   r    r1   r	   r!   r   r   r/   r   r"   r:   staticmethodr@   rF   rL   __classcell__r   r   r+   r   r#   ?   s8    
!
$r#   )typingr   r   r   torch.nnnnZtorch.nn.functional
functionalrD   r   Moduler#   r   r   r   r   <module>   s   (