o
    {i                     @   sx   d Z ddlZddlmZ ddlZddlmZ ddlmZ G dd deZG d	d
 d
ej	Z
G dd dej	Zdd ZdS )z3
Various positional encodings for the transformer.
    N)Enum)nn   )NestedTensorc                   @   s   e Zd ZdZdZdZdS )PositionEncodingZlearnedsineZsine_unnormN)__name__
__module____qualname__LEARNEDSINESINE_UNNORM r   r   Z/data/cameron/keygrip/volume_dino_tracks/dinov3/eval/detection/models/position_encoding.pyr      s    r   c                       s0   e Zd ZdZd fdd	Zdefd	d
Z  ZS )PositionEmbeddingSinez
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    @   '  FNc                    sP   t    || _|| _|| _|d ur|du rtd|d u r#dtj }|| _d S )NFz+normalize should be True if scale is passedr   )	super__init__num_pos_featstemperature	normalize
ValueErrormathpiscale)selfr   r   r   r   	__class__r   r   r   +   s   


zPositionEmbeddingSine.__init__tensor_listc              	   C   s  |j }|j}|d usJ | }|jdtjd}|jdtjd}| jrQd}|d |d d dd d d f |  | j }|d |d d d d dd f |  | j }n|d | j }|d | j }tj| jtj|j	d}| j
d|d  | j  }|d d d d d d d f | }	|d d d d d d d f | }
tj|	d d d d d d dd df  |	d d d d d d dd df  fd	d
d}	tj|
d d d d d d dd df  |
d d d d d d dd df  fd	d
d}
tj|
|	fdd
dddd}|S )N   )dtyper   gư>g      ?)r!   devicer      dim   )tensorsmaskcumsumtorchfloat32r   r   aranger   r#   r   stacksincosflattencatpermute)r   r   xr)   Znot_maskZy_embedZx_embedepsZdim_tZpos_xZpos_yposr   r   r   forward6   s(   ,.  \\zPositionEmbeddingSine.forward)r   r   FN)r   r	   r
   __doc__r   r   r7   __classcell__r   r   r   r   r   %   s    r   c                       s8   e Zd ZdZd
 fdd	Zdd Zdefdd	Z  ZS )PositionEmbeddingLearnedz*
    Absolute pos embedding, learned.
       c                    s2   t    td|| _td|| _|   d S )N2   )r   r   r   	Embedding	row_embed	col_embedreset_parameters)r   r   r   r   r   r   U   s   
z!PositionEmbeddingLearned.__init__c                 C   s$   t j| jj t j| jj d S )N)r   inituniform_r>   weightr?   )r   r   r   r   r@   [   s   z)PositionEmbeddingLearned.reset_parametersr   c           
      C   s   |j }|jdd  \}}tj||jd}tj||jd}| |}| |}tj|d	|dd|d	d|dgdd
dddd	|jd ddd}	|	S )N)r#   r   r    r"   r%   r   )r(   shaper+   r-   r#   r?   r>   r2   	unsqueezerepeatr3   )
r   r   r4   hwijZx_embZy_embr6   r   r   r   r7   _   s"   


z PositionEmbeddingLearned.forward)r;   )	r   r	   r
   r8   r   r@   r   r7   r9   r   r   r   r   r:   P   s
    r:   c                    s   | j d }| jtjkrt|dd n | jtjkrt| n| jtjkr*t|dd ntd| j t	
 fddt| jD   S )Nr   T)r   Fznot supported c                    s   g | ]} qS r   r   ).0_position_embeddingr   r   
<listcomp>   s    z+build_position_encoding.<locals>.<listcomp>)
hidden_dimrO   r   r   r   r   r:   r   r   r   
ModuleListrangenum_feature_levels)argsZN_stepsr   rN   r   build_position_encodingu   s   

rV   )r8   r   enumr   r+   r   Z	util.miscr   r   Moduler   r:   rV   r   r   r   r   <module>   s   +%