o
    {i                     @   s  d dl Z d dlmZ d dlmZ d dlZd dlmZ ddlm	Z	m
ZmZ ddlmZ G dd deZd	d
dejdddedededeeB def
ddZG dd dejZd	dejejdddededeeB deeB def
ddZdejejdddedeeB deeB defddZdS )    N)Enum)Optional   )dinov3_vit7b16Weightsconvert_path_or_url_to_url)DINOV3_BASE_URLc                   @   s   e Zd ZdZdS )ClassifierWeights
IMAGENET1KN)__name__
__module____qualname__r
    r   r   B/data/cameron/keygrip/volume_dino_tracks/dinov3/hub/classifiers.pyr	      s    r	   r   i    TF)backbone_name	embed_dim
pretrainedclassifier_weights
check_hashr   r   r   r   r   c                 K   s   t |d}|rQt|tu r=|tjksJ d| |j }d|v r'|d nd}|  d| d| d}	tj	t
| |	}
nt|}
tjj|
d|d	}|j|d
d |S )Ni  z+Unsupported weights for linear classifier: hashZ90d8ed92_z_linear_head-z.pthcpu)map_locationr   T)strict)nnLineartyper	   r
   valuelowerospathjoinr   r   torchhubload_state_dict_from_urlload_state_dict)r   r   r   r   r   kwargslinear_headweights_namer   model_filenameurl
state_dictr   r   r   '_make_dinov3_linear_classification_head   s   	
r,   c                       s2   e Zd Zdejdejf fddZdd Z  ZS )_LinearClassifierWrapperbackboner'   c                   s   t    || _|| _d S )N)super__init__r.   r'   )selfr.   r'   	__class__r   r   r0   5   s   

z!_LinearClassifierWrapper.__init__c                 C   s@   | j |}|d }|d }tj||jddgdd}| |S )NZx_norm_clstokenZx_norm_patchtokensr   )dim)r.   Zforward_featuresr"   catmeanr'   )r1   x	cls_tokenpatch_tokensZlinear_inputr   r   r   forward:   s   

z _LinearClassifierWrapper.forward)r   r   r   r   Moduler0   r:   __classcell__r   r   r2   r   r-   4   s    r-   r   r   r   backbone_weightsr   r>   c           	      K   sV   | dkrt |||d}ntd|  d|j}td| d| ||d|}t||dS )	Nr   )r   weightsr   zUnsupported backbone: z1, linear classifiers are provided only for ViT-7b   )r   r   r   r   )r.   r'   r   )r   AssertionErrorr   r,   r-   )	r   r   r   r>   r   r&   r.   r   r'   r   r   r   _make_dinov3_linear_classifierH   s   	rB   )r   r?   r>   r   r?   c                 K   s   t dd| |||d|S )z
    Linear classifier  on top of a DINOv3 ViT-7B/16 backbone pretrained on the LVD-1689M dataset and trained on ImageNet-1k.
    r   r=   Nr   )rB   )r   r?   r>   r   r&   r   r   r   dinov3_vit7b16_lc`   s   rC   )r   enumr   typingr   r"   torch.nnr   	backbonesr   r   ZBackboneWeightsr   utilsr   r	   r
   strintboolr,   r;   r-   LVD1689MrB   rC   r   r   r   r   <module>   sl   

