o
    {iI                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
 ddlmZmZmZmZ ddlmZ G d	d
 d
eZG dd dejjZddddededeeB deeB def
ddZdejejdddedeeB deeB defddZdS )    N)Enum)DetectionHeadConfig)PostProcessbuild_model)PositionEncoding   )Weightsdinov3_vit7b16dinov3_vitl16plusconvert_path_or_url_to_url)DINOV3_BASE_URLc                   @   s   e Zd ZdZdS )DetectionWeightsCOCO2017N)__name__
__module____qualname__r    r   r   @/data/cameron/keygrip/volume_dino_tracks/dinov3/hub/detectors.pyr      s    r   c                       s4   e Zd ZdZ fddZdeej fddZ  Z	S )DetectorWithProcessorz
    takes as input a list of (3, H, W) normalized image tensors and outputs
    a list of dicts with keys "scores", "labels" and "boxes" (format XYXY)
    c                    s   t    || _|| _d S )N)super__init__detectorpostprocessor)selfr   r   	__class__r   r   r      s   

zDetectorWithProcessor.__init__samplesc                 C   s8   |  |}tjdd |D |d jd}| j|||dS )Nc                 S   s   g | ]	}|j d d qS )r   N)shape).0sampler   r   r   
<listcomp>$       z1DetectorWithProcessor.forward.<locals>.<listcomp>r   )device)Ztarget_sizesZoriginal_target_sizes)r   torchtensorr"   r   )r   r   outputsZsizes_tensorr   r   r   forward"   s   
zDetectorWithProcessor.forward)
r   r   r   __doc__r   listr#   Tensorr&   __classcell__r   r   r   r   r      s    r   TF)
pretrained
check_hashbackbone_namer+   detector_weightsbackbone_weightsr,   c                    s.  t dEi dddddddddddd	d
ddddddtjddddddddddddddddddd d!d"d#d$d d%d d&dd'dd(dd)d*d+dd,dd-d.d/d0}tdEi |}t ttd1|  }t d2d3d1|  }	||||d4    |	|_ j|_	 fd5d6d7D |_
|jd u r fd8d6tdd9D |_t |}
|rt|tu r|tjkr|tjksJ d:| |j }d;|v r|d; nd<}|  d=| d>| d?}tjt| |}nt|}tjj|d@|dAdB }|
j|ddC |
j|
_|
j|
j_t |j!|j"}t#|
|dD}|S )FNZwith_box_refineTZ	two_stageZmixed_selectionZlook_forward_twiceZ
k_one2many   Zlambda_one2manyg      ?num_queries_one2onei  Znum_queries_one2manyreparamZposition_embeddingZnum_feature_levelsr   Z
dec_layersdim_feedforwardi   dropoutg        	norm_typeZpre_normZproposal_feature_levels   Zproposal_min_size2   Zdecoder_typeZglobal_rpe_decompZdecoder_use_checkpointFZdecoder_rpe_hidden_dimi   Zdecoder_rpe_typelinearlayers_to_useZblocks_to_trainZadd_transformer_encodernum_encoder_layersZbackbone_use_layernormnum_classes[   Zaux_losstopk
hidden_dimi   Znheads   )r	   r
         )r+   weightsr,   c                    s   g | ]	}t | j qS r   )int
patch_sizer   mbackboner   r   r    Z   r!   z)_make_dinov3_detector.<locals>.<listcomp>)g      ?r   rA   r6   c                    s   g | ]}| j  d  d qS )r6   r   )n_blocksrE   rG   r   r   r    ^   s       zUnsupported detector weights hashZb0235ff7_z_detr_head-z.pthcpu)map_locationr,   model)strict)r   r   r   )$dictr   ZSINEr   r	   r
   evaln_windows_sqrtrD   Zproposal_in_strideZproposal_tgt_stridesr9   ranger   typer   r   valuelowerospathjoinr   r   r#   hubload_state_dict_from_urlload_state_dictr1   Znum_queriestransformerZtwo_stage_num_proposalsr   r=   r2   r   )r-   r+   r.   r/   r,   kwargsZdetection_kwargsconfigZbackbone_classrS   r   Zdetection_weights_namerK   model_filenameurl
state_dictr   rO   r   rG   r   _make_dinov3_detector(   s   		
!



rd   )r+   rB   r/   r,   rB   c                 K   s   t dd| |||d|S )Nr	   )r-   r+   r.   r/   r,   r   )rd   )r+   rB   r/   r,   r_   r   r   r   dinov3_vit7b16_deu   s   re   )rX   enumr   r#   Zdinov3.eval.detection.configr   Z!dinov3.eval.detection.models.detrr   r   Z.dinov3.eval.detection.models.position_encodingr   	backbonesr   BackboneWeightsr	   r
   r   utilsr   r   nnModuler   strboolrd   r   LVD1689Mre   r   r   r   r   <module>   sH   
O