
    Pf                         d dl Z d dlZd dl mZ d dlZd dlmZmZ d dlmZ	 d dl
ZddZ G d dej                        Zd Zd	 Z G d
 dej                        Zy)    N)nn)	rearrangerepeat)
functionalc                 :   | dk(  r(t        j                  t        j                  dd      }|S | dk(  r(t        j                  t        j                  dd      }|S | dk(  r&t        j                  t        j
                  |      }|S | dk(  rd}|S t        d	| z        )
aP  Return a normalization layer
    Parameters:
        norm_type (str) -- the name of the normalization layer: batch | instance | none
    For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev).
    For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics.
    batchT)affinetrack_running_statsinstanceFgroupnoneNz%normalization layer [%s] is not found)	functoolspartialr   BatchNorm2dInstanceNorm2d	GroupNormNotImplementedError)	norm_typegroup_norm_groups
norm_layers      :/home/cameronsmith/repos/onestep_diffusion/conv_modules.pyget_norm_layerr      s     G&&NN4T

  
j	 &&e

  
g	&&r||5FG

 	 
f	
  ""II"UVV    c                   >     e Zd Z	 	 	 	 	 	 	 	 	 	 	 d fd	ZddZ xZS )PixelNeRFEncoderc           	      p   t         |           || _        || _        t	        |	      }t        d|d        t        t        j                  |      ||      | _	        |
dk7  rt        j                  |
| j                  j                  j                  j                  d   | j                  j                  j                  | j                  j                  j                   | j                  j                  j"                  | j                  j                  j$                        | j                  _        t        j&                         | j                  _        t        j&                         | j                  _        g d|   | _        || _        || _        || _        || _        | j7                  dt9        j:                  d	d	d	d	      d
       | j7                  dt9        j:                  dt8        j<                        d
       t        j&                  t        j                  | j,                  |d	            | _        y )NzUsing torchvisionencoder)
pretrainedr      r   padding_mode)r   @            i   latent   F)
persistentlatent_scaling   )dtype) super__init__feature_scaleuse_first_poolr   printgetattrtorchvisionmodelsmodelr   Conv2dconv1weightshapekernel_sizestridepaddingr!   
Sequentialfcavgpoollatent_size
num_layersindex_interpindex_paddingupsample_interpregister_buffertorchemptyfloat32out)selfbackboner   r@   rA   rB   rC   r.   r/   r   in_chfdim_outr   	__class__s                r   r-   zPixelNeRFEncoder.__init__!   s    	*,#I.
!8Y7:W[//:!j

 A:!yy

  ''--a0

  ,,

  ''

  ((!ZZ--:: DJJ 

]]_

7
C$(*.Xu{{1aA'>5Qekk!5==Ae 	 	
 ==IId&&!4
r   c                    t        |j                        dkD  r6 | |j                  dd      |      j                  d|j                  d d       S | j                  dk7  rFt        j                  || j                  | j                  dkD  rdnd| j                  dkD  rdnd d	      }|j                  | j                  j                  
      }| j                  j                  |      }| j                  j                  |      }| j                  j                  |      }|g}| j                  dkD  rS| j                  r| j                  j!                  |      }| j                  j#                  |      }|j%                  |       | j                  dkD  r,| j                  j'                  |      }|j%                  |       | j                  dkD  r,| j                  j)                  |      }|j%                  |       | j                  dkD  r,| j                  j+                  |      }|j%                  |       || _        | j.                  dk(  rd nd}|d   j                  dd  }t1        t        |            D ]/  }t        j                  ||   ||n|| j2                  |      ||<   1 t5        j6                  |d      | _        | j                  j                  d   | j8                  d<   | j                  j                  d   | j8                  d<   | j8                  | j8                  dz
  z  dz  | _        | j;                  | j                        S )N   r   r'   r*         ?bilinearareaT)scale_factormodealign_cornersrecompute_scale_factor)devicer   znearest )rT   rU   )dimg       @)lenr8   flatten	unflattenr.   Finterpolatetor&   rW   r4   r6   bn1relur@   r/   maxpoollayer1appendlayer2layer3layer4latentsrA   rangerC   rE   catr)   rH   )rI   xcustom_sizeri   rU   	latent_szis          r   forwardzPixelNeRFEncoder.forwardV   s    qww<>$qyy1~k"B"L"LQqwwWYXY{"[[$!//#'#5#5#;Z&*&8&83&>dD'+A DD**D+JJQJJNN1JJOOA#??Q""JJ&&q)

!!!$ANN1??Q

!!!$ANN1??Q

!!!$ANN1??Q

!!!$ANN1 $ 1 1Z ?TAJ$$RS)	s7|$ 	A
(0	k))+	GAJ	 iiQ/!%!2!22!6A!%!2!22!6A"11T5H5H15LMPSSxx$$r   )resnet34TrO   rQ   borderrQ   rP   Tr   r   r%   )N)__name__
__module____qualname__r-   rp   __classcell__rM   s   @r   r   r       s1     "3
j0%r   r   c                     | j                         } | dddf   dz
  dz  | dddf<   | dddf   dz
  dz  | dddf<   | dddf   d	z
  d
z  | dddf<   | S )zi Normalize input images according to ImageNet standards.

    Args:
        x (tensor): input images
    Nr   g
ףp=
?gZd;O?r'   gv/?gy&1?r*   gCl?g?)clone)rl   s    r   normalize_imagenetrz      sn     	
	AAw%'AadGAw%'AadGAw%'AadGHr   c                 x    t        | d      r.t        j                  j                  | j                  ddd       y y )Nr7   g        rb   fan_in)anonlinearityrT   )hasattrr   initkaiming_normal_r7   )ms    r   init_weights_normalr      s0    q(
Cf8T r   c                   *     e Zd ZdZd fd	Zd Z xZS )Resnet18a   ResNet-18 encoder network for image input.
    Args:
        c_dim (int): output dimension of the latent embedding
        normalize (bool): whether the input images should be normalized
        use_linear (bool): whether a final linear layer should be used
    c                 L   t         |           || _        || _        t        j
                  j                  d      | _        |dk7  rt        j                  || j                  j                  j                  j                  d   | j                  j                  j                  | j                  j                  j                  | j                  j                  j                  | j                  j                  j                         | j                  _
        d| _        t        j"                         | j                  _        |r;t        j&                  d|      | _        | j$                  j)                  t*               y |dk(  rt        j"                         | _        y t-        d      )	NT)r   r   r   r    Fr%   z(c_dim must be 512 if use_linear is False)r,   r-   	normalize
use_linearr2   r3   resnet18featuresr   r5   r6   r7   r8   r9   r:   r;   r!   r<   r=   Linearapplyr   
ValueError)rI   c_dimr   r   rK   rM   s        r   r-   zResnet18.__init__   s!   "$#**33t3DA:"$))##**003##//##**##++!]]00==#DMM !DN==?iiU+DGGGMM-.c\mmoDGGHHr   c                     |dz   dz  }| j                   rt        |      }| j                  |      }| j                  |      }|S )Nr'   r*   )r   rz   r   r=   )rI   inputrl   netrH   s        r   rp   zResnet18.forward   s@    QY!O>>"1%AmmAggcl
r   )TTr   )rs   rt   ru   __doc__r-   rp   rv   rw   s   @r   r   r      s    I0r   r   )r       )rE   r2   r   r   einopsr   r   torch.nnr   r^   numpynpr   Moduler   rz   r   r    r   r   <module>r      sJ       $ $ 0f%ryy f%P
U&ryy &r   