File size: 2,405 Bytes
e54b8f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Top-level scalar settings (not attached to any `*_cls` component below).
# cond_image_size has the same value as image_tokenizer.width/height (512).
# NOTE(review): presumably the side length in pixels of the conditioning
# image - confirm against the code that reads this file.
cond_image_size: 512
# NOTE(review): presumably the grid resolution used when extracting the
# isosurface (mesh) - confirm.
isosurface_resolution: 160
# NOTE(review): units and exact meaning are not visible in this file
# (looks like a scene/bounding radius) - confirm.
radius: 0.87

# Camera embedder component.
# `camera_embedder_cls` is a dotted path and `camera_embedder` carries the
# matching settings; the same `<name>_cls` / `<name>` pairing is used for
# every component in this file.
# NOTE(review): presumably the loader imports the class named by `_cls` and
# passes the mapping as its config - verify against the loader.
camera_embedder_cls: sf3d.models.camera.LinearCameraEmbedder
camera_embedder:
  # NOTE(review): 25 would match a flattened 4x4 matrix (16) plus a flattened
  # 3x3 matrix (9) for the two conditions listed below - confirm.
  in_channels: 25
  # Same value as image_tokenizer.modulation_cond_dim (768).
  out_channels: 768
  # Names of the conditioning inputs.
  # NOTE(review): order may be significant to the consumer - do not reorder
  # without checking.
  conditions:
    - c2w_cond
    - intrinsic_normed_cond

# Image tokenizer component (`_cls` dotted path + settings mapping).
image_tokenizer_cls: sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer
image_tokenizer:
  # Quoted string identifier.
  # NOTE(review): looks like a Hugging Face Hub model id, which would need
  # network or cache access at load time - confirm.
  pretrained_model_name_or_path: "facebook/dinov2-large"
  # width/height have the same value as the top-level cond_image_size (512);
  # keep the three in sync when changing any of them.
  width: 512
  height: 512
  # Same value as camera_embedder.out_channels (768).
  # NOTE(review): presumably the camera embedding is the modulation
  # condition - confirm.
  modulation_cond_dim: 768

# Triplane tokenizer component (`_cls` dotted path + settings mapping).
tokenizer_cls: sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding
tokenizer:
  # NOTE(review): presumably the side length of each plane, in tokens -
  # confirm.
  plane_size: 96
  # Same value as backbone.raw_triplane_channels (1024).
  num_channels: 1024

# Transformer backbone component (`_cls` dotted path + settings mapping).
backbone_cls: sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer
backbone:
  # num_attention_heads * attention_head_dim = 16 * 64 = 1024, the same
  # value as triplane_channels below.
  num_attention_heads: 16
  attention_head_dim: 64
  # Same value as tokenizer.num_channels (1024).
  raw_triplane_channels: 1024
  # Same value as post_processor.in_channels and
  # global_estimator.triplane_features (1024).
  triplane_channels: 1024
  raw_image_channels: 1024 # DINO features
  # NOTE(review): meaning of num_latents, and how num_blocks relates to
  # num_basic_blocks, is not visible in this file - confirm in the backbone
  # class before tuning.
  num_latents: 1792
  num_blocks: 4
  num_basic_blocks: 3

# Post-processor component (`_cls` dotted path + settings mapping).
post_processor_cls: sf3d.models.network.PixelShuffleUpsampleNetwork
post_processor:
  # Same value as backbone.triplane_channels (1024).
  in_channels: 1024
  # decoder.in_channels (120) is exactly 3 * this value.
  # NOTE(review): presumably features of three planes are concatenated
  # before decoding - confirm.
  out_channels: 40
  # NOTE(review): presumably a spatial upsampling factor (tokenizer.plane_size
  # 96 * 4 = 384 per side) - confirm.
  scale_factor: 4
  conv_layers: 4


# Decoder component (`_cls` dotted path + settings mapping).
decoder_cls: sf3d.models.network.MaterialMLP
decoder:
  # Equals 3 * post_processor.out_channels (3 * 40 = 120).
  in_channels: 120
  n_neurons: 64
  activation: silu
  # One entry per output head. Every head sets name, out_channels and
  # n_hidden_layers; output_activation and out_bias are set only on some.
  # NOTE(review): list order may matter to the consumer - do not reorder
  # without checking.
  heads:
    - name: density
      out_channels: 1
      # Only head in this list with an explicit bias. The key here is
      # `out_bias`, whereas the image_estimator and global_estimator heads
      # use `output_bias`.
      # NOTE(review): presumably each class defines its own head schema -
      # confirm the spelling is intentional.
      out_bias: -1.0
      n_hidden_layers: 2
      output_activation: trunc_exp
    - name: features
      out_channels: 3
      n_hidden_layers: 3
      output_activation: sigmoid
    - name: perturb_normal
      out_channels: 3
      n_hidden_layers: 3
      output_activation: normalize_channel_last
    - name: vertex_offset
      out_channels: 3
      n_hidden_layers: 2
      # No output_activation is given for this head; whatever default the
      # consumer applies is not visible here - TODO confirm.

# Image-level estimator component (`_cls` dotted path + settings mapping).
# It declares two heads, roughness and metallic, with identical settings.
image_estimator_cls: sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator
image_estimator:
  # NOTE(review): presumably the heads parameterize a Beta distribution and
  # evaluation uses its mode - confirm in the estimator class.
  distribution: beta
  distribution_eval: mode
  heads:
    - name: roughness
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      # Key is `output_bias` here; the decoder heads use `out_bias` instead.
      output_bias: 1.0
      # NOTE(review): looks like a reshape target where -1 is an inferred
      # (batch) dimension - confirm.
      shape: [-1, 1, 1]
    - name: metallic
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      shape: [-1, 1, 1]

# Global estimator component (`_cls` dotted path + settings mapping).
global_estimator_cls: sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator
global_estimator:
  # Same value as backbone.triplane_channels (1024).
  triplane_features: 1024
  heads:
    # Single head. NOTE(review): "sg" presumably stands for spherical
    # Gaussians (lighting) - confirm.
    - name: sg_amplitudes
      # Matches the middle dimension of `shape` below (24).
      out_channels: 24
      n_hidden_layers: 3
      output_activation: softplus
      output_bias: 1.0
      # NOTE(review): looks like a reshape target where -1 is an inferred
      # (batch) dimension - confirm.
      shape: [-1, 24, 1]