File size: 7,534 Bytes
c3a2df4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fa8ce9
c3a2df4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
{
  "image_size_encoder": 256,
  "triplane_scaling_divider": 0.96806,
  "diffusion_input_size": 32,
  "trainer_name": "flow_matching",
  "use_amp": true,
  "clip_denoised": false,
  "num_samples": 4,
  "num_instances": 10,
  "use_ddim": false,
  "ddpm_model_path": "",
  "cldm_model_path": "",
  "rec_model_path": "",
  "logdir": "./logs/LSGM/inference/Objaverse/i23d/dit-L2/",
  "data_dir": "NONE",
  "eval_data_dir": "/cpfs01/user/lanyushi.p/Repo/eccv24/open-source/InstantMesh/test_dir",
  "eval_batch_size": 1,
  "num_workers": 0,
  "overfitting": false,
  "image_size": 256,
  "iterations": 5000001,
  "schedule_sampler": "uniform",
  "anneal_lr": false,
  "lr": 2e-05,
  "weight_decay": 0.05,
  "lr_anneal_steps": 0,
  "batch_size": 1,
  "microbatch": 1,
  "ema_rate": "0.9999",
  "log_interval": 50,
  "eval_interval": 5000,
  "save_interval": 10000,
  "resume_checkpoint": "checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors",
  "resume_cldm_checkpoint": "",
  "resume_checkpoint_EG3D": "",
  "use_fp16": false,
  "fp16_scale_growth": 0.001,
  "load_submodule_name": "",
  "ignore_resume_opt": false,
  "freeze_ae": false,
  "denoised_ae": true,
  "prompt": "a red chair",
  "interval": 5,
  "save_img": false,
  "use_train_trajectory": false,
  "unconditional_guidance_scale": 6.5,
  "use_eos_feature": false,
  "export_mesh": false,
  "cond_key": "img",
  "allow_tf32": true,
  "num_channels": 320,
  "num_res_blocks": 2,
  "num_heads": 8,
  "num_heads_upsample": -1,
  "num_head_channels": -1,
  "attention_resolutions": "4,2,1",
  "channel_mult": "",
  "dropout": 0.0,
  "class_cond": false,
  "use_checkpoint": false,
  "use_scale_shift_norm": true,
  "resblock_updown": false,
  "use_new_attention_order": false,
  "denoise_in_channels": 4,
  "denoise_out_channels": 4,
  "create_controlnet": false,
  "create_dit": true,
  "i23d": true,
  "create_unet_with_hint": false,
  "dit_model_arch": "DiT-PixArt-L/2",
  "use_spatial_transformer": true,
  "transformer_depth": 1,
  "context_dim": 1024,
  "pooling_ctx_dim": 768,
  "roll_out": true,
  "n_embed": null,
  "legacy": true,
  "mixing_logit_init": -6,
  "hint_channels": 3,
  "learn_sigma": false,
  "diffusion_steps": 1000,
  "noise_schedule": "linear",
  "standarization_xt": false,
  "timestep_respacing": "",
  "use_kl": false,
  "predict_xstart": false,
  "predict_v": true,
  "rescale_timesteps": false,
  "rescale_learned_sigmas": false,
  "mixed_prediction": false,
  "dino_version": "mv-sd-dit-dynaInp-trilatent",
  "encoder_in_channels": 10,
  "img_size": [
    256
  ],
  "patch_size": 14,
  "in_chans": 384,
  "num_classes": 0,
  "embed_dim": 384,
  "depth": 6,
  "mlp_ratio": 4.0,
  "qkv_bias": false,
  "qk_scale": null,
  "drop_rate": 0.1,
  "attn_drop_rate": 0.0,
  "drop_path_rate": 0.0,
  "norm_layer": "nn.LayerNorm",
  "cls_token": false,
  "encoder_cls_token": false,
  "decoder_cls_token": false,
  "sr_kwargs": {},
  "sr_ratio": 2,
  "use_clip": false,
  "arch_encoder": "vits",
  "arch_decoder": "vitb",
  "load_pretrain_encoder": false,
  "encoder_lr": 1e-05,
  "encoder_weight_decay": 0.001,
  "no_dim_up_mlp": true,
  "dim_up_mlp_as_func": false,
  "decoder_load_pretrained": false,
  "uvit_skip_encoder": true,
  "vae_p": 2,
  "ldm_z_channels": 4,
  "ldm_embed_dim": 4,
  "use_conf_map": false,
  "sd_E_ch": 64,
  "z_channels": 12,
  "sd_E_num_res_blocks": 1,
  "num_frames": 6,
  "arch_dit_decoder": "DiT2-L/2",
  "return_all_dit_layers": false,
  "lrm_decoder": false,
  "plane_n": 3,
  "gs_rendering": false,
  "decomposed": true,
  "triplane_fg_bg": false,
  "cfg": "objverse_tuneray_aug_resolution_64_64_auto",
  "density_reg": 0.0,
  "density_reg_p_dist": 0.004,
  "reg_type": "l1",
  "triplane_decoder_lr": 5e-05,
  "super_resolution_lr": 5e-05,
  "c_scale": 1,
  "nsr_lr": 0.02,
  "triplane_size": 224,
  "decoder_in_chans": 32,
  "triplane_in_chans": 32,
  "decoder_output_dim": 3,
  "out_chans": 96,
  "c_dim": 25,
  "ray_start": 0.6,
  "ray_end": 1.8,
  "rendering_kwargs": {
    "image_resolution": 256,
    "disparity_space_sampling": false,
    "clamp_mode": "softplus",
    "c_gen_conditioning_zero": true,
    "c_scale": 1,
    "superresolution_noise_mode": "none",
    "density_reg": 0.0,
    "density_reg_p_dist": 0.004,
    "reg_type": "l1",
    "decoder_lr_mul": 1,
    "decoder_activation": "sigmoid",
    "sr_antialias": true,
    "return_triplane_features": false,
    "return_sampling_details_flag": true,
    "superresolution_module": "utils.torch_utils.components.NearestConvSR",
    "depth_resolution": 64,
    "depth_resolution_importance": 64,
    "ray_start": "auto",
    "ray_end": "auto",
    "box_warp": 0.9,
    "white_back": true,
    "radius_range": [
      1.5,
      2
    ],
    "sampler_bbox_min": -0.45,
    "sampler_bbox_max": 0.45,
    "filter_out_of_bbox": true,
    "PatchRaySampler": true,
    "patch_rendering_resolution": 45,
    "z_near": 1.05,
    "z_far": 2.45
  },
  "sr_training": false,
  "bcg_synthesis": false,
  "bcg_synthesis_kwargs": {},
  "patch_rendering_resolution": 45,
  "vit_decoder_lr": 1e-05,
  "vit_decoder_wd": 0.001,
  "ae_classname": "vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder",
  "color_criterion": "mse",
  "l2_lambda": 1.0,
  "lpips_lambda": 0.8,
  "lpips_delay_iter": 0,
  "sr_delay_iter": 0,
  "kl_anneal": false,
  "latent_lambda": 0.0,
  "latent_criterion": "mse",
  "kl_lambda": 0.0,
  "ssim_lambda": 0.0,
  "l1_lambda": 0.0,
  "id_lambda": 0.0,
  "depth_lambda": 0.0,
  "alpha_lambda": 1.0,
  "fg_mse": false,
  "bg_lamdba": 0.01,
  "density_reg_every": 4,
  "shape_uniform_lambda": 0.005,
  "shape_importance_lambda": 0.01,
  "shape_depth_lambda": 0.0,
  "rec_cvD_lambda": 0.01,
  "nvs_cvD_lambda": 0.025,
  "patchgan_disc_factor": 0.01,
  "patchgan_disc_g_weight": 0.2,
  "r1_gamma": 1.0,
  "sds_lamdba": 1.0,
  "nvs_D_lr_mul": 1,
  "cano_D_lr_mul": 1,
  "ce_balanced_kl": 1.0,
  "p_eps_lambda": 1,
  "symmetry_loss": false,
  "depth_smoothness_lambda": 0.0,
  "ce_lambda": 0.5,
  "negative_entropy_lambda": 0.5,
  "grad_clip": true,
  "online_mask": false,
  "sde_time_eps": 0.01,
  "sde_beta_start": 0.1,
  "sde_beta_end": 20.0,
  "sde_sde_type": "vpsde",
  "sde_sigma2_0": 0.0,
  "iw_sample_p": "drop_sigma2t_iw",
  "iw_sample_q": "ll_iw",
  "iw_subvp_like_vp_sde": false,
  "train_vae": false,
  "pred_type": "v",
  "p_rendering_loss": false,
  "unfix_logit": false,
  "loss_type": "eps",
  "loss_weight": "simple",
  "diffusion_ce_anneal": true,
  "enable_mixing_normal": false,
  "only_mid_control": false,
  "control_key": "img",
  "normalize_clip_encoding": true,
  "scale_clip_encoding": 1.0,
  "cfg_dropout_prob": 0.1,
  "use_lmdb": false,
  "use_wds": false,
  "use_lmdb_compressed": false,
  "compile": false,
  "objv_dataset": true,
  "decode_encode_img_only": false,
  "load_wds_diff": true,
  "load_wds_latent": false,
  "eval_load_wds_instance": true,
  "shards_lst": "",
  "eval_shards_lst": "",
  "mv_input": true,
  "duplicate_sample": true,
  "orthog_duplicate": false,
  "split_chunk_input": false,
  "load_real": true,
  "four_view_for_latent": false,
  "single_view_for_i23d": false,
  "shuffle_across_cls": true,
  "load_extra_36_view": false,
  "mv_latent_dir": "",
  "append_depth": false,
  "plucker_embedding": true,
  "gs_cam_format": false,
  "split_chunk_size": 8,
  "path_type": "Linear",
  "prediction": "velocity",
  "sample_eps": null,
  "train_eps": null,
  "snr_type": "lognorm",
  "local_rank": 0,
  "gpus": 1
}