Ryukijano committed on
Commit
dff7251
1 Parent(s): b556a2f

LLAMA MESH

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. MacLab-Era3D-512-6view/feature_extractor/preprocessor_config.json +0 -27
  2. MacLab-Era3D-512-6view/image_encoder/config.json +0 -23
  3. MacLab-Era3D-512-6view/image_encoder/model.safetensors +0 -3
  4. MacLab-Era3D-512-6view/image_noising_scheduler/scheduler_config.json +0 -19
  5. MacLab-Era3D-512-6view/image_normalizer/config.json +0 -6
  6. MacLab-Era3D-512-6view/image_normalizer/diffusion_pytorch_model.safetensors +0 -3
  7. MacLab-Era3D-512-6view/model_index.json +0 -40
  8. MacLab-Era3D-512-6view/scheduler/scheduler_config.json +0 -20
  9. MacLab-Era3D-512-6view/text_encoder/config.json +0 -25
  10. MacLab-Era3D-512-6view/text_encoder/model.safetensors +0 -3
  11. MacLab-Era3D-512-6view/tokenizer/merges.txt +0 -0
  12. MacLab-Era3D-512-6view/tokenizer/special_tokens_map.json +0 -24
  13. MacLab-Era3D-512-6view/tokenizer/tokenizer_config.json +0 -38
  14. MacLab-Era3D-512-6view/tokenizer/vocab.json +0 -0
  15. MacLab-Era3D-512-6view/unet/config.json +0 -95
  16. MacLab-Era3D-512-6view/unet/diffusion_pytorch_model.safetensors +0 -3
  17. MacLab-Era3D-512-6view/vae/config.json +0 -32
  18. MacLab-Era3D-512-6view/vae/diffusion_pytorch_model.safetensors +0 -3
  19. assets/advanced/img1.png +0 -3
  20. assets/advanced/img2.png +0 -3
  21. assets/advanced/img3.png +0 -3
  22. assets/advanced/img4.png +0 -3
  23. assets/advanced/img5.png +0 -3
  24. assets/advanced/img6.png +0 -3
  25. assets/advanced/img7.png +0 -3
  26. assets/advanced/img8.png +0 -3
  27. assets/basic/img1.png +0 -3
  28. assets/basic/img2.png +0 -3
  29. assets/basic/img3.png +0 -3
  30. assets/basic/img4.png +0 -3
  31. assets/basic/img5.png +0 -3
  32. assets/basic/img6.png +0 -3
  33. assets/basic/img7.png +0 -3
  34. assets/basic/img_temp2.png +0 -3
  35. configs/inpaint.yaml +0 -31
  36. configs/sd_upsampler.yaml +0 -32
  37. configs/sd_upsampler_temporal.yaml +0 -36
  38. configs/sd_upsampler_train.yaml +0 -36
  39. configs/test_unclip-512-6view.yaml +0 -56
  40. configs/train.yaml +0 -35
  41. configs/train_floyd.yaml +0 -32
  42. configs/train_lora.yaml +0 -28
  43. configs/train_mv.yaml +0 -33
  44. configs/train_mv_256.yaml +0 -33
  45. configs/upsample_gen_single.yaml +0 -37
  46. configs/upsample_generation.yaml +0 -37
  47. examples/3968940-PH.png +0 -0
  48. examples/A_beautiful_cyborg_with_brown_hair_rgba.png +0 -3
  49. examples/A_bulldog_with_a_black_pirate_hat_rgba.png +0 -0
  50. examples/A_pig_wearing_a_backpack_rgba.png +0 -0
MacLab-Era3D-512-6view/feature_extractor/preprocessor_config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "crop_size": {
3
- "height": 224,
4
- "width": 224
5
- },
6
- "do_center_crop": true,
7
- "do_convert_rgb": true,
8
- "do_normalize": true,
9
- "do_rescale": true,
10
- "do_resize": true,
11
- "image_mean": [
12
- 0.48145466,
13
- 0.4578275,
14
- 0.40821073
15
- ],
16
- "image_processor_type": "CLIPImageProcessor",
17
- "image_std": [
18
- 0.26862954,
19
- 0.26130258,
20
- 0.27577711
21
- ],
22
- "resample": 3,
23
- "rescale_factor": 0.00392156862745098,
24
- "size": {
25
- "shortest_edge": 224
26
- }
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/image_encoder/config.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "_name_or_path": "stabilityai/stable-diffusion-2-1-unclip",
3
- "architectures": [
4
- "CLIPVisionModelWithProjection"
5
- ],
6
- "attention_dropout": 0.0,
7
- "dropout": 0.0,
8
- "hidden_act": "gelu",
9
- "hidden_size": 1280,
10
- "image_size": 224,
11
- "initializer_factor": 1.0,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 5120,
14
- "layer_norm_eps": 1e-05,
15
- "model_type": "clip_vision_model",
16
- "num_attention_heads": 16,
17
- "num_channels": 3,
18
- "num_hidden_layers": 32,
19
- "patch_size": 14,
20
- "projection_dim": 1024,
21
- "torch_dtype": "float16",
22
- "transformers_version": "4.37.2"
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/image_encoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae616c24393dd1854372b0639e5541666f7521cbe219669255e865cb7f89466a
3
- size 1264217240
 
 
 
 
MacLab-Era3D-512-6view/image_noising_scheduler/scheduler_config.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "_class_name": "DDPMScheduler",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "beta_end": 0.02,
5
- "beta_schedule": "squaredcos_cap_v2",
6
- "beta_start": 0.0001,
7
- "clip_sample": true,
8
- "clip_sample_range": 1.0,
9
- "dynamic_thresholding_ratio": 0.995,
10
- "num_train_timesteps": 1000,
11
- "prediction_type": "epsilon",
12
- "rescale_betas_zero_snr": false,
13
- "sample_max_value": 1.0,
14
- "steps_offset": 0,
15
- "thresholding": false,
16
- "timestep_spacing": "leading",
17
- "trained_betas": null,
18
- "variance_type": "fixed_small"
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/image_normalizer/config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "_class_name": "StableUnCLIPImageNormalizer",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "_name_or_path": "stabilityai/stable-diffusion-2-1-unclip",
5
- "embedding_dim": 1024
6
- }
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/image_normalizer/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7772cf09639cea0c65639a3bfc88004a66d42259090d03fa8e15efdc255f240a
3
- size 4272
 
 
 
 
MacLab-Era3D-512-6view/model_index.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "_class_name": "StableUnCLIPImg2ImgPipeline",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "feature_extractor": [
5
- "transformers",
6
- "CLIPImageProcessor"
7
- ],
8
- "image_encoder": [
9
- "transformers",
10
- "CLIPVisionModelWithProjection"
11
- ],
12
- "image_noising_scheduler": [
13
- "diffusers",
14
- "DDPMScheduler"
15
- ],
16
- "image_normalizer": [
17
- "stable_diffusion",
18
- "StableUnCLIPImageNormalizer"
19
- ],
20
- "scheduler": [
21
- "diffusers",
22
- "DDIMScheduler"
23
- ],
24
- "text_encoder": [
25
- "transformers",
26
- "CLIPTextModel"
27
- ],
28
- "tokenizer": [
29
- "transformers",
30
- "CLIPTokenizer"
31
- ],
32
- "unet": [
33
- "mvdiffusion.models.unet_mv2d_condition",
34
- "UNetMV2DConditionModel"
35
- ],
36
- "vae": [
37
- "diffusers",
38
- "AutoencoderKL"
39
- ]
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/scheduler/scheduler_config.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "_class_name": "DDIMScheduler",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "beta_end": 0.012,
5
- "beta_schedule": "linear",
6
- "beta_start": 0.00085,
7
- "clip_sample": false,
8
- "clip_sample_range": 1.0,
9
- "dynamic_thresholding_ratio": 0.995,
10
- "num_train_timesteps": 1000,
11
- "prediction_type": "v_prediction",
12
- "rescale_betas_zero_snr": false,
13
- "sample_max_value": 1.0,
14
- "set_alpha_to_one": false,
15
- "skip_prk_steps": true,
16
- "steps_offset": 1,
17
- "thresholding": false,
18
- "timestep_spacing": "leading",
19
- "trained_betas": null
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/text_encoder/config.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "_name_or_path": "stabilityai/stable-diffusion-2-1-unclip",
3
- "architectures": [
4
- "CLIPTextModel"
5
- ],
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 0,
8
- "dropout": 0.0,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_size": 1024,
12
- "initializer_factor": 1.0,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 4096,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 77,
17
- "model_type": "clip_text_model",
18
- "num_attention_heads": 16,
19
- "num_hidden_layers": 23,
20
- "pad_token_id": 1,
21
- "projection_dim": 512,
22
- "torch_dtype": "float16",
23
- "transformers_version": "4.37.2",
24
- "vocab_size": 49408
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/text_encoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
3
- size 680820392
 
 
 
 
MacLab-Era3D-512-6view/tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
MacLab-Era3D-512-6view/tokenizer/special_tokens_map.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<|startoftext|>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": "!",
17
- "unk_token": {
18
- "content": "<|endoftext|>",
19
- "lstrip": false,
20
- "normalized": true,
21
- "rstrip": false,
22
- "single_word": false
23
- }
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/tokenizer/tokenizer_config.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "!",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "49406": {
13
- "content": "<|startoftext|>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "49407": {
21
- "content": "<|endoftext|>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|startoftext|>",
30
- "clean_up_tokenization_spaces": true,
31
- "do_lower_case": true,
32
- "eos_token": "<|endoftext|>",
33
- "errors": "replace",
34
- "model_max_length": 77,
35
- "pad_token": "!",
36
- "tokenizer_class": "CLIPTokenizer",
37
- "unk_token": "<|endoftext|>"
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
MacLab-Era3D-512-6view/unet/config.json DELETED
@@ -1,95 +0,0 @@
1
- {
2
- "_class_name": "UNetMV2DConditionModel",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "_name_or_path": "../checkpoint_backup/output/unit-unclip-512-6view-randomele-self+row-8w-selfcd-rowmv-2block-linear-depthfilter-step-removewh-bs256-three/checkpoint-40000/unet_ema",
5
- "act_fn": "silu",
6
- "addition_channels": [
7
- 1280,
8
- 1280,
9
- 1280
10
- ],
11
- "addition_downsample": false,
12
- "addition_embed_type": null,
13
- "addition_embed_type_num_heads": 64,
14
- "addition_time_embed_dim": null,
15
- "attention_head_dim": [
16
- 5,
17
- 10,
18
- 20,
19
- 20
20
- ],
21
- "block_out_channels": [
22
- 320,
23
- 640,
24
- 1280,
25
- 1280
26
- ],
27
- "cd_attention_last": false,
28
- "cd_attention_mid": false,
29
- "center_input_sample": false,
30
- "class_embed_type": "projection",
31
- "class_embeddings_concat": false,
32
- "conv_in_kernel": 3,
33
- "conv_out_kernel": 3,
34
- "cross_attention_dim": 1024,
35
- "cross_attention_norm": null,
36
- "decay": 0.9999,
37
- "down_block_types": [
38
- "CrossAttnDownBlockMV2D",
39
- "CrossAttnDownBlockMV2D",
40
- "CrossAttnDownBlockMV2D",
41
- "DownBlock2D"
42
- ],
43
- "downsample_padding": 1,
44
- "dual_cross_attention": false,
45
- "encoder_hid_dim": null,
46
- "encoder_hid_dim_type": null,
47
- "flip_sin_to_cos": true,
48
- "freq_shift": 0,
49
- "in_channels": 8,
50
- "inv_gamma": 1.0,
51
- "layers_per_block": 2,
52
- "mid_block_only_cross_attention": null,
53
- "mid_block_scale_factor": 1,
54
- "mid_block_type": "UNetMidBlockMV2DCrossAttn",
55
- "min_decay": 0.0,
56
- "multiview_attention": true,
57
- "mvcd_attention": true,
58
- "norm_eps": 1e-05,
59
- "norm_num_groups": 32,
60
- "num_attention_heads": null,
61
- "num_class_embeds": null,
62
- "num_regress_blocks": 3,
63
- "num_views": 6,
64
- "only_cross_attention": false,
65
- "optimization_step": 40000,
66
- "out_channels": 4,
67
- "power": 0.6666666666666666,
68
- "projection_camera_embeddings_input_dim": 4,
69
- "projection_class_embeddings_input_dim": 2048,
70
- "regress_elevation": true,
71
- "regress_focal_length": true,
72
- "resnet_out_scale_factor": 1.0,
73
- "resnet_skip_time_act": false,
74
- "resnet_time_scale_shift": "default",
75
- "sample_size": 64,
76
- "selfattn_block": "self_rowwise",
77
- "sparse_mv_attention": true,
78
- "time_cond_proj_dim": null,
79
- "time_embedding_act_fn": null,
80
- "time_embedding_dim": null,
81
- "time_embedding_type": "positional",
82
- "timestep_post_act": null,
83
- "transformer_layers_per_block": 1,
84
- "up_block_types": [
85
- "UpBlock2D",
86
- "CrossAttnUpBlockMV2D",
87
- "CrossAttnUpBlockMV2D",
88
- "CrossAttnUpBlockMV2D"
89
- ],
90
- "upcast_attention": true,
91
- "update_after_step": 0,
92
- "use_dino": false,
93
- "use_ema_warmup": false,
94
- "use_linear_projection": true
95
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/unet/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:af12a68fdbfa51bb857422b42bd5ac5101467e22e5e58ea6d8b06dd9426c93af
3
- size 1895432652
 
 
 
 
MacLab-Era3D-512-6view/vae/config.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "_class_name": "AutoencoderKL",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "_name_or_path": "stabilityai/stable-diffusion-2-1-unclip",
5
- "act_fn": "silu",
6
- "block_out_channels": [
7
- 128,
8
- 256,
9
- 512,
10
- 512
11
- ],
12
- "down_block_types": [
13
- "DownEncoderBlock2D",
14
- "DownEncoderBlock2D",
15
- "DownEncoderBlock2D",
16
- "DownEncoderBlock2D"
17
- ],
18
- "force_upcast": true,
19
- "in_channels": 3,
20
- "latent_channels": 4,
21
- "layers_per_block": 2,
22
- "norm_num_groups": 32,
23
- "out_channels": 3,
24
- "sample_size": 768,
25
- "scaling_factor": 0.18215,
26
- "up_block_types": [
27
- "UpDecoderBlock2D",
28
- "UpDecoderBlock2D",
29
- "UpDecoderBlock2D",
30
- "UpDecoderBlock2D"
31
- ]
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MacLab-Era3D-512-6view/vae/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
3
- size 167335342
 
 
 
 
assets/advanced/img1.png DELETED

Git LFS Details

  • SHA256: 0b6ee0af34eb5fa6bce208003dbad31301303572172346e978a6069fc45c67c8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
assets/advanced/img2.png DELETED

Git LFS Details

  • SHA256: 46b83c749171ee3105795ec7cc35ce4f37fcb2f18e1578ab8f7b41a5972ca4ed
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
assets/advanced/img3.png DELETED

Git LFS Details

  • SHA256: aea4f2b59581dc012b353ed405e15bc286d24c6211e54748fbc6692be6203479
  • Pointer size: 132 Bytes
  • Size of remote file: 1.53 MB
assets/advanced/img4.png DELETED

Git LFS Details

  • SHA256: 34235863cf886241b37129a4fd3e7d24788ecef3ef29c2038211f1ddfe0cad28
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
assets/advanced/img5.png DELETED

Git LFS Details

  • SHA256: 8b9b107ff5712c15114cc1dcafb3e847c8035941bb7c74111a54fbc6bfce5188
  • Pointer size: 132 Bytes
  • Size of remote file: 1.41 MB
assets/advanced/img6.png DELETED

Git LFS Details

  • SHA256: cdc8b4a173321f3a01ae7e167770bd3b67d8157310ed8055b2c984626e8589df
  • Pointer size: 132 Bytes
  • Size of remote file: 1.64 MB
assets/advanced/img7.png DELETED

Git LFS Details

  • SHA256: f399a24a822cff263cd5e310b6ef265500f0dfcdb48e92c398405b5761908446
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
assets/advanced/img8.png DELETED

Git LFS Details

  • SHA256: 4541dbd422a4994b4219e2c89db08f514b406fb3f8d4c45084faeb13d7d6482e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.74 MB
assets/basic/img1.png DELETED

Git LFS Details

  • SHA256: dd92c705faecc52785e9e114762747cfdc748abd770f7967ca3a97859abbc873
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img2.png DELETED

Git LFS Details

  • SHA256: 0a5d29c163ce3e6e8ee87c128e606b34ee911c2ff51ba112b0b801bf37f32c0c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
assets/basic/img3.png DELETED

Git LFS Details

  • SHA256: 4401231e8f735e12ff6741c4b5da2ce71e8bc9174b9c1f84770e04fad9d7cd63
  • Pointer size: 132 Bytes
  • Size of remote file: 1.39 MB
assets/basic/img4.png DELETED

Git LFS Details

  • SHA256: ca831c0265505b62a316fe5950b84dcfd83cc3a4ff92d721ea42350a01c28862
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img5.png DELETED

Git LFS Details

  • SHA256: 81ba340c301fd82a8fe41efd9f877d052b0999b2a62f46a6bc3250528f35bf01
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img6.png DELETED

Git LFS Details

  • SHA256: 928cc4876a697603597b5d9273e607cea6bd8c2d08b5c9f70e5b8e354309f845
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img7.png DELETED

Git LFS Details

  • SHA256: 8bfcad7363ff53c742fe13a49ecbbbcaea1142dc138a6a8020c21bcbfbce30b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB
assets/basic/img_temp2.png DELETED

Git LFS Details

  • SHA256: 8c1a083c1a06637ab7e871270e7b755082db9562ae67219533e06ed5e2b0831c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.78 MB
configs/inpaint.yaml DELETED
@@ -1,31 +0,0 @@
1
- dataset:
2
- num_views_low_res: 8
3
- num_views_high_res: 1
4
- image_root_dir: training/mp3d_skybox
5
- fov: 90
6
- rot_low_res: 45
7
- resolution: 256
8
-
9
- train:
10
- log_dir: sd_upsampler
11
- lr: 0.0002
12
-
13
- test:
14
- fuse_type: single # multidiff
15
-
16
- model:
17
- guidance_scale: 9.
18
- diff_timestep: 1
19
- low_res_noise_level: 30 # from the default SD upsampler setting
20
- model_type: upsample
21
- upsample_model:
22
- model_id: stabilityai/stable-diffusion-x4-upscaler
23
- num_coarse_cp_blocks: 5
24
- lora_layers: True
25
- homo_cp_attn: True
26
- diff_timestep: 75
27
- base_model:
28
- diff_timestep: 50
29
- model_id: stabilityai/stable-diffusion-2-base
30
- lora_layers: True
31
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/sd_upsampler.yaml DELETED
@@ -1,32 +0,0 @@
1
- dataset:
2
- name: mp3d
3
- num_views_low_res: 8
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- resolution: 128
9
-
10
- train:
11
- log_dir: sd_upsampler
12
- lr: 0.0002
13
-
14
- test:
15
- fuse_type: single # multidiff
16
-
17
- model:
18
- guidance_scale: 9.
19
- diff_timestep: 1
20
- low_res_noise_level: 20 # from the default SD upsampler setting
21
- model_type: upsample
22
- upsample_model:
23
- model_id: stabilityai/stable-diffusion-x4-upscaler
24
- num_coarse_cp_blocks: 5
25
- lora_layers: True
26
- homo_cp_attn: True
27
- diff_timestep: 75
28
- base_model:
29
- diff_timestep: 50
30
- model_id: stabilityai/stable-diffusion-2-base
31
- lora_layers: True
32
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/sd_upsampler_temporal.yaml DELETED
@@ -1,36 +0,0 @@
1
- dataset:
2
- name: mp3d
3
- num_views_low_res: 8
4
- num_views_high_res: 6
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- rot_high_res: 45
9
- resolution: 256
10
- resolution_high_res: 1024
11
- crop_size_high_res: 384
12
-
13
- train:
14
- log_dir: sd_upsampler
15
- lr: 0.0001
16
-
17
- test:
18
- fuse_type: single # multidiff
19
-
20
- model:
21
- guidance_scale: 9.
22
- diff_timestep: 1
23
- low_res_noise_level: 20 # from the default SD upsampler setting
24
- model_type: upsample
25
- upsample_model:
26
- model_id: stabilityai/stable-diffusion-x4-upscaler
27
- num_coarse_cp_blocks: 5
28
- lora_layers: False
29
- homo_cp_attn: True
30
- diff_timestep: 75
31
- multiframe_fuse: True
32
- base_model:
33
- diff_timestep: 50
34
- model_id: stabilityai/stable-diffusion-2-base
35
- lora_layers: True
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/sd_upsampler_train.yaml DELETED
@@ -1,36 +0,0 @@
1
- dataset:
2
- name: mp3d
3
- num_views_low_res: 8
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- rot_high_res: 10
9
- resolution: 256
10
- resolution_high_res: 1024
11
- crop_size_high_res: 512
12
-
13
- train:
14
- log_dir: sd_upsampler
15
- lr: 0.000005
16
-
17
- test:
18
- fuse_type: single # multidiff
19
-
20
- model:
21
- guidance_scale: 9.
22
- diff_timestep: 1
23
- low_res_noise_level: 20 # from the default SD upsampler setting
24
- model_type: upsample
25
- upsample_model:
26
- model_id: stabilityai/stable-diffusion-x4-upscaler
27
- num_coarse_cp_blocks: 5
28
- lora_layers: False
29
- homo_cp_attn: True
30
- diff_timestep: 75
31
- multiframe_fuse: False
32
- base_model:
33
- diff_timestep: 50
34
- model_id: stabilityai/stable-diffusion-2-base
35
- lora_layers: True
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/test_unclip-512-6view.yaml DELETED
@@ -1,56 +0,0 @@
1
- pretrained_model_name_or_path: 'pengHTYX/MacLab-Era3D-512-6view'
2
- revision: null
3
-
4
- num_views: 6
5
- validation_dataset:
6
- prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_6view
7
- root_dir: 'examples'
8
- num_views: ${num_views}
9
- bg_color: 'white'
10
- img_wh: [512, 512]
11
- num_validation_samples: 1000
12
- crop_size: 420
13
-
14
- pred_type: 'joint'
15
- save_dir: 'mv_res'
16
- save_mode: 'rgba' # 'concat', 'rgba', 'rgb'
17
- seed: 42
18
- validation_batch_size: 1
19
- dataloader_num_workers: 1
20
- local_rank: -1
21
-
22
- pipe_kwargs:
23
- num_views: ${num_views}
24
-
25
- validation_guidance_scales: [3.0]
26
- pipe_validation_kwargs:
27
- num_inference_steps: 40
28
- eta: 1.0
29
-
30
- validation_grid_nrow: ${num_views}
31
- regress_elevation: true
32
- regress_focal_length: true
33
- unet_from_pretrained_kwargs:
34
- unclip: true
35
- sdxl: false
36
- num_views: ${num_views}
37
- sample_size: 64
38
- zero_init_conv_in: false # modify
39
-
40
- regress_elevation: ${regress_elevation}
41
- regress_focal_length: ${regress_focal_length}
42
- camera_embedding_type: e_de_da_sincos
43
- projection_camera_embeddings_input_dim: 4 # 2 for elevation and 6 for focal_length
44
- zero_init_camera_projection: false
45
- num_regress_blocks: 3
46
-
47
- cd_attention_last: false
48
- cd_attention_mid: false
49
- multiview_attention: true
50
- sparse_mv_attention: true
51
- selfattn_block: self_rowwise
52
- mvcd_attention: true
53
-
54
- use_dino: false
55
-
56
- enable_xformers_memory_efficient_attention: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/train.yaml DELETED
@@ -1,35 +0,0 @@
1
- dataset:
2
- name: 'mp3d'
3
- num_views_low_res: 1
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- rot_high_res: 10
9
- resolution: 512
10
- resolution_high_res: 1024
11
- crop_size_high_res: 256
12
-
13
- train:
14
- log_dir: high_res_upsample
15
- lr: 0.0002
16
-
17
- test:
18
- fuse_type: diffcollage # multidiff
19
-
20
- model:
21
- model_id: stabilityai/stable-diffusion-2-base
22
- guidance_scale: 9.
23
- model_type: base
24
- low_res_noise_level: 20
25
- upsample_model:
26
- num_coarse_cp_blocks: 5
27
- lora_layers: True
28
- homo_cp_attn: True
29
- diff_timestep: 75
30
- base_model:
31
- model_id: stabilityai/stable-diffusion-2-base
32
- diff_timestep: 50
33
- lora_layers: False
34
- single_image_ft: True
35
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/train_floyd.yaml DELETED
@@ -1,32 +0,0 @@
1
- dataset:
2
- name: 'mp3d'
3
- num_views_low_res: 12
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 30
8
- rot_high_res: 10
9
- resolution: 256
10
- crop_size_high_res: 256
11
- train:
12
- log_dir: high_res_upsample
13
- lr: 0.0001
14
-
15
- test:
16
- fuse_type: diffcollage # multidiff
17
-
18
- model:
19
- guidance_scale: 9.
20
- model_type: base
21
- low_res_noise_level: 20
22
- upsample_model:
23
- num_coarse_cp_blocks: 5
24
- lora_layers: True
25
- homo_cp_attn: True
26
- diff_timestep: 75
27
- base_model:
28
- model_id: DeepFloyd/IF-I-XL-v1.0
29
- lora_layers: False
30
- single_image_ft: False
31
- diff_timestep: 50
32
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/train_lora.yaml DELETED
@@ -1,28 +0,0 @@
1
- dataset:
2
- num_views_low_res: 1
3
- num_views_high_res: 1
4
- image_root_dir: training/mp3d_skybox
5
- fov: 90
6
- rot_low_res: 45
7
- resolution: 256
8
-
9
- train:
10
- log_dir: high_res_upsample
11
- lr: 0.0002
12
-
13
- test:
14
- fuse_type: diffcollage # multidiff
15
-
16
- model:
17
- model_id: stabilityai/stable-diffusion-2-base
18
- guidance_scale: 9.
19
- diff_timestep: 50
20
- model_type: base
21
- upsample_model:
22
- num_coarse_cp_blocks: 5
23
- lora_layers: True
24
- homo_cp_attn: True
25
- base_model:
26
- lora_layers: True
27
- single_image_ft: True
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/train_mv.yaml DELETED
@@ -1,33 +0,0 @@
1
- dataset:
2
- name: 'mp3d'
3
- num_views_low_res: 8
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- rot_high_res: 10
9
- resolution: 512
10
- resolution_high_res: 1024
11
- crop_size_high_res: 256
12
- train:
13
- log_dir: high_res_upsample
14
- lr: 0.0001
15
-
16
- test:
17
- fuse_type: diffcollage # multidiff
18
-
19
- model:
20
- guidance_scale: 9.
21
- model_type: base
22
- low_res_noise_level: 20
23
- upsample_model:
24
- num_coarse_cp_blocks: 5
25
- lora_layers: True
26
- homo_cp_attn: True
27
- diff_timestep: 75
28
- base_model:
29
- model_id: stabilityai/stable-diffusion-2-base
30
- lora_layers: False
31
- single_image_ft: False
32
- diff_timestep: 50
33
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/train_mv_256.yaml DELETED
@@ -1,33 +0,0 @@
1
- dataset:
2
- name: 'mp3d'
3
- num_views_low_res: 2
4
- num_views_high_res: 1
5
- image_root_dir: training/mp3d_skybox
6
- fov: 90
7
- rot_low_res: 45
8
- rot_high_res: 10
9
- resolution: 256
10
- resolution_high_res: 1024
11
- crop_size_high_res: 256
12
- train:
13
- log_dir: high_res_upsample
14
- lr: 0.00001
15
-
16
- test:
17
- fuse_type: diffcollage # multidiff
18
-
19
- model:
20
- guidance_scale: 9.
21
- model_type: base
22
- low_res_noise_level: 20
23
- upsample_model:
24
- num_coarse_cp_blocks: 5
25
- lora_layers: True
26
- homo_cp_attn: True
27
- diff_timestep: 75
28
- base_model:
29
- model_id: stabilityai/stable-diffusion-2-base
30
- lora_layers: False
31
- single_image_ft: False
32
- diff_timestep: 50
33
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/upsample_gen_single.yaml DELETED
@@ -1,37 +0,0 @@
1
- dataset:
2
- name: generation
3
- num_views_low_res: 8
4
- num_views_high_res: 8
5
- image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
6
- resume_dir: logs/tb_logs/test_mp3d_upsample_seperate=2/version_0/images
7
- fov: 90
8
- rot_low_res: 45
9
- rot_high_res: 45
10
- resolution: 256
11
- resolution_high_res: 1024
12
- crop_size_high_res: 1024
13
-
14
- train:
15
- log_dir: sd_upsampler
16
- lr: 0.0002
17
-
18
- test:
19
- fuse_type: single # multidiff
20
-
21
- model:
22
- guidance_scale: 9.
23
- diff_timestep: 1
24
- low_res_noise_level: 50 # from the default SD upsampler setting
25
- model_type: upsample
26
- upsample_model:
27
- model_id: stabilityai/stable-diffusion-x4-upscaler
28
- num_coarse_cp_blocks: 5
29
- lora_layers: False
30
- homo_cp_attn: True
31
- diff_timestep: 75
32
- multiframe_fuse: False
33
- base_model:
34
- diff_timestep: 30
35
- model_id: stabilityai/stable-diffusion-2-base
36
- lora_layers: True
37
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/upsample_generation.yaml DELETED
@@ -1,37 +0,0 @@
1
- dataset:
2
- name: generation
3
- num_views_low_res: 8
4
- num_views_high_res: 8
5
- image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
6
- resume_dir: logs/tb_logs/test_mp3d_upsample_all=7/version_0/images
7
- fov: 90
8
- rot_low_res: 45
9
- rot_high_res: 45
10
- resolution: 256
11
- resolution_high_res: 1024
12
- crop_size_high_res: 1024
13
-
14
- train:
15
- log_dir: sd_upsampler
16
- lr: 0.0002
17
-
18
- test:
19
- fuse_type: single # multidiff
20
-
21
- model:
22
- guidance_scale: 9.
23
- diff_timestep: 1
24
- low_res_noise_level: 1 # from the default SD upsampler setting
25
- model_type: upsample
26
- upsample_model:
27
- model_id: stabilityai/stable-diffusion-x4-upscaler
28
- num_coarse_cp_blocks: 5
29
- lora_layers: False
30
- homo_cp_attn: True
31
- diff_timestep: 75
32
- multiframe_fuse: True
33
- base_model:
34
- diff_timestep: 30
35
- model_id: stabilityai/stable-diffusion-2-base
36
- lora_layers: True
37
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/3968940-PH.png DELETED
Binary file (206 kB)
 
examples/A_beautiful_cyborg_with_brown_hair_rgba.png DELETED

Git LFS Details

  • SHA256: 3dd8d815ba5bc0a7e17587f8a4d2cec64d196ba5b5f44fff3fed13e1783de366
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
examples/A_bulldog_with_a_black_pirate_hat_rgba.png DELETED
Binary file (488 kB)
 
examples/A_pig_wearing_a_backpack_rgba.png DELETED
Binary file (652 kB)