michaelpiro1 committed on
Commit
6a602c0
·
verified ·
1 Parent(s): 936f855
model_index.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "_class_name": "DDPMPipeline",
3
- "_diffusers_version": "0.29.0",
4
  "scheduler": [
5
  "diffusers",
6
  "DDIMScheduler"
7
  ],
8
  "unet": [
9
- "diffusers",
10
- "UNet2DModel"
11
  ]
12
  }
 
1
  {
2
  "_class_name": "DDPMPipeline",
3
+ "_diffusers_version": "0.29.2",
4
  "scheduler": [
5
  "diffusers",
6
  "DDIMScheduler"
7
  ],
8
  "unet": [
9
+ "audioldm2",
10
+ "AudioLDM2UNet2DConditionModel"
11
  ]
12
  }
scheduler/scheduler_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_class_name": "DDIMScheduler",
3
- "_diffusers_version": "0.29.0",
4
  "beta_end": 0.0195,
5
  "beta_schedule": "scaled_linear",
6
  "beta_start": 0.0015,
 
1
  {
2
  "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.29.2",
4
  "beta_end": 0.0195,
5
  "beta_schedule": "scaled_linear",
6
  "beta_start": 0.0015,
unet/config.json CHANGED
@@ -1,46 +1,75 @@
1
  {
2
- "_class_name": "UNet2DModel",
3
- "_diffusers_version": "0.29.0",
4
- "_name_or_path": "michaelpiro1/train_model",
5
  "act_fn": "silu",
6
- "add_attention": true,
7
  "attention_head_dim": 8,
8
- "attn_norm_num_groups": null,
9
  "block_out_channels": [
10
  128,
11
  256,
12
  384,
13
  640
14
  ],
15
- "center_input_sample": false,
16
  "class_embed_type": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "down_block_types": [
18
- "AttnDownBlock2D",
19
- "AttnDownBlock2D",
20
- "AttnDownBlock2D",
21
- "DownBlock2D"
22
  ],
23
  "downsample_padding": 1,
24
- "downsample_type": "conv",
25
- "dropout": 0.0,
26
  "flip_sin_to_cos": true,
27
  "freq_shift": 0,
28
  "in_channels": 16,
29
  "layers_per_block": 2,
30
  "mid_block_scale_factor": 1,
 
31
  "norm_eps": 1e-05,
32
  "norm_num_groups": 32,
 
33
  "num_class_embeds": null,
34
- "num_train_timesteps": null,
35
  "out_channels": 8,
 
36
  "resnet_time_scale_shift": "default",
37
- "sample_size": 512,
 
 
 
38
  "time_embedding_type": "positional",
 
 
39
  "up_block_types": [
40
- "UpBlock2D",
41
- "AttnUpBlock2D",
42
- "AttnUpBlock2D",
43
- "AttnUpBlock2D"
44
  ],
45
- "upsample_type": "conv"
 
46
  }
 
1
  {
2
+ "_class_name": "AudioLDM2UNet2DConditionModel",
3
+ "_diffusers_version": "0.29.2",
4
+ "_name_or_path": "michaelpiro1/new_unet_fromLDM",
5
  "act_fn": "silu",
 
6
  "attention_head_dim": 8,
 
7
  "block_out_channels": [
8
  128,
9
  256,
10
  384,
11
  640
12
  ],
 
13
  "class_embed_type": null,
14
+ "class_embeddings_concat": false,
15
+ "conv_in_kernel": 3,
16
+ "conv_out_kernel": 3,
17
+ "cross_attention_dim": [
18
+ [
19
+ null,
20
+ 768,
21
+ 1024
22
+ ],
23
+ [
24
+ null,
25
+ 768,
26
+ 1024
27
+ ],
28
+ [
29
+ null,
30
+ 768,
31
+ 1024
32
+ ],
33
+ [
34
+ null,
35
+ 768,
36
+ 1024
37
+ ]
38
+ ],
39
  "down_block_types": [
40
+ "DownBlock2D",
41
+ "CrossAttnDownBlock2D",
42
+ "CrossAttnDownBlock2D",
43
+ "CrossAttnDownBlock2D"
44
  ],
45
  "downsample_padding": 1,
 
 
46
  "flip_sin_to_cos": true,
47
  "freq_shift": 0,
48
  "in_channels": 16,
49
  "layers_per_block": 2,
50
  "mid_block_scale_factor": 1,
51
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
52
  "norm_eps": 1e-05,
53
  "norm_num_groups": 32,
54
+ "num_attention_heads": null,
55
  "num_class_embeds": null,
56
+ "only_cross_attention": false,
57
  "out_channels": 8,
58
+ "projection_class_embeddings_input_dim": null,
59
  "resnet_time_scale_shift": "default",
60
+ "sample_size": 256,
61
+ "time_cond_proj_dim": null,
62
+ "time_embedding_act_fn": null,
63
+ "time_embedding_dim": null,
64
  "time_embedding_type": "positional",
65
+ "timestep_post_act": null,
66
+ "transformer_layers_per_block": 1,
67
  "up_block_types": [
68
+ "CrossAttnUpBlock2D",
69
+ "CrossAttnUpBlock2D",
70
+ "CrossAttnUpBlock2D",
71
+ "UpBlock2D"
72
  ],
73
+ "upcast_attention": false,
74
+ "use_linear_projection": false
75
  }
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a66bdec9ac60336ec587a112d0a8f393e3e03aedf031de9ad896506c988cac71
3
- size 436573952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f41f2714c77930fc77408b3ee1ddad1edfd3574d33045b51f1574f697db4340
3
+ size 1388001648