choihj0706 committed on
Commit
9ff53eb
·
1 Parent(s): a1f8fa7

Add fine-tuned MusicGen model

Browse files
Files changed (3) hide show
  1. compression_state_dict.bin +3 -0
  2. config.json +338 -0
  3. state_dict.bin +3 -0
compression_state_dict.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b966e4ba458c1e36b14d8ea39b0510afc6ae9ba33cb396b54ba18ca79963ff44
3
+ size 1052
config.json ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_lm.norm_first": {
3
+ "value": true
4
+ },
5
+ "wandb.with_media_logging": {
6
+ "value": true
7
+ },
8
+ "generate.lm.prompt_duration": {
9
+ "value": "None"
10
+ },
11
+ "slurm.time": {
12
+ "value": 3600
13
+ },
14
+ "fuser.cross": {
15
+ "value": "['description']"
16
+ },
17
+ "fsdp.per_block": {
18
+ "value": true
19
+ },
20
+ "fsdp.buffer_dtype": {
21
+ "value": "float32"
22
+ },
23
+ "autocast": {
24
+ "value": true
25
+ },
26
+ "fsdp.param_dtype": {
27
+ "value": "float16"
28
+ },
29
+ "optim.eager_sync": {
30
+ "value": true
31
+ },
32
+ "transformer_lm.emb_lr": {
33
+ "value": "None"
34
+ },
35
+ "channels": {
36
+ "value": 1
37
+ },
38
+ "optim.ema.use": {
39
+ "value": true
40
+ },
41
+ "dataset.shuffle": {
42
+ "value": false
43
+ },
44
+ "generate.every": {
45
+ "value": 25
46
+ },
47
+ "codebooks_pattern.modeling": {
48
+ "value": "delay"
49
+ },
50
+ "metrics.text_consistency.clap.model_arch": {
51
+ "value": "HTSAT-base"
52
+ },
53
+ "generate.audio.loudness_headroom_db": {
54
+ "value": 14
55
+ },
56
+ "fuser.sum": {
57
+ "value": "[]"
58
+ },
59
+ "conditioners.description.t5.word_dropout": {
60
+ "value": 0.3
61
+ },
62
+ "dora.dir": {
63
+ "value": "/checkpoint/choihj/experiments/audiocraft/outputs"
64
+ },
65
+ "tensorboard.with_media_logging": {
66
+ "value": true
67
+ },
68
+ "generate.audio.format": {
69
+ "value": "wav"
70
+ },
71
+ "logging.level": {
72
+ "value": "INFO"
73
+ },
74
+ "slurm.gpus": {
75
+ "value": 4
76
+ },
77
+ "dataset.min_segment_ratio": {
78
+ "value": 0.8
79
+ },
80
+ "interleave_stereo_codebooks.use": {
81
+ "value": false
82
+ },
83
+ "codebooks_pattern.unroll.flattening": {
84
+ "value": "[0, 1, 2, 3]"
85
+ },
86
+ "transformer_lm.two_step_cfg": {
87
+ "value": false
88
+ },
89
+ "optim.updates_per_epoch": {
90
+ "value": 100
91
+ },
92
+ "transformer_lm.depthwise_init": {
93
+ "value": "current"
94
+ },
95
+ "transformer_lm.past_context": {
96
+ "value": "None"
97
+ },
98
+ "metrics.chroma_cosine.chroma_base.sample_rate": {
99
+ "value": 32000
100
+ },
101
+ "fuser.cross_attention_pos_emb_scale": {
102
+ "value": 1
103
+ },
104
+ "optim.epochs": {
105
+ "value": 100
106
+ },
107
+ "transformer_lm.bias_attn": {
108
+ "value": false
109
+ },
110
+ "datasource.valid": {
111
+ "value": "/content/drive/MyDrive/projects/carecruise_intern/audiocraft/egs/eval"
112
+ },
113
+ "tensorboard.sub_dir": {
114
+ "value": "None"
115
+ },
116
+ "generate.num_workers": {
117
+ "value": 5
118
+ },
119
+ "metrics.fad.tf.bin": {
120
+ "value": "None"
121
+ },
122
+ "fsdp.reduce_dtype": {
123
+ "value": "float32"
124
+ },
125
+ "dataset.train.merge_text_p": {
126
+ "value": 0.25
127
+ },
128
+ "schedule.step.gamma": {
129
+ "value": "None"
130
+ },
131
+ "transformer_lm.kv_repeat": {
132
+ "value": 1
133
+ },
134
+ "wandb.group": {
135
+ "value": "None"
136
+ },
137
+ "cache.write": {
138
+ "value": false
139
+ },
140
+ "transformer_lm.causal": {
141
+ "value": true
142
+ },
143
+ "generate.lm.remove_prompts": {
144
+ "value": false
145
+ },
146
+ "metrics.fad.tf.model_path": {
147
+ "value": "//reference/fad/vggish_model.ckpt"
148
+ },
149
+ "evaluate.metrics.base": {
150
+ "value": false
151
+ },
152
+ "generate.num_samples": {
153
+ "value": 5
154
+ },
155
+ "autocast_dtype": {
156
+ "value": "float16"
157
+ },
158
+ "classifier_free_guidance.inference_coef": {
159
+ "value": 3
160
+ },
161
+ "codebooks_pattern.delay.flatten_first": {
162
+ "value": 0
163
+ },
164
+ "dataset.segment_duration": {
165
+ "value": 30
166
+ },
167
+ "slurm.mem_per_gpu": {
168
+ "value": 40
169
+ },
170
+ "datasource.train": {
171
+ "value": "/content/drive/MyDrive/projects/carecruise_intern/audiocraft/egs/train"
172
+ },
173
+ "transformer_lm.layer_scale": {
174
+ "value": "None"
175
+ },
176
+ "num_threads": {
177
+ "value": 1
178
+ },
179
+ "optim.ema.device": {
180
+ "value": "cuda"
181
+ },
182
+ "metrics.text_consistency.use_gt": {
183
+ "value": false
184
+ },
185
+ "schedule.inverse_sqrt.warmup_init_lr": {
186
+ "value": 0
187
+ },
188
+ "evaluate.metrics.text_consistency": {
189
+ "value": false
190
+ },
191
+ "schedule.polynomial_decay.end_lr": {
192
+ "value": 0
193
+ },
194
+ "transformer_lm.num_heads": {
195
+ "value": 16
196
+ },
197
+ "metrics.chroma_cosine.chroma_base.n_chroma": {
198
+ "value": 12
199
+ },
200
+ "dtype": {
201
+ "value": "float32"
202
+ },
203
+ "metrics.kld.model": {
204
+ "value": "passt"
205
+ },
206
+ "evaluate.truncate_audio": {
207
+ "value": "None"
208
+ },
209
+ "checkpoint.save_last": {
210
+ "value": true
211
+ },
212
+ "evaluate.metrics.kld": {
213
+ "value": false
214
+ },
215
+ "optim.optimizer": {
216
+ "value": "adamw"
217
+ },
218
+ "dataset.train.drop_other_p": {
219
+ "value": 0.5
220
+ },
221
+ "transformer_lm.activation": {
222
+ "value": "gelu"
223
+ },
224
+ "evaluate.every": {
225
+ "value": 25
226
+ },
227
+ "fsdp.use": {
228
+ "value": false
229
+ },
230
+ "tokens.padding_with_special_token": {
231
+ "value": false
232
+ },
233
+ "transformer_lm.qk_layer_norm": {
234
+ "value": false
235
+ },
236
+ "device": {
237
+ "value": "cuda"
238
+ },
239
+ "fsdp.sharding_strategy": {
240
+ "value": "shard_grad_op"
241
+ },
242
+ "dataset.train.shuffle": {
243
+ "value": true
244
+ },
245
+ "optim.adam.betas": {
246
+ "value": "[0.9, 0.95]"
247
+ },
248
+ "metrics.kld.use_gt": {
249
+ "value": false
250
+ },
251
+ "dataset.generate.return_info": {
252
+ "value": true
253
+ },
254
+ "dataset.batch_size": {
255
+ "value": 1
256
+ },
257
+ "dataset.sample_on_duration": {
258
+ "value": false
259
+ },
260
+ "schedule.inverse_sqrt.warmup": {
261
+ "value": "None"
262
+ },
263
+ "fuser.prepend": {
264
+ "value": "[]"
265
+ },
266
+ "efficient_attention_backend": {
267
+ "value": "torch"
268
+ },
269
+ "codebooks_pattern.unroll.delays": {
270
+ "value": "[0, 0, 0, 0]"
271
+ },
272
+ "schedule.cosine.warmup": {
273
+ "value": 8
274
+ },
275
+ "schedule.lr_scheduler": {
276
+ "value": "cosine"
277
+ },
278
+ "dataset.valid.num_samples": {
279
+ "value": 1
280
+ },
281
+ "transformer_lm.hidden_scale": {
282
+ "value": 4
283
+ },
284
+ "schedule.exponential.lr_decay": {
285
+ "value": "None"
286
+ },
287
+ "show": {
288
+ "value": false
289
+ },
290
+ "transformer_lm.card": {
291
+ "value": 2048
292
+ },
293
+ "fuser.cross_attention_pos_emb": {
294
+ "value": false
295
+ },
296
+ "conditioners.description.model": {
297
+ "value": "t5"
298
+ },
299
+ "generate.path": {
300
+ "value": "samples"
301
+ },
302
+ "codebooks_pattern.delay.delays": {
303
+ "value": "[0, 1, 2, 3]"
304
+ },
305
+ "transformer_lm.xpos": {
306
+ "value": false
307
+ },
308
+ "logging.log_tensorboard": {
309
+ "value": true
310
+ },
311
+ "benchmark_no_load": {
312
+ "value": false
313
+ },
314
+ "schedule.cosine.lr_min_ratio": {
315
+ "value": 0
316
+ },
317
+ "transformer_lm.custom": {
318
+ "value": false
319
+ },
320
+ "evaluate.metrics.chroma_cosine": {
321
+ "value": false
322
+ },
323
+ "cache.write_shard": {
324
+ "value": 0
325
+ },
326
+ "schedule.polynomial_decay.power": {
327
+ "value": 1
328
+ },
329
+ "generate.audio.strategy": {
330
+ "value": "loudness"
331
+ },
332
+ "transformer_lm.dim": {
333
+ "value": 1024
334
+ },
335
+ "compression_model_checkpoint": {
336
+ "value": "//pretrained/facebook/encodec_32khz"
337
+ }
338
+ }
state_dict.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c8e12eed36664e4d0f5884843c79eaed90466ab05a6072f352d35e9c982c5d
3
+ size 840844650