Fred committed
Commit a7bb718
1 Parent(s): d5dd255

Save config for this run

Files changed (1)
  1. config.py +504 -0
config.py ADDED
@@ -0,0 +1,504 @@
+ from src.data.CodeGeneration.APPS_dataloader import APPS
+ from src.data.CodeGeneration.MBPP_dataloader import MBPP
+ from src.data.Arithmetic.python_scripts.Arithmetic_Dataset import Arithmetic_Dataset
+
+ DEVICE = "cuda:0"
+
+
+ DEBUG = False
+
+ config = {
+     "model": {
+         "codellama": {
+             "base_model_id": "codellama/CodeLlama-7b-hf",
+             "quantitize": "int8",
+             "dataset": "Arithmetic_Simple",
+             "data_collator": "DataCollatorForSeq2Seq",
+             "lora_config": {
+                 "r": 16,
+                 "lora_alpha": 16,
+                 "target_modules": [
+                     "q_proj",
+                     "k_proj",
+                     "v_proj",
+                     "o_proj",
+                     "gate_proj",
+                     "up_proj",
+                     "down_proj",
+                 ],
+                 "lora_dropout": 0.05,
+                 "bias": "none",
+                 "task_type": "CAUSAL_LM",
+             },
+             "training_args": {
+                 "output_dir": "codellama-output",
+                 "warmup_steps": 100,
+                 "per_device_train_batch_size": 1,
+                 "per_device_eval_batch_size": 1,
+                 "gradient_accumulation_steps": 4,
+                 "max_steps": 10000,
+                 "learning_rate": 3e-4,
+                 "optim": "adamw_torch",
+                 "logging_dir": "codellama-output-logs",
+                 "logging_steps": 10,
+                 "save_strategy": "steps",
+                 "save_steps": 500,
+                 "load_best_model_at_end": False,
+                 "group_by_length": True,
+                 "fp16": True,
+                 "evaluation_strategy": "steps",
+                 "eval_steps": 1000,
+                 # Uncomment this line to set a custom integration to report the results and logs to
+                 # With transformers v4, the default value is "all"
+                 # With transformers v5, the default value will be "none"
+                 # "report_to": "wandb",
+                 # Uncomment this line to set a custom run name (default ones like "eternal-brook-20"
+                 # will be used if not set)
+                 # "run_name": "phi2-code-finetune",
+                 # Uncomment the following lines to trigger (Hugging Face built-in) evaluation after
+                 # every X steps of training
+                 # "evaluation_strategy": "steps",
+                 # "eval_steps": 200,
+                 # "do_eval": True,
+             },
+             "tokenizer": {
+                 "tokenize_config": {
+                     "truncation": True,
+                     "max_length": 192,
+                     "padding": "max_length",
+                 },
+                 "prompt_template": "config/qa_template.txt",
+             },
+         },
+         "phi-2": {
+             "base_model_id": "microsoft/phi-2",
+             "quantitize": "fp16",
+             "dataset": "Arithmetic_Simple",
+             "data_collator": "DataCollatorForLanguageModeling",
+             "lora_config": {
+                 "r": 32,
+                 "lora_alpha": 64,
+                 "target_modules": [
+                     "q_proj",
+                     "k_proj",
+                     "v_proj",
+                     "dense",
+                     "fc1",
+                     "fc2",
+                 ],
+                 "bias": "none",
+                 "lora_dropout": 0.05,
+                 "task_type": "CAUSAL_LM",
+             },
+             "training_args": {
+                 "output_dir": "phi2-output",
+                 "warmup_steps": 500,
+                 # fp16: ~21.5GiB VRAM; ~40h to finish
+                 "per_device_train_batch_size": 1,
+                 "per_device_eval_batch_size": 1,
+                 "gradient_accumulation_steps": 4,
+                 "max_steps": 100000,
+                 "learning_rate": 3e-4,
+                 "optim": "paged_adamw_8bit",
+                 "logging_dir": "phi2-output-logs",
+                 "logging_steps": 100,
+                 "save_strategy": "steps",
+                 "save_steps": 500,
+                 "evaluation_strategy": "steps",
+                 "eval_steps": 500,
+                 "fp16": True,
+             },
+             "tokenizer": {
+                 "tokenize_config": {
+                     "truncation": True,
+                     "max_length": 512,
+                     "padding": "max_length",
+                 },
+                 "prompt_template": "config/qa_template.txt",
+             },
+         },
+         "phi-1.5": {
+             "base_model_id": "microsoft/phi-1.5",
+             "quantitize": "fp16",
+             "dataset": "Arithmetic_Hard",
+             "data_collator": "DataCollatorForLanguageModeling",
+             "lora_config": {
+                 "r": 32,
+                 "lora_alpha": 64,
+                 "target_modules": ["q_proj", "k_proj", "v_proj"],
+                 "bias": "none",
+                 "lora_dropout": 0.05,
+                 "task_type": "CAUSAL_LM",
+             },
+             "training_args": {
+                 "output_dir": "phi-output",
+                 "warmup_steps": 1,
+                 "per_device_train_batch_size": 1,
+                 "per_device_eval_batch_size": 1,
+                 "gradient_accumulation_steps": 4,
+                 "max_steps": 10000,
+                 "learning_rate": 3e-4,
+                 "optim": "paged_adamw_8bit",
+                 "logging_dir": "phi-output-logs",
+                 "logging_steps": 10,
+                 "save_strategy": "steps",
+                 "save_steps": 500,
+                 "evaluation_strategy": "steps",
+                 "eval_steps": 500,
+                 "fp16": True,
+                 "report_to": "none",
+             },
+             "tokenizer": {
+                 "tokenize_config": {
+                     "truncation": True,
+                     "max_length": 512,
+                     "padding": "max_length",
+                 },
+                 "prompt_template": "config/qa_template.txt",
+             },
+         },
+         "roberta": {
+             "base_model_id": "FacebookAI/roberta-large",
+             "quantitize": "fp16",
+             "dataset": "Arithmetic_Hard",
+             "data_collator": "DataCollatorForLanguageModeling",
+             "lora_config": {
+                 "r": 32,
+                 "lora_alpha": 64,
+                 "target_modules": ["query", "key", "value"],
+                 "bias": "none",
+                 "lora_dropout": 0.05,
+                 "task_type": "CAUSAL_LM",
+             },
+             "training_args": {
+                 "output_dir": "roberta-output",
+                 "warmup_steps": 1,
+                 "per_device_train_batch_size": 1,
+                 "per_device_eval_batch_size": 1,
+                 "gradient_accumulation_steps": 4,
+                 "max_steps": 10000,
+                 "learning_rate": 3e-4,
+                 "optim": "paged_adamw_8bit",
+                 "logging_dir": "roberta-output-logs",
+                 "logging_steps": 10,
+                 "save_strategy": "steps",
+                 "save_steps": 500,
+                 "report_to": "none",
+             },
+             "tokenizer": {
+                 "tokenize_config": {
+                     "truncation": True,
+                     "max_length": 512,
+                     "padding": "max_length",
+                 },
+                 "prompt_template": "config/qa_template.txt",
+             },
+         },
+         "deepseek": {
+             "base_model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
+             "quantitize": "bf16",
+             "dataset": "Arithmetic_Simple",
+             "data_collator": "DataCollatorForLanguageModeling",
+             "lora_config": {  # trainable params = 30.0 M
+                 "r": 32,
+                 "lora_alpha": 64,
+                 "target_modules": [
+                     "q_proj",
+                     "k_proj",
+                     "v_proj",
+                     "o_proj",
+                     "gate_proj",
+                     "up_proj",
+                     "down_proj",
+                 ],
+                 "bias": "none",
+                 "lora_dropout": 0.05,
+                 "task_type": "CAUSAL_LM",
+             },
+             "lora_large_config": {  # trainable params = not checked yet
+                 "r": 128,
+                 "lora_alpha": 256,
+                 "target_modules": [
+                     "q_proj",
+                     "k_proj",
+                     "v_proj",
+                     "o_proj",
+                     "gate_proj",
+                     "up_proj",
+                     "down_proj",
+                 ],
+                 "bias": "none",
+                 "lora_dropout": 0.05,
+                 "task_type": "CAUSAL_LM",
+             },
+             "p_tuning_config": {  # Doesn't work, PEFT interface issues
+                 "num_virtual_tokens": 16,
+                 "num_transformer_submodules": 1,
+                 "token_dim": 2048,  # NOTE(Shih-Lun): should change w/ base LLM
+                 "encoder_hidden_size": 2048,
+                 "task_type": "CAUSAL_LM",
+             },
+             "training_args": {
+                 "output_dir": "runs/deepseek-continue",
+                 "warmup_steps": 500,
+                 # bf16: ~21.0GiB VRAM; ~21h to finish
+                 "per_device_train_batch_size": 4,
+                 "per_device_eval_batch_size": 4,
+                 "gradient_accumulation_steps": 1,
+                 "max_steps": 100000,
+                 "learning_rate": 5e-5,
+                 "optim": "paged_adamw_8bit",
+                 "logging_dir": "runs/deepseek-continue/logs",
+                 "logging_steps": 100,
+                 "save_strategy": "steps",
+                 "save_steps": 1000,
+                 "evaluation_strategy": "steps",
+                 "eval_steps": 1000,
+                 "fp16": True,
+             },
+             "tokenizer": {
+                 "tokenize_config": {
+                     "truncation": True,
+                     "max_length": 512,
+                     "padding": "max_length",
+                 },
+                 "prompt_template": "config/qa_template.txt",
+             },
+         },
+     },
+     "dataset": {
+         "simple_dataset": {
+             "type": "huggingface",  # Public datasets on the Hugging Face Hub (only for testing)
+             "dataset_purpose": "downstream",
+             "name": "b-mc2/sql-create-context",
+             "train_split": 0.9,
+             "max_train_size": 100,
+             "filling_field": ["question", "context", "answer"],
+         },
+         "testdset": {
+             "type": "local",  # Local files
+             "dataset_purpose": "downstream",
+             "train_file": "data/Test/TestDataset.json",
+             "val_file": "data/Test/TestDataset.json",
+             "test_file": "data/Test/TestDataset.json",
+             "filling_field": ["prompted_question", "answer"],
+         },
+         "APPS_loader": {
+             "type": "list-like",  # List-like objects (we're going to use this for ablations)
+             "dataset_purpose": "downstream",
+             "train": "data/APPS/apps_train.json",
+             "val": "data/APPS/test/apps_test_1.json",
+             "test": "data/APPS/test/apps_test_75.json",
+             "filling_field": ["Question", "Answer"],
+         },
+         "MBPP_loader": {
+             "type": "list-like",
+             "dataset_purpose": "downstream",
+             "train": "data/MBPP/mbpp_train.json",
+             "val": "data/MBPP/mbpp_test.json",
+             "test": "data/MBPP/mbpp_dev.json",
+             "filling_field": ["Question", "Answer"],
+         },
+         "Arithmetic_Simple": {
+             "type": "list-like",
+             "dataset_purpose": "downstream",
+             "attributes": {
+                 "subjects": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                 "lessons": [
+                     "Max_Ops1_Bounds0_100",
+                     "Max_Ops1_Bounds0_1000",
+                     "Max_Ops2_Bounds0_100",
+                     "Max_Ops2_Bounds0_1000",
+                     "Max_Ops3_Bounds0_100",
+                     "Max_Ops3_Bounds0_1000",
+                     "Max_Ops4_Bounds0_100",
+                     "Max_Ops4_Bounds0_1000",
+                     "Max_Ops5_Bounds0_100",
+                     "Max_Ops5_Bounds0_1000",
+                 ],
+             },
+             "train": "data/Arithmetic/Curriculum_Simple",
+             "val": "data/Arithmetic/Curriculum_Simple",
+             "test": "data/Arithmetic/Curriculum_Simple",
+             "filling_field": ["Question", "Answer"],
+         },
+         "Arithmetic_Hard": {
+             "type": "list-like",
+             "dataset_purpose": "downstream",
+             "attributes": {
+                 "subjects": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                 "lessons": [
+                     "Max_Ops1_Bounds-1000_1000",
+                     "Max_Ops1_Bounds-100_100",
+                     "Max_Ops1_Bounds0_100",
+                     "Max_Ops1_Bounds0_1000",
+                     "Max_Ops2_Bounds-1000_1000",
+                     "Max_Ops2_Bounds-100_100",
+                     "Max_Ops2_Bounds0_100",
+                     "Max_Ops2_Bounds0_1000",
+                     "Max_Ops3_Bounds-1000_1000",
+                     "Max_Ops3_Bounds-100_100",
+                     "Max_Ops3_Bounds0_100",
+                     "Max_Ops3_Bounds0_1000",
+                     "Max_Ops4_Bounds-1000_1000",
+                     "Max_Ops4_Bounds-100_100",
+                     "Max_Ops4_Bounds0_100",
+                     "Max_Ops4_Bounds0_1000",
+                     "Max_Ops5_Bounds-1000_1000",
+                     "Max_Ops5_Bounds-100_100",
+                     "Max_Ops5_Bounds0_100",
+                     "Max_Ops5_Bounds0_1000",
+                     "Max_Ops6_Bounds-1000_1000",
+                     "Max_Ops6_Bounds-100_100",
+                     "Max_Ops6_Bounds0_100",
+                     "Max_Ops6_Bounds0_1000",
+                     "Max_Ops7_Bounds-1000_1000",
+                     "Max_Ops7_Bounds-100_100",
+                     "Max_Ops7_Bounds0_100",
+                     "Max_Ops7_Bounds0_1000",
+                     "Max_Ops8_Bounds-1000_1000",
+                     "Max_Ops8_Bounds-100_100",
+                     "Max_Ops8_Bounds0_100",
+                     "Max_Ops8_Bounds0_1000",
+                     "Max_Ops9_Bounds-1000_1000",
+                     "Max_Ops9_Bounds-100_100",
+                     "Max_Ops9_Bounds0_100",
+                     "Max_Ops9_Bounds0_1000",
+                     "Max_Ops10_Bounds-1000_1000",
+                     "Max_Ops10_Bounds-100_100",
+                     "Max_Ops10_Bounds0_100",
+                     "Max_Ops10_Bounds0_1000",
+                 ],
+             },
+             "train": "data/Arithmetic/Curriculum_Hard",
+             "val": "data/Arithmetic/Curriculum_Hard",
+             "test": "data/Arithmetic/Curriculum_Hard",
+             "filling_field": ["Question", "Answer"],
+         },
+         "Arithmetic_XHard": {
+             "type": "list-like",
+             "dataset_purpose": "downstream",
+             "attributes": {
+                 "subjects": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                 "lessons": [
+                     "Max_Ops10_Bounds0_10000.json",
+                     "Max_Ops10_Bounds0_1000.json",
+                     "Max_Ops10_Bounds-10000_10000.json",
+                     "Max_Ops10_Bounds-1000_1000.json",
+                     "Max_Ops11_Bounds0_10000.json",
+                     "Max_Ops11_Bounds0_1000.json",
+                     "Max_Ops11_Bounds-10000_10000.json",
+                     "Max_Ops11_Bounds-1000_1000.json",
+                     "Max_Ops12_Bounds0_10000.json",
+                     "Max_Ops12_Bounds0_1000.json",
+                     "Max_Ops12_Bounds-10000_10000.json",
+                     "Max_Ops12_Bounds-1000_1000.json",
+                     "Max_Ops13_Bounds0_10000.json",
+                     "Max_Ops13_Bounds0_1000.json",
+                     "Max_Ops13_Bounds-10000_10000.json",
+                     "Max_Ops13_Bounds-1000_1000.json",
+                     "Max_Ops14_Bounds0_10000.json",
+                     "Max_Ops14_Bounds0_1000.json",
+                     "Max_Ops14_Bounds-10000_10000.json",
+                     "Max_Ops14_Bounds-1000_1000.json",
+                     "Max_Ops15_Bounds0_10000.json",
+                     "Max_Ops15_Bounds0_1000.json",
+                     "Max_Ops15_Bounds-10000_10000.json",
+                     "Max_Ops15_Bounds-1000_1000.json",
+                     "Max_Ops16_Bounds0_10000.json",
+                     "Max_Ops16_Bounds0_1000.json",
+                     "Max_Ops16_Bounds-10000_10000.json",
+                     "Max_Ops16_Bounds-1000_1000.json",
+                     "Max_Ops17_Bounds0_10000.json",
+                     "Max_Ops17_Bounds0_1000.json",
+                     "Max_Ops17_Bounds-10000_10000.json",
+                     "Max_Ops17_Bounds-1000_1000.json",
+                     "Max_Ops18_Bounds0_10000.json",
+                     "Max_Ops18_Bounds0_1000.json",
+                     "Max_Ops18_Bounds-10000_10000.json",
+                     "Max_Ops18_Bounds-1000_1000.json",
+                     "Max_Ops19_Bounds0_10000.json",
+                     "Max_Ops19_Bounds0_1000.json",
+                     "Max_Ops19_Bounds-10000_10000.json",
+                     "Max_Ops19_Bounds-1000_1000.json",
+                     "Max_Ops1_Bounds0_10000.json",
+                     "Max_Ops1_Bounds0_1000.json",
+                     "Max_Ops1_Bounds-10000_10000.json",
+                     "Max_Ops1_Bounds-1000_1000.json",
+                     "Max_Ops20_Bounds0_10000.json",
+                     "Max_Ops20_Bounds0_1000.json",
+                     "Max_Ops20_Bounds-10000_10000.json",
+                     "Max_Ops20_Bounds-1000_1000.json",
+                     "Max_Ops2_Bounds0_10000.json",
+                     "Max_Ops2_Bounds0_1000.json",
+                     "Max_Ops2_Bounds-10000_10000.json",
+                     "Max_Ops2_Bounds-1000_1000.json",
+                     "Max_Ops3_Bounds0_10000.json",
+                     "Max_Ops3_Bounds0_1000.json",
+                     "Max_Ops3_Bounds-10000_10000.json",
+                     "Max_Ops3_Bounds-1000_1000.json",
+                     "Max_Ops4_Bounds0_10000.json",
+                     "Max_Ops4_Bounds0_1000.json",
+                     "Max_Ops4_Bounds-10000_10000.json",
+                     "Max_Ops4_Bounds-1000_1000.json",
+                     "Max_Ops5_Bounds0_10000.json",
+                     "Max_Ops5_Bounds0_1000.json",
+                     "Max_Ops5_Bounds-10000_10000.json",
+                     "Max_Ops5_Bounds-1000_1000.json",
+                     "Max_Ops6_Bounds0_10000.json",
+                     "Max_Ops6_Bounds0_1000.json",
+                     "Max_Ops6_Bounds-10000_10000.json",
+                     "Max_Ops6_Bounds-1000_1000.json",
+                     "Max_Ops7_Bounds0_10000.json",
+                     "Max_Ops7_Bounds0_1000.json",
+                     "Max_Ops7_Bounds-10000_10000.json",
+                     "Max_Ops7_Bounds-1000_1000.json",
+                     "Max_Ops8_Bounds0_10000.json",
+                     "Max_Ops8_Bounds0_1000.json",
+                     "Max_Ops8_Bounds-10000_10000.json",
+                     "Max_Ops8_Bounds-1000_1000.json",
+                     "Max_Ops9_Bounds0_10000.json",
+                     "Max_Ops9_Bounds0_1000.json",
+                     "Max_Ops9_Bounds-10000_10000.json",
+                     "Max_Ops9_Bounds-1000_1000.json",
+                 ],
+             },
+             "train": "data/Arithmetic/Curriculum_XHard",
+             "val": "data/Arithmetic/Curriculum_XHard",
+             "test": "data/Arithmetic/Curriculum_XHard",
+             "filling_field": ["Question", "Answer"],
+         },
+         "GSM8K": {
+             "type": "local",
+             "dataset_purpose": "downstream",
+             "train_file": "data/GSM8K/GSM8K_train.json",
+             "val_file": "data/GSM8K/GSM8K_test.json",
+             "test_file": "data/GSM8K/GSM8K_dev.json",
+             "filling_field": ["Body", "Question", "Answer"],
+         },
+         "APPS": {
+             "type": "local",
+             "dataset_purpose": "downstream",
+             "train_file": "data/APPS/apps_train.json",
+             "val_file": "data/APPS/apps_test.json",
+             "test_file": "data/APPS/apps_dev.json",
+             "filling_field": ["Body", "Question", "Answer"],
+         },
+         "ghcode_python": {
+             "type": "huggingface",
+             "dataset_purpose": "pretrain",
+             "name": "slseanwu/ghcode_python_split_700k",
+             "max_eval_size": 1000,
+             "max_train_size": 160000,
+             "filling_field": ["code"],
+         },
+     },
+ }
+
+
+ if DEBUG:
+     # `config` is a plain dict, so use key assignment; attribute access
+     # (config.epochs = ...) would raise AttributeError.
+     config["epochs"] = 100
+     config["save_steps"] = 10
+     config["train_dataset"] = "local-test-train"
+     config["val_dataset"] = "local-test-dev"
+     config["test_dataset"] = "test-clean"
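
Note on usage: the keys in each model entry mirror the keyword arguments of peft.LoraConfig and transformers.TrainingArguments, which suggests the training script splats the sub-dicts straight into those constructors. That script is not part of this commit, so the following is a minimal sketch under that assumption; build_trainer and its arguments are illustrative, not the repository's actual code.

    from peft import LoraConfig, get_peft_model
    from transformers import (AutoModelForCausalLM, AutoTokenizer,
                              Trainer, TrainingArguments)

    from config import config

    def build_trainer(model_name, train_ds, eval_ds):
        # Hypothetical consumer of config.py; only the dict layout is real.
        cfg = config["model"][model_name]
        model = AutoModelForCausalLM.from_pretrained(cfg["base_model_id"])
        tokenizer = AutoTokenizer.from_pretrained(cfg["base_model_id"])

        # The lora_config keys line up with peft.LoraConfig's signature.
        model = get_peft_model(model, LoraConfig(**cfg["lora_config"]))

        # The training_args keys line up with transformers.TrainingArguments.
        args = TrainingArguments(**cfg["training_args"])
        return Trainer(model=model, args=args, train_dataset=train_ds,
                       eval_dataset=eval_ds, tokenizer=tokenizer)

The tokenize_config sub-dict matches the tokenizer call signature the same way, e.g. tokenizer(text, **cfg["tokenizer"]["tokenize_config"]). Note also that the LoRA scaling factor lora_alpha / r works out to 2 for most entries (64/32 and 256/128) and 1 for the codellama entry (16/16).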
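The "quantitize" field (spelling as in the file, which any downstream lookup has to match) takes "int8", "fp16", or "bf16". One plausible mapping onto from_pretrained(), assuming bitsandbytes for the int8 path; the mapping itself is an assumption, not shown in this commit.

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    from config import DEVICE

    def load_base_model(cfg):
        # Hypothetical helper; the int8/fp16/bf16 handling is assumed.
        if cfg["quantitize"] == "int8":
            return AutoModelForCausalLM.from_pretrained(
                cfg["base_model_id"],
                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
                device_map={"": 0},  # consistent with DEVICE = "cuda:0"
            )
        dtype = torch.float16 if cfg["quantitize"] == "fp16" else torch.bfloat16
        return AutoModelForCausalLM.from_pretrained(
            cfg["base_model_id"], torch_dtype=dtype
        ).to(DEVICE)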
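Each entry under "dataset" carries a "type" of "huggingface", "local", or "list-like", which implies a small dispatch in the data-loading code. A hedged sketch follows; load_split is hypothetical, and the APPS/MBPP/Arithmetic_Dataset classes imported at the top of config.py presumably do the real work for the list-like case.

    import json
    from datasets import load_dataset

    from config import config

    def load_split(dataset_name, split="train"):
        # Hypothetical dispatcher; only the spec fields come from config.py.
        spec = config["dataset"][dataset_name]
        if spec["type"] == "huggingface":
            return load_dataset(spec["name"], split=split)
        if spec["type"] == "local":
            # Keys are train_file / val_file / test_file.
            with open(spec[f"{split}_file"]) as f:
                return json.load(f)
        if spec["type"] == "list-like":
            # APPS/MBPP point at JSON files; the Arithmetic sets point at
            # curriculum directories consumed by the dedicated dataloaders.
            return spec[split]
        raise ValueError(f"unknown dataset type: {spec['type']}")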