TachyHealthResearch committed on
Commit
042a5a5
·
1 Parent(s): 0c36757

Training in progress, step 15, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -19,12 +19,12 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "gate_proj",
23
- "o_proj",
24
  "k_proj",
25
- "v_proj",
26
  "down_proj",
 
 
27
  "up_proj",
 
28
  "q_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "k_proj",
 
23
  "down_proj",
24
+ "o_proj",
25
+ "v_proj",
26
  "up_proj",
27
+ "gate_proj",
28
  "q_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:298922edea9bd506a442bf3faaefa327c3431746fa35ad70b63b6f230de6e185
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e111b7672c5d2a8f54e7966b44c3966154845bb02803264d2c43c90f3a082e65
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fadd4982efdcbaf2e6a0a037cb5c1976c4fca115cb4ce7206ff477d1da3b273
3
  size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dc26a38ae8770b17827642290912a8d4383f5eea19af27d7c96b34013b80420
3
  size 42545748
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cf2042332884d26b423c910e7e944b9fb3bdc6234db05b1536b08f0d4fc221f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81816f8c172759b66460138682effef4b6abce5a23baf230c81f044a0c8ede6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d495c26c3e057e915a49e6c3755bab17f11955ce2bc01fcf07818c9d8f4dad2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04,
5
  "eval_steps": 1000,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -23,597 +23,87 @@
23
  {
24
  "epoch": 0.0,
25
  "learning_rate": 0.0002,
26
- "loss": 1.6708,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.0,
31
- "learning_rate": 0.00019793814432989693,
32
- "loss": 1.9559,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.0,
37
- "learning_rate": 0.00019587628865979381,
38
- "loss": 1.6901,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.0,
43
- "learning_rate": 0.00019381443298969073,
44
- "loss": 2.2035,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.0,
49
- "learning_rate": 0.00019175257731958765,
50
- "loss": 2.4562,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.0,
55
- "learning_rate": 0.00018969072164948454,
56
- "loss": 1.4477,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.0,
61
- "learning_rate": 0.00018762886597938145,
62
- "loss": 1.938,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.0,
67
- "learning_rate": 0.00018556701030927837,
68
- "loss": 1.941,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.0,
73
- "learning_rate": 0.00018350515463917526,
74
- "loss": 1.7198,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.0,
79
- "learning_rate": 0.00018144329896907217,
80
- "loss": 1.96,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.01,
85
- "learning_rate": 0.0001793814432989691,
86
- "loss": 1.7657,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.01,
91
- "learning_rate": 0.00017731958762886598,
92
- "loss": 1.7391,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.01,
97
- "learning_rate": 0.0001752577319587629,
98
- "loss": 1.7447,
99
  "step": 15
100
- },
101
- {
102
- "epoch": 0.01,
103
- "learning_rate": 0.0001731958762886598,
104
- "loss": 1.5251,
105
- "step": 16
106
- },
107
- {
108
- "epoch": 0.01,
109
- "learning_rate": 0.0001711340206185567,
110
- "loss": 1.7227,
111
- "step": 17
112
- },
113
- {
114
- "epoch": 0.01,
115
- "learning_rate": 0.00016907216494845361,
116
- "loss": 2.5511,
117
- "step": 18
118
- },
119
- {
120
- "epoch": 0.01,
121
- "learning_rate": 0.00016701030927835053,
122
- "loss": 1.4848,
123
- "step": 19
124
- },
125
- {
126
- "epoch": 0.01,
127
- "learning_rate": 0.00016494845360824742,
128
- "loss": 1.9532,
129
- "step": 20
130
- },
131
- {
132
- "epoch": 0.01,
133
- "learning_rate": 0.00016288659793814434,
134
- "loss": 1.879,
135
- "step": 21
136
- },
137
- {
138
- "epoch": 0.01,
139
- "learning_rate": 0.00016082474226804125,
140
- "loss": 1.7689,
141
- "step": 22
142
- },
143
- {
144
- "epoch": 0.01,
145
- "learning_rate": 0.00015876288659793814,
146
- "loss": 1.5917,
147
- "step": 23
148
- },
149
- {
150
- "epoch": 0.01,
151
- "learning_rate": 0.00015670103092783506,
152
- "loss": 1.9113,
153
- "step": 24
154
- },
155
- {
156
- "epoch": 0.01,
157
- "learning_rate": 0.00015463917525773197,
158
- "loss": 1.5997,
159
- "step": 25
160
- },
161
- {
162
- "epoch": 0.01,
163
- "learning_rate": 0.00015257731958762886,
164
- "loss": 1.7558,
165
- "step": 26
166
- },
167
- {
168
- "epoch": 0.01,
169
- "learning_rate": 0.00015051546391752578,
170
- "loss": 1.4693,
171
- "step": 27
172
- },
173
- {
174
- "epoch": 0.01,
175
- "learning_rate": 0.0001484536082474227,
176
- "loss": 1.498,
177
- "step": 28
178
- },
179
- {
180
- "epoch": 0.01,
181
- "learning_rate": 0.00014639175257731958,
182
- "loss": 1.6517,
183
- "step": 29
184
- },
185
- {
186
- "epoch": 0.01,
187
- "learning_rate": 0.0001443298969072165,
188
- "loss": 1.5836,
189
- "step": 30
190
- },
191
- {
192
- "epoch": 0.01,
193
- "learning_rate": 0.00014226804123711342,
194
- "loss": 1.7213,
195
- "step": 31
196
- },
197
- {
198
- "epoch": 0.01,
199
- "learning_rate": 0.0001402061855670103,
200
- "loss": 1.4432,
201
- "step": 32
202
- },
203
- {
204
- "epoch": 0.01,
205
- "learning_rate": 0.00013814432989690722,
206
- "loss": 2.1706,
207
- "step": 33
208
- },
209
- {
210
- "epoch": 0.01,
211
- "learning_rate": 0.00013608247422680414,
212
- "loss": 1.613,
213
- "step": 34
214
- },
215
- {
216
- "epoch": 0.01,
217
- "learning_rate": 0.00013402061855670103,
218
- "loss": 1.7608,
219
- "step": 35
220
- },
221
- {
222
- "epoch": 0.01,
223
- "learning_rate": 0.00013195876288659794,
224
- "loss": 1.6158,
225
- "step": 36
226
- },
227
- {
228
- "epoch": 0.01,
229
- "learning_rate": 0.00012989690721649486,
230
- "loss": 1.5101,
231
- "step": 37
232
- },
233
- {
234
- "epoch": 0.02,
235
- "learning_rate": 0.00012783505154639175,
236
- "loss": 1.5626,
237
- "step": 38
238
- },
239
- {
240
- "epoch": 0.02,
241
- "learning_rate": 0.00012577319587628866,
242
- "loss": 2.1865,
243
- "step": 39
244
- },
245
- {
246
- "epoch": 0.02,
247
- "learning_rate": 0.00012371134020618558,
248
- "loss": 1.4066,
249
- "step": 40
250
- },
251
- {
252
- "epoch": 0.02,
253
- "learning_rate": 0.00012164948453608247,
254
- "loss": 1.4324,
255
- "step": 41
256
- },
257
- {
258
- "epoch": 0.02,
259
- "learning_rate": 0.00011958762886597938,
260
- "loss": 1.7029,
261
- "step": 42
262
- },
263
- {
264
- "epoch": 0.02,
265
- "learning_rate": 0.0001175257731958763,
266
- "loss": 1.9695,
267
- "step": 43
268
- },
269
- {
270
- "epoch": 0.02,
271
- "learning_rate": 0.00011546391752577319,
272
- "loss": 1.7016,
273
- "step": 44
274
- },
275
- {
276
- "epoch": 0.02,
277
- "learning_rate": 0.0001134020618556701,
278
- "loss": 1.768,
279
- "step": 45
280
- },
281
- {
282
- "epoch": 0.02,
283
- "learning_rate": 0.00011134020618556702,
284
- "loss": 1.3867,
285
- "step": 46
286
- },
287
- {
288
- "epoch": 0.02,
289
- "learning_rate": 0.00010927835051546391,
290
- "loss": 1.8445,
291
- "step": 47
292
- },
293
- {
294
- "epoch": 0.02,
295
- "learning_rate": 0.00010721649484536083,
296
- "loss": 1.5515,
297
- "step": 48
298
- },
299
- {
300
- "epoch": 0.02,
301
- "learning_rate": 0.00010515463917525774,
302
- "loss": 1.7586,
303
- "step": 49
304
- },
305
- {
306
- "epoch": 0.02,
307
- "learning_rate": 0.00010309278350515463,
308
- "loss": 1.4246,
309
- "step": 50
310
- },
311
- {
312
- "epoch": 0.02,
313
- "learning_rate": 0.00010103092783505155,
314
- "loss": 1.7533,
315
- "step": 51
316
- },
317
- {
318
- "epoch": 0.02,
319
- "learning_rate": 9.896907216494846e-05,
320
- "loss": 1.6105,
321
- "step": 52
322
- },
323
- {
324
- "epoch": 0.02,
325
- "learning_rate": 9.690721649484537e-05,
326
- "loss": 1.5276,
327
- "step": 53
328
- },
329
- {
330
- "epoch": 0.02,
331
- "learning_rate": 9.484536082474227e-05,
332
- "loss": 1.3974,
333
- "step": 54
334
- },
335
- {
336
- "epoch": 0.02,
337
- "learning_rate": 9.278350515463918e-05,
338
- "loss": 1.4832,
339
- "step": 55
340
- },
341
- {
342
- "epoch": 0.02,
343
- "learning_rate": 9.072164948453609e-05,
344
- "loss": 1.695,
345
- "step": 56
346
- },
347
- {
348
- "epoch": 0.02,
349
- "learning_rate": 8.865979381443299e-05,
350
- "loss": 1.6554,
351
- "step": 57
352
- },
353
- {
354
- "epoch": 0.02,
355
- "learning_rate": 8.65979381443299e-05,
356
- "loss": 1.5769,
357
- "step": 58
358
- },
359
- {
360
- "epoch": 0.02,
361
- "learning_rate": 8.453608247422681e-05,
362
- "loss": 1.8362,
363
- "step": 59
364
- },
365
- {
366
- "epoch": 0.02,
367
- "learning_rate": 8.247422680412371e-05,
368
- "loss": 1.5832,
369
- "step": 60
370
- },
371
- {
372
- "epoch": 0.02,
373
- "learning_rate": 8.041237113402063e-05,
374
- "loss": 1.6469,
375
- "step": 61
376
- },
377
- {
378
- "epoch": 0.02,
379
- "learning_rate": 7.835051546391753e-05,
380
- "loss": 1.5835,
381
- "step": 62
382
- },
383
- {
384
- "epoch": 0.03,
385
- "learning_rate": 7.628865979381443e-05,
386
- "loss": 1.5521,
387
- "step": 63
388
- },
389
- {
390
- "epoch": 0.03,
391
- "learning_rate": 7.422680412371135e-05,
392
- "loss": 1.3237,
393
- "step": 64
394
- },
395
- {
396
- "epoch": 0.03,
397
- "learning_rate": 7.216494845360825e-05,
398
- "loss": 1.413,
399
- "step": 65
400
- },
401
- {
402
- "epoch": 0.03,
403
- "learning_rate": 7.010309278350515e-05,
404
- "loss": 1.9074,
405
- "step": 66
406
- },
407
- {
408
- "epoch": 0.03,
409
- "learning_rate": 6.804123711340207e-05,
410
- "loss": 1.5583,
411
- "step": 67
412
- },
413
- {
414
- "epoch": 0.03,
415
- "learning_rate": 6.597938144329897e-05,
416
- "loss": 1.5104,
417
- "step": 68
418
- },
419
- {
420
- "epoch": 0.03,
421
- "learning_rate": 6.391752577319587e-05,
422
- "loss": 1.6876,
423
- "step": 69
424
- },
425
- {
426
- "epoch": 0.03,
427
- "learning_rate": 6.185567010309279e-05,
428
- "loss": 1.5563,
429
- "step": 70
430
- },
431
- {
432
- "epoch": 0.03,
433
- "learning_rate": 5.979381443298969e-05,
434
- "loss": 2.1585,
435
- "step": 71
436
- },
437
- {
438
- "epoch": 0.03,
439
- "learning_rate": 5.7731958762886594e-05,
440
- "loss": 1.8855,
441
- "step": 72
442
- },
443
- {
444
- "epoch": 0.03,
445
- "learning_rate": 5.567010309278351e-05,
446
- "loss": 1.3202,
447
- "step": 73
448
- },
449
- {
450
- "epoch": 0.03,
451
- "learning_rate": 5.360824742268041e-05,
452
- "loss": 1.3524,
453
- "step": 74
454
- },
455
- {
456
- "epoch": 0.03,
457
- "learning_rate": 5.1546391752577315e-05,
458
- "loss": 1.4767,
459
- "step": 75
460
- },
461
- {
462
- "epoch": 0.03,
463
- "learning_rate": 4.948453608247423e-05,
464
- "loss": 1.8857,
465
- "step": 76
466
- },
467
- {
468
- "epoch": 0.03,
469
- "learning_rate": 4.7422680412371134e-05,
470
- "loss": 1.6848,
471
- "step": 77
472
- },
473
- {
474
- "epoch": 0.03,
475
- "learning_rate": 4.536082474226804e-05,
476
- "loss": 1.3765,
477
- "step": 78
478
- },
479
- {
480
- "epoch": 0.03,
481
- "learning_rate": 4.329896907216495e-05,
482
- "loss": 1.4421,
483
- "step": 79
484
- },
485
- {
486
- "epoch": 0.03,
487
- "learning_rate": 4.1237113402061855e-05,
488
- "loss": 1.5687,
489
- "step": 80
490
- },
491
- {
492
- "epoch": 0.03,
493
- "learning_rate": 3.9175257731958764e-05,
494
- "loss": 1.4066,
495
- "step": 81
496
- },
497
- {
498
- "epoch": 0.03,
499
- "learning_rate": 3.7113402061855674e-05,
500
- "loss": 1.6676,
501
- "step": 82
502
- },
503
- {
504
- "epoch": 0.03,
505
- "learning_rate": 3.5051546391752576e-05,
506
- "loss": 1.3302,
507
- "step": 83
508
- },
509
- {
510
- "epoch": 0.03,
511
- "learning_rate": 3.2989690721649485e-05,
512
- "loss": 2.014,
513
- "step": 84
514
- },
515
- {
516
- "epoch": 0.03,
517
- "learning_rate": 3.0927835051546395e-05,
518
- "loss": 1.6257,
519
- "step": 85
520
- },
521
- {
522
- "epoch": 0.03,
523
- "learning_rate": 2.8865979381443297e-05,
524
- "loss": 1.5508,
525
- "step": 86
526
- },
527
- {
528
- "epoch": 0.03,
529
- "learning_rate": 2.6804123711340206e-05,
530
- "loss": 1.4105,
531
- "step": 87
532
- },
533
- {
534
- "epoch": 0.04,
535
- "learning_rate": 2.4742268041237116e-05,
536
- "loss": 1.789,
537
- "step": 88
538
- },
539
- {
540
- "epoch": 0.04,
541
- "learning_rate": 2.268041237113402e-05,
542
- "loss": 1.7302,
543
- "step": 89
544
- },
545
- {
546
- "epoch": 0.04,
547
- "learning_rate": 2.0618556701030927e-05,
548
- "loss": 1.7847,
549
- "step": 90
550
- },
551
- {
552
- "epoch": 0.04,
553
- "learning_rate": 1.8556701030927837e-05,
554
- "loss": 1.489,
555
- "step": 91
556
- },
557
- {
558
- "epoch": 0.04,
559
- "learning_rate": 1.6494845360824743e-05,
560
- "loss": 1.0728,
561
- "step": 92
562
- },
563
- {
564
- "epoch": 0.04,
565
- "learning_rate": 1.4432989690721649e-05,
566
- "loss": 1.5944,
567
- "step": 93
568
- },
569
- {
570
- "epoch": 0.04,
571
- "learning_rate": 1.2371134020618558e-05,
572
- "loss": 2.0057,
573
- "step": 94
574
- },
575
- {
576
- "epoch": 0.04,
577
- "learning_rate": 1.0309278350515464e-05,
578
- "loss": 1.481,
579
- "step": 95
580
- },
581
- {
582
- "epoch": 0.04,
583
- "learning_rate": 8.247422680412371e-06,
584
- "loss": 1.478,
585
- "step": 96
586
- },
587
- {
588
- "epoch": 0.04,
589
- "learning_rate": 6.185567010309279e-06,
590
- "loss": 1.6756,
591
- "step": 97
592
- },
593
- {
594
- "epoch": 0.04,
595
- "learning_rate": 4.123711340206186e-06,
596
- "loss": 1.6028,
597
- "step": 98
598
- },
599
- {
600
- "epoch": 0.04,
601
- "learning_rate": 2.061855670103093e-06,
602
- "loss": 1.6606,
603
- "step": 99
604
- },
605
- {
606
- "epoch": 0.04,
607
- "learning_rate": 0.0,
608
- "loss": 1.4798,
609
- "step": 100
610
  }
611
  ],
612
  "logging_steps": 1,
613
- "max_steps": 100,
614
  "num_train_epochs": 1,
615
  "save_steps": 5,
616
- "total_flos": 6616414021533696.0,
617
  "trial_name": null,
618
  "trial_params": null
619
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.006,
5
  "eval_steps": 1000,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
23
  {
24
  "epoch": 0.0,
25
  "learning_rate": 0.0002,
26
+ "loss": 1.6718,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.0,
31
+ "learning_rate": 0.00019722222222222225,
32
+ "loss": 1.9564,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.0,
37
+ "learning_rate": 0.00019444444444444446,
38
+ "loss": 1.6946,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.0,
43
+ "learning_rate": 0.00019166666666666667,
44
+ "loss": 2.2475,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.0,
49
+ "learning_rate": 0.00018888888888888888,
50
+ "loss": 2.4487,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.0,
55
+ "learning_rate": 0.00018611111111111112,
56
+ "loss": 1.4369,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.0,
61
+ "learning_rate": 0.00018333333333333334,
62
+ "loss": 1.9525,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.0,
67
+ "learning_rate": 0.00018055555555555557,
68
+ "loss": 1.9572,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.0,
73
+ "learning_rate": 0.00017777777777777779,
74
+ "loss": 1.727,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.0,
79
+ "learning_rate": 0.000175,
80
+ "loss": 1.9873,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.01,
85
+ "learning_rate": 0.00017222222222222224,
86
+ "loss": 1.7861,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.01,
91
+ "learning_rate": 0.00016944444444444445,
92
+ "loss": 1.7475,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.01,
97
+ "learning_rate": 0.0001666666666666667,
98
+ "loss": 1.7548,
99
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
  ],
102
  "logging_steps": 1,
103
+ "max_steps": 75,
104
  "num_train_epochs": 1,
105
  "save_steps": 5,
106
+ "total_flos": 960031721619456.0,
107
  "trial_name": null,
108
  "trial_params": null
109
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5059c653756752cb24f5c1887d956f8a30f1a817ab283357ddbf07235a6cac86
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27b54f4df6b3ea01a348f2fcdb61d482edb9033e83476fc3a25b4bfa991e681
3
  size 4600