renqiux0302 commited on
Commit
cac1bb7
·
verified ·
1 Parent(s): cc50ad7

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -787
trainer_state.json DELETED
@@ -1,787 +0,0 @@
1
- {
2
- "best_metric": 0.3883955776691437,
3
- "best_model_checkpoint": "exp/vicuna-7b-lora-sft-code_qa_desc_summ_triplet_r_16_alpha_32_8GPUs-0116/checkpoint-1200",
4
- "epoch": 4.375569735642662,
5
- "eval_steps": 200,
6
- "global_step": 1200,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.04,
13
- "learning_rate": 2.9999999999999997e-05,
14
- "loss": 1.4343,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.07,
19
- "learning_rate": 5.9999999999999995e-05,
20
- "loss": 1.4848,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.11,
25
- "learning_rate": 8.999999999999999e-05,
26
- "loss": 1.1941,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.15,
31
- "learning_rate": 0.00011999999999999999,
32
- "loss": 0.8226,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.18,
37
- "learning_rate": 0.00015,
38
- "loss": 0.6671,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.22,
43
- "learning_rate": 0.00017999999999999998,
44
- "loss": 0.5676,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.26,
49
- "learning_rate": 0.00020999999999999998,
50
- "loss": 0.5655,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.29,
55
- "learning_rate": 0.00023999999999999998,
56
- "loss": 0.5251,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.33,
61
- "learning_rate": 0.00027,
62
- "loss": 0.4845,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.36,
67
- "learning_rate": 0.0003,
68
- "loss": 0.481,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.4,
73
- "learning_rate": 0.0002976377952755905,
74
- "loss": 0.4565,
75
- "step": 110
76
- },
77
- {
78
- "epoch": 0.44,
79
- "learning_rate": 0.0002952755905511811,
80
- "loss": 0.4625,
81
- "step": 120
82
- },
83
- {
84
- "epoch": 0.47,
85
- "learning_rate": 0.00029291338582677163,
86
- "loss": 0.4584,
87
- "step": 130
88
- },
89
- {
90
- "epoch": 0.51,
91
- "learning_rate": 0.00029055118110236217,
92
- "loss": 0.4425,
93
- "step": 140
94
- },
95
- {
96
- "epoch": 0.55,
97
- "learning_rate": 0.0002881889763779527,
98
- "loss": 0.4573,
99
- "step": 150
100
- },
101
- {
102
- "epoch": 0.58,
103
- "learning_rate": 0.0002858267716535433,
104
- "loss": 0.4361,
105
- "step": 160
106
- },
107
- {
108
- "epoch": 0.62,
109
- "learning_rate": 0.00028346456692913383,
110
- "loss": 0.4396,
111
- "step": 170
112
- },
113
- {
114
- "epoch": 0.66,
115
- "learning_rate": 0.00028110236220472436,
116
- "loss": 0.4391,
117
- "step": 180
118
- },
119
- {
120
- "epoch": 0.69,
121
- "learning_rate": 0.00027874015748031495,
122
- "loss": 0.418,
123
- "step": 190
124
- },
125
- {
126
- "epoch": 0.73,
127
- "learning_rate": 0.0002763779527559055,
128
- "loss": 0.4469,
129
- "step": 200
130
- },
131
- {
132
- "epoch": 0.73,
133
- "eval_loss": 0.4269736409187317,
134
- "eval_runtime": 19.352,
135
- "eval_samples_per_second": 103.348,
136
- "eval_steps_per_second": 1.654,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.77,
141
- "learning_rate": 0.0002740157480314961,
142
- "loss": 0.4149,
143
- "step": 210
144
- },
145
- {
146
- "epoch": 0.8,
147
- "learning_rate": 0.00027165354330708656,
148
- "loss": 0.428,
149
- "step": 220
150
- },
151
- {
152
- "epoch": 0.84,
153
- "learning_rate": 0.00026929133858267715,
154
- "loss": 0.4248,
155
- "step": 230
156
- },
157
- {
158
- "epoch": 0.88,
159
- "learning_rate": 0.0002669291338582677,
160
- "loss": 0.4249,
161
- "step": 240
162
- },
163
- {
164
- "epoch": 0.91,
165
- "learning_rate": 0.0002645669291338582,
166
- "loss": 0.4331,
167
- "step": 250
168
- },
169
- {
170
- "epoch": 0.95,
171
- "learning_rate": 0.0002622047244094488,
172
- "loss": 0.4192,
173
- "step": 260
174
- },
175
- {
176
- "epoch": 0.98,
177
- "learning_rate": 0.00025984251968503934,
178
- "loss": 0.4204,
179
- "step": 270
180
- },
181
- {
182
- "epoch": 1.02,
183
- "learning_rate": 0.00025748031496062993,
184
- "loss": 0.4318,
185
- "step": 280
186
- },
187
- {
188
- "epoch": 1.06,
189
- "learning_rate": 0.00025511811023622047,
190
- "loss": 0.4229,
191
- "step": 290
192
- },
193
- {
194
- "epoch": 1.09,
195
- "learning_rate": 0.000252755905511811,
196
- "loss": 0.4214,
197
- "step": 300
198
- },
199
- {
200
- "epoch": 1.13,
201
- "learning_rate": 0.00025039370078740154,
202
- "loss": 0.416,
203
- "step": 310
204
- },
205
- {
206
- "epoch": 1.17,
207
- "learning_rate": 0.00024803149606299207,
208
- "loss": 0.4199,
209
- "step": 320
210
- },
211
- {
212
- "epoch": 1.2,
213
- "learning_rate": 0.00024566929133858266,
214
- "loss": 0.4218,
215
- "step": 330
216
- },
217
- {
218
- "epoch": 1.24,
219
- "learning_rate": 0.0002433070866141732,
220
- "loss": 0.4113,
221
- "step": 340
222
- },
223
- {
224
- "epoch": 1.28,
225
- "learning_rate": 0.00024094488188976376,
226
- "loss": 0.4185,
227
- "step": 350
228
- },
229
- {
230
- "epoch": 1.31,
231
- "learning_rate": 0.00023858267716535432,
232
- "loss": 0.4168,
233
- "step": 360
234
- },
235
- {
236
- "epoch": 1.35,
237
- "learning_rate": 0.00023622047244094488,
238
- "loss": 0.4162,
239
- "step": 370
240
- },
241
- {
242
- "epoch": 1.39,
243
- "learning_rate": 0.0002338582677165354,
244
- "loss": 0.4175,
245
- "step": 380
246
- },
247
- {
248
- "epoch": 1.42,
249
- "learning_rate": 0.00023149606299212595,
250
- "loss": 0.4045,
251
- "step": 390
252
- },
253
- {
254
- "epoch": 1.46,
255
- "learning_rate": 0.00022913385826771652,
256
- "loss": 0.4152,
257
- "step": 400
258
- },
259
- {
260
- "epoch": 1.46,
261
- "eval_loss": 0.4086858630180359,
262
- "eval_runtime": 19.2818,
263
- "eval_samples_per_second": 103.725,
264
- "eval_steps_per_second": 1.66,
265
- "step": 400
266
- },
267
- {
268
- "epoch": 1.49,
269
- "learning_rate": 0.00022677165354330705,
270
- "loss": 0.415,
271
- "step": 410
272
- },
273
- {
274
- "epoch": 1.53,
275
- "learning_rate": 0.00022440944881889761,
276
- "loss": 0.4091,
277
- "step": 420
278
- },
279
- {
280
- "epoch": 1.57,
281
- "learning_rate": 0.00022204724409448818,
282
- "loss": 0.4132,
283
- "step": 430
284
- },
285
- {
286
- "epoch": 1.6,
287
- "learning_rate": 0.00021968503937007874,
288
- "loss": 0.3985,
289
- "step": 440
290
- },
291
- {
292
- "epoch": 1.64,
293
- "learning_rate": 0.00021732283464566927,
294
- "loss": 0.4056,
295
- "step": 450
296
- },
297
- {
298
- "epoch": 1.68,
299
- "learning_rate": 0.0002149606299212598,
300
- "loss": 0.4005,
301
- "step": 460
302
- },
303
- {
304
- "epoch": 1.71,
305
- "learning_rate": 0.00021259842519685037,
306
- "loss": 0.4059,
307
- "step": 470
308
- },
309
- {
310
- "epoch": 1.75,
311
- "learning_rate": 0.0002102362204724409,
312
- "loss": 0.409,
313
- "step": 480
314
- },
315
- {
316
- "epoch": 1.79,
317
- "learning_rate": 0.00020787401574803147,
318
- "loss": 0.4031,
319
- "step": 490
320
- },
321
- {
322
- "epoch": 1.82,
323
- "learning_rate": 0.00020551181102362203,
324
- "loss": 0.4097,
325
- "step": 500
326
- },
327
- {
328
- "epoch": 1.86,
329
- "learning_rate": 0.0002031496062992126,
330
- "loss": 0.4017,
331
- "step": 510
332
- },
333
- {
334
- "epoch": 1.9,
335
- "learning_rate": 0.00020078740157480313,
336
- "loss": 0.4026,
337
- "step": 520
338
- },
339
- {
340
- "epoch": 1.93,
341
- "learning_rate": 0.0001984251968503937,
342
- "loss": 0.4106,
343
- "step": 530
344
- },
345
- {
346
- "epoch": 1.97,
347
- "learning_rate": 0.00019606299212598423,
348
- "loss": 0.395,
349
- "step": 540
350
- },
351
- {
352
- "epoch": 2.01,
353
- "learning_rate": 0.0001937007874015748,
354
- "loss": 0.3988,
355
- "step": 550
356
- },
357
- {
358
- "epoch": 2.04,
359
- "learning_rate": 0.00019133858267716532,
360
- "loss": 0.409,
361
- "step": 560
362
- },
363
- {
364
- "epoch": 2.08,
365
- "learning_rate": 0.00018897637795275589,
366
- "loss": 0.3997,
367
- "step": 570
368
- },
369
- {
370
- "epoch": 2.11,
371
- "learning_rate": 0.00018661417322834645,
372
- "loss": 0.4007,
373
- "step": 580
374
- },
375
- {
376
- "epoch": 2.15,
377
- "learning_rate": 0.000184251968503937,
378
- "loss": 0.3905,
379
- "step": 590
380
- },
381
- {
382
- "epoch": 2.19,
383
- "learning_rate": 0.00018188976377952755,
384
- "loss": 0.4005,
385
- "step": 600
386
- },
387
- {
388
- "epoch": 2.19,
389
- "eval_loss": 0.40032637119293213,
390
- "eval_runtime": 19.2818,
391
- "eval_samples_per_second": 103.725,
392
- "eval_steps_per_second": 1.66,
393
- "step": 600
394
- },
395
- {
396
- "epoch": 2.22,
397
- "learning_rate": 0.0001795275590551181,
398
- "loss": 0.3983,
399
- "step": 610
400
- },
401
- {
402
- "epoch": 2.26,
403
- "learning_rate": 0.00017716535433070864,
404
- "loss": 0.3881,
405
- "step": 620
406
- },
407
- {
408
- "epoch": 2.3,
409
- "learning_rate": 0.00017480314960629918,
410
- "loss": 0.4008,
411
- "step": 630
412
- },
413
- {
414
- "epoch": 2.33,
415
- "learning_rate": 0.00017244094488188974,
416
- "loss": 0.3927,
417
- "step": 640
418
- },
419
- {
420
- "epoch": 2.37,
421
- "learning_rate": 0.0001700787401574803,
422
- "loss": 0.4005,
423
- "step": 650
424
- },
425
- {
426
- "epoch": 2.41,
427
- "learning_rate": 0.00016771653543307086,
428
- "loss": 0.3962,
429
- "step": 660
430
- },
431
- {
432
- "epoch": 2.44,
433
- "learning_rate": 0.0001653543307086614,
434
- "loss": 0.3902,
435
- "step": 670
436
- },
437
- {
438
- "epoch": 2.48,
439
- "learning_rate": 0.00016299212598425196,
440
- "loss": 0.3911,
441
- "step": 680
442
- },
443
- {
444
- "epoch": 2.52,
445
- "learning_rate": 0.00016062992125984252,
446
- "loss": 0.3891,
447
- "step": 690
448
- },
449
- {
450
- "epoch": 2.55,
451
- "learning_rate": 0.00015826771653543303,
452
- "loss": 0.3939,
453
- "step": 700
454
- },
455
- {
456
- "epoch": 2.59,
457
- "learning_rate": 0.0001559055118110236,
458
- "loss": 0.4001,
459
- "step": 710
460
- },
461
- {
462
- "epoch": 2.63,
463
- "learning_rate": 0.00015354330708661416,
464
- "loss": 0.3918,
465
- "step": 720
466
- },
467
- {
468
- "epoch": 2.66,
469
- "learning_rate": 0.00015118110236220472,
470
- "loss": 0.3979,
471
- "step": 730
472
- },
473
- {
474
- "epoch": 2.7,
475
- "learning_rate": 0.00014881889763779525,
476
- "loss": 0.3793,
477
- "step": 740
478
- },
479
- {
480
- "epoch": 2.73,
481
- "learning_rate": 0.00014645669291338582,
482
- "loss": 0.3879,
483
- "step": 750
484
- },
485
- {
486
- "epoch": 2.77,
487
- "learning_rate": 0.00014409448818897635,
488
- "loss": 0.3915,
489
- "step": 760
490
- },
491
- {
492
- "epoch": 2.81,
493
- "learning_rate": 0.00014173228346456691,
494
- "loss": 0.3831,
495
- "step": 770
496
- },
497
- {
498
- "epoch": 2.84,
499
- "learning_rate": 0.00013937007874015748,
500
- "loss": 0.3838,
501
- "step": 780
502
- },
503
- {
504
- "epoch": 2.88,
505
- "learning_rate": 0.00013700787401574804,
506
- "loss": 0.3734,
507
- "step": 790
508
- },
509
- {
510
- "epoch": 2.92,
511
- "learning_rate": 0.00013464566929133857,
512
- "loss": 0.3872,
513
- "step": 800
514
- },
515
- {
516
- "epoch": 2.92,
517
- "eval_loss": 0.3944130539894104,
518
- "eval_runtime": 19.2596,
519
- "eval_samples_per_second": 103.844,
520
- "eval_steps_per_second": 1.662,
521
- "step": 800
522
- },
523
- {
524
- "epoch": 2.95,
525
- "learning_rate": 0.0001322834645669291,
526
- "loss": 0.386,
527
- "step": 810
528
- },
529
- {
530
- "epoch": 2.99,
531
- "learning_rate": 0.00012992125984251967,
532
- "loss": 0.3799,
533
- "step": 820
534
- },
535
- {
536
- "epoch": 3.03,
537
- "learning_rate": 0.00012755905511811023,
538
- "loss": 0.3895,
539
- "step": 830
540
- },
541
- {
542
- "epoch": 3.06,
543
- "learning_rate": 0.00012519685039370077,
544
- "loss": 0.3852,
545
- "step": 840
546
- },
547
- {
548
- "epoch": 3.1,
549
- "learning_rate": 0.00012283464566929133,
550
- "loss": 0.3879,
551
- "step": 850
552
- },
553
- {
554
- "epoch": 3.14,
555
- "learning_rate": 0.00012047244094488188,
556
- "loss": 0.3892,
557
- "step": 860
558
- },
559
- {
560
- "epoch": 3.17,
561
- "learning_rate": 0.00011811023622047244,
562
- "loss": 0.3801,
563
- "step": 870
564
- },
565
- {
566
- "epoch": 3.21,
567
- "learning_rate": 0.00011574803149606298,
568
- "loss": 0.3802,
569
- "step": 880
570
- },
571
- {
572
- "epoch": 3.25,
573
- "learning_rate": 0.00011338582677165353,
574
- "loss": 0.3863,
575
- "step": 890
576
- },
577
- {
578
- "epoch": 3.28,
579
- "learning_rate": 0.00011102362204724409,
580
- "loss": 0.3792,
581
- "step": 900
582
- },
583
- {
584
- "epoch": 3.32,
585
- "learning_rate": 0.00010866141732283464,
586
- "loss": 0.3923,
587
- "step": 910
588
- },
589
- {
590
- "epoch": 3.35,
591
- "learning_rate": 0.00010629921259842519,
592
- "loss": 0.3753,
593
- "step": 920
594
- },
595
- {
596
- "epoch": 3.39,
597
- "learning_rate": 0.00010393700787401573,
598
- "loss": 0.3777,
599
- "step": 930
600
- },
601
- {
602
- "epoch": 3.43,
603
- "learning_rate": 0.0001015748031496063,
604
- "loss": 0.3849,
605
- "step": 940
606
- },
607
- {
608
- "epoch": 3.46,
609
- "learning_rate": 9.921259842519685e-05,
610
- "loss": 0.3775,
611
- "step": 950
612
- },
613
- {
614
- "epoch": 3.5,
615
- "learning_rate": 9.68503937007874e-05,
616
- "loss": 0.3853,
617
- "step": 960
618
- },
619
- {
620
- "epoch": 3.54,
621
- "learning_rate": 9.448818897637794e-05,
622
- "loss": 0.3719,
623
- "step": 970
624
- },
625
- {
626
- "epoch": 3.57,
627
- "learning_rate": 9.21259842519685e-05,
628
- "loss": 0.3779,
629
- "step": 980
630
- },
631
- {
632
- "epoch": 3.61,
633
- "learning_rate": 8.976377952755905e-05,
634
- "loss": 0.3921,
635
- "step": 990
636
- },
637
- {
638
- "epoch": 3.65,
639
- "learning_rate": 8.740157480314959e-05,
640
- "loss": 0.3776,
641
- "step": 1000
642
- },
643
- {
644
- "epoch": 3.65,
645
- "eval_loss": 0.3908761739730835,
646
- "eval_runtime": 19.2678,
647
- "eval_samples_per_second": 103.8,
648
- "eval_steps_per_second": 1.661,
649
- "step": 1000
650
- },
651
- {
652
- "epoch": 3.68,
653
- "learning_rate": 8.503937007874015e-05,
654
- "loss": 0.3889,
655
- "step": 1010
656
- },
657
- {
658
- "epoch": 3.72,
659
- "learning_rate": 8.26771653543307e-05,
660
- "loss": 0.3819,
661
- "step": 1020
662
- },
663
- {
664
- "epoch": 3.76,
665
- "learning_rate": 8.031496062992126e-05,
666
- "loss": 0.3758,
667
- "step": 1030
668
- },
669
- {
670
- "epoch": 3.79,
671
- "learning_rate": 7.79527559055118e-05,
672
- "loss": 0.3753,
673
- "step": 1040
674
- },
675
- {
676
- "epoch": 3.83,
677
- "learning_rate": 7.559055118110236e-05,
678
- "loss": 0.3737,
679
- "step": 1050
680
- },
681
- {
682
- "epoch": 3.87,
683
- "learning_rate": 7.322834645669291e-05,
684
- "loss": 0.3833,
685
- "step": 1060
686
- },
687
- {
688
- "epoch": 3.9,
689
- "learning_rate": 7.086614173228346e-05,
690
- "loss": 0.3625,
691
- "step": 1070
692
- },
693
- {
694
- "epoch": 3.94,
695
- "learning_rate": 6.850393700787402e-05,
696
- "loss": 0.3809,
697
- "step": 1080
698
- },
699
- {
700
- "epoch": 3.97,
701
- "learning_rate": 6.614173228346455e-05,
702
- "loss": 0.3751,
703
- "step": 1090
704
- },
705
- {
706
- "epoch": 4.01,
707
- "learning_rate": 6.377952755905512e-05,
708
- "loss": 0.3776,
709
- "step": 1100
710
- },
711
- {
712
- "epoch": 4.05,
713
- "learning_rate": 6.141732283464567e-05,
714
- "loss": 0.3748,
715
- "step": 1110
716
- },
717
- {
718
- "epoch": 4.08,
719
- "learning_rate": 5.905511811023622e-05,
720
- "loss": 0.3636,
721
- "step": 1120
722
- },
723
- {
724
- "epoch": 4.12,
725
- "learning_rate": 5.669291338582676e-05,
726
- "loss": 0.372,
727
- "step": 1130
728
- },
729
- {
730
- "epoch": 4.16,
731
- "learning_rate": 5.433070866141732e-05,
732
- "loss": 0.3795,
733
- "step": 1140
734
- },
735
- {
736
- "epoch": 4.19,
737
- "learning_rate": 5.196850393700787e-05,
738
- "loss": 0.3632,
739
- "step": 1150
740
- },
741
- {
742
- "epoch": 4.23,
743
- "learning_rate": 4.960629921259842e-05,
744
- "loss": 0.3806,
745
- "step": 1160
746
- },
747
- {
748
- "epoch": 4.27,
749
- "learning_rate": 4.724409448818897e-05,
750
- "loss": 0.3732,
751
- "step": 1170
752
- },
753
- {
754
- "epoch": 4.3,
755
- "learning_rate": 4.488188976377953e-05,
756
- "loss": 0.3818,
757
- "step": 1180
758
- },
759
- {
760
- "epoch": 4.34,
761
- "learning_rate": 4.2519685039370076e-05,
762
- "loss": 0.3766,
763
- "step": 1190
764
- },
765
- {
766
- "epoch": 4.38,
767
- "learning_rate": 4.015748031496063e-05,
768
- "loss": 0.3587,
769
- "step": 1200
770
- },
771
- {
772
- "epoch": 4.38,
773
- "eval_loss": 0.3883955776691437,
774
- "eval_runtime": 19.3219,
775
- "eval_samples_per_second": 103.51,
776
- "eval_steps_per_second": 1.656,
777
- "step": 1200
778
- }
779
- ],
780
- "logging_steps": 10,
781
- "max_steps": 1370,
782
- "num_train_epochs": 5,
783
- "save_steps": 200,
784
- "total_flos": 2.2400975031249142e+18,
785
- "trial_name": null,
786
- "trial_params": null
787
- }