bobox commited on
Commit
3588df9
·
verified ·
1 Parent(s): ec8218c

Training in progress, step 2613, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -732,6 +732,16 @@ You can finetune this model on your own dataset.
732
  | 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
733
  | 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
734
  | 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
 
 
 
 
 
 
 
 
 
 
735
 
736
 
737
  ### Framework Versions
 
732
  | 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
733
  | 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
734
  | 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
735
+ | 0.5056 | 1760 | 0.9837 | 0.9165 | 0.5467 |
736
+ | 0.5309 | 1848 | 0.8857 | 0.8931 | 0.5374 |
737
+ | 0.5562 | 1936 | 0.9305 | 0.8842 | 0.5331 |
738
+ | 0.5814 | 2024 | 0.8061 | 0.8854 | 0.5477 |
739
+ | 0.6067 | 2112 | 0.8286 | 0.8693 | 0.5196 |
740
+ | 0.6320 | 2200 | 0.7854 | 0.8592 | 0.5159 |
741
+ | 0.6573 | 2288 | 0.8374 | 0.8538 | 0.5090 |
742
+ | 0.6826 | 2376 | 0.7678 | 0.8425 | 0.5175 |
743
+ | 0.7078 | 2464 | 0.7064 | 0.8284 | 0.5046 |
744
+ | 0.7331 | 2552 | 0.8849 | 0.8329 | 0.4783 |
745
 
746
 
747
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74ae3aa245b90ed7e7fade5e22036fd4db3e79d0e60c917c98d7bb52eae949df
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaee732f14b229d04a0b6c6d028c92842350429f670d7910b9ab5d292210c922
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a292c615b4c7184111afd8393bce447f10def285a7f193dced1d793b3f35c446
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46ba1df33f0d186a8634e3fd3902123f7ce96112f85627e6a763e3d779be99b
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c28b09b15f7da5be865dfaddc8487a10def7b4fd10988e31421f04272831662
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51dc264fe435d10d1407e610654f4adbea838b132e0f6c5827047a283ee5ce28
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b19b307d6dfdc51420b9b8a249097f0d2f86a9860bd1617056feaf2607d184f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54bc5d3e1ab7114cca6c72d26cc59c590fe581357d9bb65482f0e470a92fd4ae
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5004309106578569,
5
  "eval_steps": 88,
6
- "global_step": 1742,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -444,6 +444,236 @@
444
  "eval_scitail-pairs-pos_samples_per_second": 166.813,
445
  "eval_scitail-pairs-pos_steps_per_second": 10.49,
446
  "step": 1672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  }
448
  ],
449
  "logging_steps": 88,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7506463659867854,
5
  "eval_steps": 88,
6
+ "global_step": 2613,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
444
  "eval_scitail-pairs-pos_samples_per_second": 166.813,
445
  "eval_scitail-pairs-pos_steps_per_second": 10.49,
446
  "step": 1672
447
+ },
448
+ {
449
+ "epoch": 0.5056018385521401,
450
+ "grad_norm": 8.821345329284668,
451
+ "learning_rate": 1.681910776921864e-05,
452
+ "loss": 0.9837,
453
+ "step": 1760
454
+ },
455
+ {
456
+ "epoch": 0.5056018385521401,
457
+ "eval_nli-pairs_loss": 0.9164705276489258,
458
+ "eval_nli-pairs_runtime": 38.0836,
459
+ "eval_nli-pairs_samples_per_second": 178.765,
460
+ "eval_nli-pairs_steps_per_second": 11.186,
461
+ "step": 1760
462
+ },
463
+ {
464
+ "epoch": 0.5056018385521401,
465
+ "eval_scitail-pairs-pos_loss": 0.5467000007629395,
466
+ "eval_scitail-pairs-pos_runtime": 7.7942,
467
+ "eval_scitail-pairs-pos_samples_per_second": 167.304,
468
+ "eval_scitail-pairs-pos_steps_per_second": 10.521,
469
+ "step": 1760
470
+ },
471
+ {
472
+ "epoch": 0.5308819304797472,
473
+ "grad_norm": 9.250692367553711,
474
+ "learning_rate": 1.5906153307778405e-05,
475
+ "loss": 0.8857,
476
+ "step": 1848
477
+ },
478
+ {
479
+ "epoch": 0.5308819304797472,
480
+ "eval_nli-pairs_loss": 0.8931341171264648,
481
+ "eval_nli-pairs_runtime": 38.0639,
482
+ "eval_nli-pairs_samples_per_second": 178.857,
483
+ "eval_nli-pairs_steps_per_second": 11.192,
484
+ "step": 1848
485
+ },
486
+ {
487
+ "epoch": 0.5308819304797472,
488
+ "eval_scitail-pairs-pos_loss": 0.5374401807785034,
489
+ "eval_scitail-pairs-pos_runtime": 7.8097,
490
+ "eval_scitail-pairs-pos_samples_per_second": 166.972,
491
+ "eval_scitail-pairs-pos_steps_per_second": 10.5,
492
+ "step": 1848
493
+ },
494
+ {
495
+ "epoch": 0.5561620224073542,
496
+ "grad_norm": 5.3266706466674805,
497
+ "learning_rate": 1.491028940034468e-05,
498
+ "loss": 0.9305,
499
+ "step": 1936
500
+ },
501
+ {
502
+ "epoch": 0.5561620224073542,
503
+ "eval_nli-pairs_loss": 0.8841533064842224,
504
+ "eval_nli-pairs_runtime": 38.1566,
505
+ "eval_nli-pairs_samples_per_second": 178.423,
506
+ "eval_nli-pairs_steps_per_second": 11.165,
507
+ "step": 1936
508
+ },
509
+ {
510
+ "epoch": 0.5561620224073542,
511
+ "eval_scitail-pairs-pos_loss": 0.5330824851989746,
512
+ "eval_scitail-pairs-pos_runtime": 7.8415,
513
+ "eval_scitail-pairs-pos_samples_per_second": 166.294,
514
+ "eval_scitail-pairs-pos_steps_per_second": 10.457,
515
+ "step": 1936
516
+ },
517
+ {
518
+ "epoch": 0.5814421143349612,
519
+ "grad_norm": 6.629028797149658,
520
+ "learning_rate": 1.3845495793217223e-05,
521
+ "loss": 0.8061,
522
+ "step": 2024
523
+ },
524
+ {
525
+ "epoch": 0.5814421143349612,
526
+ "eval_nli-pairs_loss": 0.8853806257247925,
527
+ "eval_nli-pairs_runtime": 38.172,
528
+ "eval_nli-pairs_samples_per_second": 178.351,
529
+ "eval_nli-pairs_steps_per_second": 11.16,
530
+ "step": 2024
531
+ },
532
+ {
533
+ "epoch": 0.5814421143349612,
534
+ "eval_scitail-pairs-pos_loss": 0.5477445125579834,
535
+ "eval_scitail-pairs-pos_runtime": 7.8333,
536
+ "eval_scitail-pairs-pos_samples_per_second": 166.469,
537
+ "eval_scitail-pairs-pos_steps_per_second": 10.468,
538
+ "step": 2024
539
+ },
540
+ {
541
+ "epoch": 0.6067222062625682,
542
+ "grad_norm": 4.16071081161499,
543
+ "learning_rate": 1.2726719854583736e-05,
544
+ "loss": 0.8286,
545
+ "step": 2112
546
+ },
547
+ {
548
+ "epoch": 0.6067222062625682,
549
+ "eval_nli-pairs_loss": 0.8693087697029114,
550
+ "eval_nli-pairs_runtime": 38.1088,
551
+ "eval_nli-pairs_samples_per_second": 178.646,
552
+ "eval_nli-pairs_steps_per_second": 11.179,
553
+ "step": 2112
554
+ },
555
+ {
556
+ "epoch": 0.6067222062625682,
557
+ "eval_scitail-pairs-pos_loss": 0.5196370482444763,
558
+ "eval_scitail-pairs-pos_runtime": 7.8534,
559
+ "eval_scitail-pairs-pos_samples_per_second": 166.042,
560
+ "eval_scitail-pairs-pos_steps_per_second": 10.441,
561
+ "step": 2112
562
+ },
563
+ {
564
+ "epoch": 0.6320022981901753,
565
+ "grad_norm": 2.518064498901367,
566
+ "learning_rate": 1.1569666746235527e-05,
567
+ "loss": 0.7854,
568
+ "step": 2200
569
+ },
570
+ {
571
+ "epoch": 0.6320022981901753,
572
+ "eval_nli-pairs_loss": 0.859151303768158,
573
+ "eval_nli-pairs_runtime": 38.0838,
574
+ "eval_nli-pairs_samples_per_second": 178.764,
575
+ "eval_nli-pairs_steps_per_second": 11.186,
576
+ "step": 2200
577
+ },
578
+ {
579
+ "epoch": 0.6320022981901753,
580
+ "eval_scitail-pairs-pos_loss": 0.5159358978271484,
581
+ "eval_scitail-pairs-pos_runtime": 7.7611,
582
+ "eval_scitail-pairs-pos_samples_per_second": 168.018,
583
+ "eval_scitail-pairs-pos_steps_per_second": 10.566,
584
+ "step": 2200
585
+ },
586
+ {
587
+ "epoch": 0.6572823901177822,
588
+ "grad_norm": 4.033371925354004,
589
+ "learning_rate": 1.0390578957522117e-05,
590
+ "loss": 0.8374,
591
+ "step": 2288
592
+ },
593
+ {
594
+ "epoch": 0.6572823901177822,
595
+ "eval_nli-pairs_loss": 0.8537901043891907,
596
+ "eval_nli-pairs_runtime": 38.0742,
597
+ "eval_nli-pairs_samples_per_second": 178.809,
598
+ "eval_nli-pairs_steps_per_second": 11.189,
599
+ "step": 2288
600
+ },
601
+ {
602
+ "epoch": 0.6572823901177822,
603
+ "eval_scitail-pairs-pos_loss": 0.509048581123352,
604
+ "eval_scitail-pairs-pos_runtime": 7.7812,
605
+ "eval_scitail-pairs-pos_samples_per_second": 167.582,
606
+ "eval_scitail-pairs-pos_steps_per_second": 10.538,
607
+ "step": 2288
608
+ },
609
+ {
610
+ "epoch": 0.6825624820453893,
611
+ "grad_norm": 6.1265363693237305,
612
+ "learning_rate": 9.206008296404724e-06,
613
+ "loss": 0.7678,
614
+ "step": 2376
615
+ },
616
+ {
617
+ "epoch": 0.6825624820453893,
618
+ "eval_nli-pairs_loss": 0.8425480723381042,
619
+ "eval_nli-pairs_runtime": 38.0596,
620
+ "eval_nli-pairs_samples_per_second": 178.877,
621
+ "eval_nli-pairs_steps_per_second": 11.193,
622
+ "step": 2376
623
+ },
624
+ {
625
+ "epoch": 0.6825624820453893,
626
+ "eval_scitail-pairs-pos_loss": 0.5174906253814697,
627
+ "eval_scitail-pairs-pos_runtime": 7.7617,
628
+ "eval_scitail-pairs-pos_samples_per_second": 168.003,
629
+ "eval_scitail-pairs-pos_steps_per_second": 10.565,
630
+ "step": 2376
631
+ },
632
+ {
633
+ "epoch": 0.7078425739729962,
634
+ "grad_norm": 3.0078606605529785,
635
+ "learning_rate": 8.032583538354534e-06,
636
+ "loss": 0.7064,
637
+ "step": 2464
638
+ },
639
+ {
640
+ "epoch": 0.7078425739729962,
641
+ "eval_nli-pairs_loss": 0.8283973336219788,
642
+ "eval_nli-pairs_runtime": 38.2909,
643
+ "eval_nli-pairs_samples_per_second": 177.797,
644
+ "eval_nli-pairs_steps_per_second": 11.125,
645
+ "step": 2464
646
+ },
647
+ {
648
+ "epoch": 0.7078425739729962,
649
+ "eval_scitail-pairs-pos_loss": 0.5045931935310364,
650
+ "eval_scitail-pairs-pos_runtime": 7.8174,
651
+ "eval_scitail-pairs-pos_samples_per_second": 166.806,
652
+ "eval_scitail-pairs-pos_steps_per_second": 10.489,
653
+ "step": 2464
654
+ },
655
+ {
656
+ "epoch": 0.7331226659006033,
657
+ "grad_norm": 8.649880409240723,
658
+ "learning_rate": 6.8867769947957765e-06,
659
+ "loss": 0.8849,
660
+ "step": 2552
661
+ },
662
+ {
663
+ "epoch": 0.7331226659006033,
664
+ "eval_nli-pairs_loss": 0.8328748941421509,
665
+ "eval_nli-pairs_runtime": 38.2288,
666
+ "eval_nli-pairs_samples_per_second": 178.086,
667
+ "eval_nli-pairs_steps_per_second": 11.143,
668
+ "step": 2552
669
+ },
670
+ {
671
+ "epoch": 0.7331226659006033,
672
+ "eval_scitail-pairs-pos_loss": 0.478294312953949,
673
+ "eval_scitail-pairs-pos_runtime": 7.8918,
674
+ "eval_scitail-pairs-pos_samples_per_second": 165.235,
675
+ "eval_scitail-pairs-pos_steps_per_second": 10.391,
676
+ "step": 2552
677
  }
678
  ],
679
  "logging_steps": 88,