Training in progress, step 2613, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -732,6 +732,16 @@ You can finetune this model on your own dataset.
|
|
732 |
| 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
|
733 |
| 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
|
734 |
| 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
735 |
|
736 |
|
737 |
### Framework Versions
|
|
|
732 |
| 0.4298 | 1496 | 1.1907 | 0.9647 | 0.5922 |
|
733 |
| 0.4550 | 1584 | 1.1587 | 0.9537 | 0.5585 |
|
734 |
| 0.4803 | 1672 | 0.9554 | 0.9304 | 0.5592 |
|
735 |
+
| 0.5056 | 1760 | 0.9837 | 0.9165 | 0.5467 |
|
736 |
+
| 0.5309 | 1848 | 0.8857 | 0.8931 | 0.5374 |
|
737 |
+
| 0.5562 | 1936 | 0.9305 | 0.8842 | 0.5331 |
|
738 |
+
| 0.5814 | 2024 | 0.8061 | 0.8854 | 0.5477 |
|
739 |
+
| 0.6067 | 2112 | 0.8286 | 0.8693 | 0.5196 |
|
740 |
+
| 0.6320 | 2200 | 0.7854 | 0.8592 | 0.5159 |
|
741 |
+
| 0.6573 | 2288 | 0.8374 | 0.8538 | 0.5090 |
|
742 |
+
| 0.6826 | 2376 | 0.7678 | 0.8425 | 0.5175 |
|
743 |
+
| 0.7078 | 2464 | 0.7064 | 0.8284 | 0.5046 |
|
744 |
+
| 0.7331 | 2552 | 0.8849 | 0.8329 | 0.4783 |
|
745 |
|
746 |
|
747 |
### Framework Versions
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1130520122
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aaee732f14b229d04a0b6c6d028c92842350429f670d7910b9ab5d292210c922
|
3 |
size 1130520122
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 565251810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a46ba1df33f0d186a8634e3fd3902123f7ce96112f85627e6a763e3d779be99b
|
3 |
size 565251810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51dc264fe435d10d1407e610654f4adbea838b132e0f6c5827047a283ee5ce28
|
3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54bc5d3e1ab7114cca6c72d26cc59c590fe581357d9bb65482f0e470a92fd4ae
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 88,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -444,6 +444,236 @@
|
|
444 |
"eval_scitail-pairs-pos_samples_per_second": 166.813,
|
445 |
"eval_scitail-pairs-pos_steps_per_second": 10.49,
|
446 |
"step": 1672
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
}
|
448 |
],
|
449 |
"logging_steps": 88,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7506463659867854,
|
5 |
"eval_steps": 88,
|
6 |
+
"global_step": 2613,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
444 |
"eval_scitail-pairs-pos_samples_per_second": 166.813,
|
445 |
"eval_scitail-pairs-pos_steps_per_second": 10.49,
|
446 |
"step": 1672
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"epoch": 0.5056018385521401,
|
450 |
+
"grad_norm": 8.821345329284668,
|
451 |
+
"learning_rate": 1.681910776921864e-05,
|
452 |
+
"loss": 0.9837,
|
453 |
+
"step": 1760
|
454 |
+
},
|
455 |
+
{
|
456 |
+
"epoch": 0.5056018385521401,
|
457 |
+
"eval_nli-pairs_loss": 0.9164705276489258,
|
458 |
+
"eval_nli-pairs_runtime": 38.0836,
|
459 |
+
"eval_nli-pairs_samples_per_second": 178.765,
|
460 |
+
"eval_nli-pairs_steps_per_second": 11.186,
|
461 |
+
"step": 1760
|
462 |
+
},
|
463 |
+
{
|
464 |
+
"epoch": 0.5056018385521401,
|
465 |
+
"eval_scitail-pairs-pos_loss": 0.5467000007629395,
|
466 |
+
"eval_scitail-pairs-pos_runtime": 7.7942,
|
467 |
+
"eval_scitail-pairs-pos_samples_per_second": 167.304,
|
468 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.521,
|
469 |
+
"step": 1760
|
470 |
+
},
|
471 |
+
{
|
472 |
+
"epoch": 0.5308819304797472,
|
473 |
+
"grad_norm": 9.250692367553711,
|
474 |
+
"learning_rate": 1.5906153307778405e-05,
|
475 |
+
"loss": 0.8857,
|
476 |
+
"step": 1848
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 0.5308819304797472,
|
480 |
+
"eval_nli-pairs_loss": 0.8931341171264648,
|
481 |
+
"eval_nli-pairs_runtime": 38.0639,
|
482 |
+
"eval_nli-pairs_samples_per_second": 178.857,
|
483 |
+
"eval_nli-pairs_steps_per_second": 11.192,
|
484 |
+
"step": 1848
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"epoch": 0.5308819304797472,
|
488 |
+
"eval_scitail-pairs-pos_loss": 0.5374401807785034,
|
489 |
+
"eval_scitail-pairs-pos_runtime": 7.8097,
|
490 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.972,
|
491 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.5,
|
492 |
+
"step": 1848
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 0.5561620224073542,
|
496 |
+
"grad_norm": 5.3266706466674805,
|
497 |
+
"learning_rate": 1.491028940034468e-05,
|
498 |
+
"loss": 0.9305,
|
499 |
+
"step": 1936
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"epoch": 0.5561620224073542,
|
503 |
+
"eval_nli-pairs_loss": 0.8841533064842224,
|
504 |
+
"eval_nli-pairs_runtime": 38.1566,
|
505 |
+
"eval_nli-pairs_samples_per_second": 178.423,
|
506 |
+
"eval_nli-pairs_steps_per_second": 11.165,
|
507 |
+
"step": 1936
|
508 |
+
},
|
509 |
+
{
|
510 |
+
"epoch": 0.5561620224073542,
|
511 |
+
"eval_scitail-pairs-pos_loss": 0.5330824851989746,
|
512 |
+
"eval_scitail-pairs-pos_runtime": 7.8415,
|
513 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.294,
|
514 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.457,
|
515 |
+
"step": 1936
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"epoch": 0.5814421143349612,
|
519 |
+
"grad_norm": 6.629028797149658,
|
520 |
+
"learning_rate": 1.3845495793217223e-05,
|
521 |
+
"loss": 0.8061,
|
522 |
+
"step": 2024
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"epoch": 0.5814421143349612,
|
526 |
+
"eval_nli-pairs_loss": 0.8853806257247925,
|
527 |
+
"eval_nli-pairs_runtime": 38.172,
|
528 |
+
"eval_nli-pairs_samples_per_second": 178.351,
|
529 |
+
"eval_nli-pairs_steps_per_second": 11.16,
|
530 |
+
"step": 2024
|
531 |
+
},
|
532 |
+
{
|
533 |
+
"epoch": 0.5814421143349612,
|
534 |
+
"eval_scitail-pairs-pos_loss": 0.5477445125579834,
|
535 |
+
"eval_scitail-pairs-pos_runtime": 7.8333,
|
536 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.469,
|
537 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.468,
|
538 |
+
"step": 2024
|
539 |
+
},
|
540 |
+
{
|
541 |
+
"epoch": 0.6067222062625682,
|
542 |
+
"grad_norm": 4.16071081161499,
|
543 |
+
"learning_rate": 1.2726719854583736e-05,
|
544 |
+
"loss": 0.8286,
|
545 |
+
"step": 2112
|
546 |
+
},
|
547 |
+
{
|
548 |
+
"epoch": 0.6067222062625682,
|
549 |
+
"eval_nli-pairs_loss": 0.8693087697029114,
|
550 |
+
"eval_nli-pairs_runtime": 38.1088,
|
551 |
+
"eval_nli-pairs_samples_per_second": 178.646,
|
552 |
+
"eval_nli-pairs_steps_per_second": 11.179,
|
553 |
+
"step": 2112
|
554 |
+
},
|
555 |
+
{
|
556 |
+
"epoch": 0.6067222062625682,
|
557 |
+
"eval_scitail-pairs-pos_loss": 0.5196370482444763,
|
558 |
+
"eval_scitail-pairs-pos_runtime": 7.8534,
|
559 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.042,
|
560 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.441,
|
561 |
+
"step": 2112
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"epoch": 0.6320022981901753,
|
565 |
+
"grad_norm": 2.518064498901367,
|
566 |
+
"learning_rate": 1.1569666746235527e-05,
|
567 |
+
"loss": 0.7854,
|
568 |
+
"step": 2200
|
569 |
+
},
|
570 |
+
{
|
571 |
+
"epoch": 0.6320022981901753,
|
572 |
+
"eval_nli-pairs_loss": 0.859151303768158,
|
573 |
+
"eval_nli-pairs_runtime": 38.0838,
|
574 |
+
"eval_nli-pairs_samples_per_second": 178.764,
|
575 |
+
"eval_nli-pairs_steps_per_second": 11.186,
|
576 |
+
"step": 2200
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"epoch": 0.6320022981901753,
|
580 |
+
"eval_scitail-pairs-pos_loss": 0.5159358978271484,
|
581 |
+
"eval_scitail-pairs-pos_runtime": 7.7611,
|
582 |
+
"eval_scitail-pairs-pos_samples_per_second": 168.018,
|
583 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.566,
|
584 |
+
"step": 2200
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"epoch": 0.6572823901177822,
|
588 |
+
"grad_norm": 4.033371925354004,
|
589 |
+
"learning_rate": 1.0390578957522117e-05,
|
590 |
+
"loss": 0.8374,
|
591 |
+
"step": 2288
|
592 |
+
},
|
593 |
+
{
|
594 |
+
"epoch": 0.6572823901177822,
|
595 |
+
"eval_nli-pairs_loss": 0.8537901043891907,
|
596 |
+
"eval_nli-pairs_runtime": 38.0742,
|
597 |
+
"eval_nli-pairs_samples_per_second": 178.809,
|
598 |
+
"eval_nli-pairs_steps_per_second": 11.189,
|
599 |
+
"step": 2288
|
600 |
+
},
|
601 |
+
{
|
602 |
+
"epoch": 0.6572823901177822,
|
603 |
+
"eval_scitail-pairs-pos_loss": 0.509048581123352,
|
604 |
+
"eval_scitail-pairs-pos_runtime": 7.7812,
|
605 |
+
"eval_scitail-pairs-pos_samples_per_second": 167.582,
|
606 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.538,
|
607 |
+
"step": 2288
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"epoch": 0.6825624820453893,
|
611 |
+
"grad_norm": 6.1265363693237305,
|
612 |
+
"learning_rate": 9.206008296404724e-06,
|
613 |
+
"loss": 0.7678,
|
614 |
+
"step": 2376
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 0.6825624820453893,
|
618 |
+
"eval_nli-pairs_loss": 0.8425480723381042,
|
619 |
+
"eval_nli-pairs_runtime": 38.0596,
|
620 |
+
"eval_nli-pairs_samples_per_second": 178.877,
|
621 |
+
"eval_nli-pairs_steps_per_second": 11.193,
|
622 |
+
"step": 2376
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"epoch": 0.6825624820453893,
|
626 |
+
"eval_scitail-pairs-pos_loss": 0.5174906253814697,
|
627 |
+
"eval_scitail-pairs-pos_runtime": 7.7617,
|
628 |
+
"eval_scitail-pairs-pos_samples_per_second": 168.003,
|
629 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.565,
|
630 |
+
"step": 2376
|
631 |
+
},
|
632 |
+
{
|
633 |
+
"epoch": 0.7078425739729962,
|
634 |
+
"grad_norm": 3.0078606605529785,
|
635 |
+
"learning_rate": 8.032583538354534e-06,
|
636 |
+
"loss": 0.7064,
|
637 |
+
"step": 2464
|
638 |
+
},
|
639 |
+
{
|
640 |
+
"epoch": 0.7078425739729962,
|
641 |
+
"eval_nli-pairs_loss": 0.8283973336219788,
|
642 |
+
"eval_nli-pairs_runtime": 38.2909,
|
643 |
+
"eval_nli-pairs_samples_per_second": 177.797,
|
644 |
+
"eval_nli-pairs_steps_per_second": 11.125,
|
645 |
+
"step": 2464
|
646 |
+
},
|
647 |
+
{
|
648 |
+
"epoch": 0.7078425739729962,
|
649 |
+
"eval_scitail-pairs-pos_loss": 0.5045931935310364,
|
650 |
+
"eval_scitail-pairs-pos_runtime": 7.8174,
|
651 |
+
"eval_scitail-pairs-pos_samples_per_second": 166.806,
|
652 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.489,
|
653 |
+
"step": 2464
|
654 |
+
},
|
655 |
+
{
|
656 |
+
"epoch": 0.7331226659006033,
|
657 |
+
"grad_norm": 8.649880409240723,
|
658 |
+
"learning_rate": 6.8867769947957765e-06,
|
659 |
+
"loss": 0.8849,
|
660 |
+
"step": 2552
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"epoch": 0.7331226659006033,
|
664 |
+
"eval_nli-pairs_loss": 0.8328748941421509,
|
665 |
+
"eval_nli-pairs_runtime": 38.2288,
|
666 |
+
"eval_nli-pairs_samples_per_second": 178.086,
|
667 |
+
"eval_nli-pairs_steps_per_second": 11.143,
|
668 |
+
"step": 2552
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"epoch": 0.7331226659006033,
|
672 |
+
"eval_scitail-pairs-pos_loss": 0.478294312953949,
|
673 |
+
"eval_scitail-pairs-pos_runtime": 7.8918,
|
674 |
+
"eval_scitail-pairs-pos_samples_per_second": 165.235,
|
675 |
+
"eval_scitail-pairs-pos_steps_per_second": 10.391,
|
676 |
+
"step": 2552
|
677 |
}
|
678 |
],
|
679 |
"logging_steps": 88,
|