Uploaded checkpoint-5000
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +153 -3
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 119975656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19344674fe2ad15f50200034530413438a33bff9ccab8bfa6cf2812aa37bf12e
|
3 |
size 119975656
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 60477396
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d94e63a8e69076b7c52dde790e804072eacd8a18380eb10fffd62f19a4cfff1f
|
3 |
size 60477396
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1abd2f2c053411bc4be9ca11b9a9a5f9be07dc02a0721eee3132129b1fc2a3d8
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8557a40bc707e1ef7c101859ab04d1c4c6b283598d6d9dc4f6cea13cb82e641e
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -607,6 +607,156 @@
|
|
607 |
"eval_samples_per_second": 5.185,
|
608 |
"eval_steps_per_second": 5.185,
|
609 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
610 |
}
|
611 |
],
|
612 |
"logging_steps": 100,
|
@@ -614,7 +764,7 @@
|
|
614 |
"num_input_tokens_seen": 0,
|
615 |
"num_train_epochs": 2,
|
616 |
"save_steps": 1000,
|
617 |
-
"total_flos":
|
618 |
"train_batch_size": 1,
|
619 |
"trial_name": null,
|
620 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.5665387326701654,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 5000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
607 |
"eval_samples_per_second": 5.185,
|
608 |
"eval_steps_per_second": 5.185,
|
609 |
"step": 4000
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"epoch": 1.28,
|
613 |
+
"grad_norm": 0.0578785166144371,
|
614 |
+
"learning_rate": 4.004444444444445e-06,
|
615 |
+
"loss": 0.0378,
|
616 |
+
"step": 4100
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"epoch": 1.28,
|
620 |
+
"eval_loss": 0.018333839252591133,
|
621 |
+
"eval_runtime": 192.9576,
|
622 |
+
"eval_samples_per_second": 5.182,
|
623 |
+
"eval_steps_per_second": 5.182,
|
624 |
+
"step": 4100
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"epoch": 1.32,
|
628 |
+
"grad_norm": 0.0014218598371371627,
|
629 |
+
"learning_rate": 3.5600000000000002e-06,
|
630 |
+
"loss": 0.0289,
|
631 |
+
"step": 4200
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"epoch": 1.32,
|
635 |
+
"eval_loss": 0.02419031597673893,
|
636 |
+
"eval_runtime": 192.6293,
|
637 |
+
"eval_samples_per_second": 5.191,
|
638 |
+
"eval_steps_per_second": 5.191,
|
639 |
+
"step": 4200
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 1.35,
|
643 |
+
"grad_norm": 0.0013137555215507746,
|
644 |
+
"learning_rate": 3.1155555555555555e-06,
|
645 |
+
"loss": 0.0298,
|
646 |
+
"step": 4300
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 1.35,
|
650 |
+
"eval_loss": 0.02638879045844078,
|
651 |
+
"eval_runtime": 192.8273,
|
652 |
+
"eval_samples_per_second": 5.186,
|
653 |
+
"eval_steps_per_second": 5.186,
|
654 |
+
"step": 4300
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 1.38,
|
658 |
+
"grad_norm": 0.35259732604026794,
|
659 |
+
"learning_rate": 2.6711111111111116e-06,
|
660 |
+
"loss": 0.0382,
|
661 |
+
"step": 4400
|
662 |
+
},
|
663 |
+
{
|
664 |
+
"epoch": 1.38,
|
665 |
+
"eval_loss": 0.01607164740562439,
|
666 |
+
"eval_runtime": 192.7408,
|
667 |
+
"eval_samples_per_second": 5.188,
|
668 |
+
"eval_steps_per_second": 5.188,
|
669 |
+
"step": 4400
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 1.41,
|
673 |
+
"grad_norm": 0.0020605421159416437,
|
674 |
+
"learning_rate": 2.226666666666667e-06,
|
675 |
+
"loss": 0.0339,
|
676 |
+
"step": 4500
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 1.41,
|
680 |
+
"eval_loss": 0.014907135628163815,
|
681 |
+
"eval_runtime": 192.8289,
|
682 |
+
"eval_samples_per_second": 5.186,
|
683 |
+
"eval_steps_per_second": 5.186,
|
684 |
+
"step": 4500
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 1.44,
|
688 |
+
"grad_norm": 0.0019016048172488809,
|
689 |
+
"learning_rate": 1.7822222222222225e-06,
|
690 |
+
"loss": 0.0195,
|
691 |
+
"step": 4600
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"epoch": 1.44,
|
695 |
+
"eval_loss": 0.015925556421279907,
|
696 |
+
"eval_runtime": 192.4215,
|
697 |
+
"eval_samples_per_second": 5.197,
|
698 |
+
"eval_steps_per_second": 5.197,
|
699 |
+
"step": 4600
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 1.47,
|
703 |
+
"grad_norm": 0.734219491481781,
|
704 |
+
"learning_rate": 1.337777777777778e-06,
|
705 |
+
"loss": 0.0328,
|
706 |
+
"step": 4700
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 1.47,
|
710 |
+
"eval_loss": 0.014890914782881737,
|
711 |
+
"eval_runtime": 192.4767,
|
712 |
+
"eval_samples_per_second": 5.195,
|
713 |
+
"eval_steps_per_second": 5.195,
|
714 |
+
"step": 4700
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 1.5,
|
718 |
+
"grad_norm": 2.157243251800537,
|
719 |
+
"learning_rate": 8.933333333333334e-07,
|
720 |
+
"loss": 0.0429,
|
721 |
+
"step": 4800
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"epoch": 1.5,
|
725 |
+
"eval_loss": 0.01486047450453043,
|
726 |
+
"eval_runtime": 191.7644,
|
727 |
+
"eval_samples_per_second": 5.215,
|
728 |
+
"eval_steps_per_second": 5.215,
|
729 |
+
"step": 4800
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"epoch": 1.54,
|
733 |
+
"grad_norm": 0.00161929486785084,
|
734 |
+
"learning_rate": 4.488888888888889e-07,
|
735 |
+
"loss": 0.0312,
|
736 |
+
"step": 4900
|
737 |
+
},
|
738 |
+
{
|
739 |
+
"epoch": 1.54,
|
740 |
+
"eval_loss": 0.02127786912024021,
|
741 |
+
"eval_runtime": 191.9957,
|
742 |
+
"eval_samples_per_second": 5.208,
|
743 |
+
"eval_steps_per_second": 5.208,
|
744 |
+
"step": 4900
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 1.57,
|
748 |
+
"grad_norm": 0.001571273198351264,
|
749 |
+
"learning_rate": 4.444444444444445e-09,
|
750 |
+
"loss": 0.0364,
|
751 |
+
"step": 5000
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 1.57,
|
755 |
+
"eval_loss": 0.01901436597108841,
|
756 |
+
"eval_runtime": 191.5985,
|
757 |
+
"eval_samples_per_second": 5.219,
|
758 |
+
"eval_steps_per_second": 5.219,
|
759 |
+
"step": 5000
|
760 |
}
|
761 |
],
|
762 |
"logging_steps": 100,
|
|
|
764 |
"num_input_tokens_seen": 0,
|
765 |
"num_train_epochs": 2,
|
766 |
"save_steps": 1000,
|
767 |
+
"total_flos": 8.051062996992e+16,
|
768 |
"train_batch_size": 1,
|
769 |
"trial_name": null,
|
770 |
"trial_params": null
|