Training in progress, step 128, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2d601d43dade33badf98bf0461027583958dd8667ff02874ea062b250cd9db81
 size 167832240
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a6c793b1cb9548fd561c88d3ad584df110322f62b86194ea29aeedbd50f780a3
 size 85723284
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0810b5b5d390c1667c8d6c6351c2b0f8ee6d357396bbdeb5cbca15c6b011b798
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:968a6dcbda34982ad43f2af2e04e5edf94e043c521201e71b1583695497d18e0
 size 1064
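The four entries above are git-lfs pointer files: each records only the spec version, the sha256 oid of the tracked blob, and its size in bytes. Below is a minimal sketch, in Python, of how a locally pulled blob could be checked against its pointer; the oid and size are copied from the scheduler.pt diff above, while the local path is an assumption about where the checkpoint was downloaded.

import hashlib
from pathlib import Path

def verify_lfs_blob(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local blob matches the oid/size from its LFS pointer."""
    data = Path(blob_path).read_bytes()  # path is assumed; adjust to the local clone
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# oid and size copied from the scheduler.pt pointer in this commit
print(verify_lfs_blob(
    "last-checkpoint/scheduler.pt",
    "968a6dcbda34982ad43f2af2e04e5edf94e043c521201e71b1583695497d18e0",
    1064,
))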
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 0.24994200468063354,
 "best_model_checkpoint": "miner_id_24/checkpoint-100",
-"epoch":
+"epoch": 1.006859382655561,
 "eval_steps": 50,
-"global_step":
+"global_step": 128,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -731,6 +731,202 @@
 "eval_samples_per_second": 7.183,
 "eval_steps_per_second": 1.804,
 "step": 100
+},
+{
+"epoch": 0.7917687408133268,
+"grad_norm": 4.1459736824035645,
+"learning_rate": 2.4742923014386156e-05,
+"loss": 4.0253,
+"step": 101
+},
+{
+"epoch": 0.7996080352768251,
+"grad_norm": 4.166021347045898,
+"learning_rate": 2.301660165700936e-05,
+"loss": 4.1193,
+"step": 102
+},
+{
+"epoch": 0.8074473297403234,
+"grad_norm": 4.5724639892578125,
+"learning_rate": 2.1344844419735755e-05,
+"loss": 4.1613,
+"step": 103
+},
+{
+"epoch": 0.8152866242038217,
+"grad_norm": 3.7771105766296387,
+"learning_rate": 1.9728836206903656e-05,
+"loss": 3.577,
+"step": 104
+},
+{
+"epoch": 0.82312591866732,
+"grad_norm": 4.427511215209961,
+"learning_rate": 1.8169722409183097e-05,
+"loss": 4.0308,
+"step": 105
+},
+{
+"epoch": 0.8309652131308183,
+"grad_norm": 4.253128528594971,
+"learning_rate": 1.6668608091748495e-05,
+"loss": 4.358,
+"step": 106
+},
+{
+"epoch": 0.8388045075943165,
+"grad_norm": 3.6124472618103027,
+"learning_rate": 1.522655721103291e-05,
+"loss": 3.8505,
+"step": 107
+},
+{
+"epoch": 0.8466438020578148,
+"grad_norm": 4.112203598022461,
+"learning_rate": 1.3844591860619383e-05,
+"loss": 3.9511,
+"step": 108
+},
+{
+"epoch": 0.854483096521313,
+"grad_norm": 3.7493703365325928,
+"learning_rate": 1.2523691546803873e-05,
+"loss": 3.8253,
+"step": 109
+},
+{
+"epoch": 0.8623223909848113,
+"grad_norm": 4.115346431732178,
+"learning_rate": 1.1264792494342857e-05,
+"loss": 3.8934,
+"step": 110
+},
+{
+"epoch": 0.8701616854483096,
+"grad_norm": 4.116607666015625,
+"learning_rate": 1.0068786982878087e-05,
+"loss": 3.7177,
+"step": 111
+},
+{
+"epoch": 0.8780009799118079,
+"grad_norm": 3.9183499813079834,
+"learning_rate": 8.936522714508678e-06,
+"loss": 3.8943,
+"step": 112
+},
+{
+"epoch": 0.8858402743753062,
+"grad_norm": 4.0384039878845215,
+"learning_rate": 7.868802212958703e-06,
+"loss": 3.4521,
+"step": 113
+},
+{
+"epoch": 0.8936795688388045,
+"grad_norm": 4.025205612182617,
+"learning_rate": 6.866382254766157e-06,
+"loss": 3.8196,
+"step": 114
+},
+{
+"epoch": 0.9015188633023028,
+"grad_norm": 4.01348352432251,
+"learning_rate": 5.929973332896677e-06,
+"loss": 3.9242,
+"step": 115
+},
+{
+"epoch": 0.9093581577658011,
+"grad_norm": 4.146557331085205,
+"learning_rate": 5.060239153161872e-06,
+"loss": 4.0043,
+"step": 116
+},
+{
+"epoch": 0.9171974522292994,
+"grad_norm": 4.394860744476318,
+"learning_rate": 4.257796163799455e-06,
+"loss": 3.8837,
+"step": 117
+},
+{
+"epoch": 0.9250367466927977,
+"grad_norm": 4.56512451171875,
+"learning_rate": 3.5232131185484076e-06,
+"loss": 4.3846,
+"step": 118
+},
+{
+"epoch": 0.932876041156296,
+"grad_norm": 3.989962577819824,
+"learning_rate": 2.857010673529015e-06,
+"loss": 3.3769,
+"step": 119
+},
+{
+"epoch": 0.9407153356197943,
+"grad_norm": 4.115790843963623,
+"learning_rate": 2.259661018213333e-06,
+"loss": 3.7523,
+"step": 120
+},
+{
+"epoch": 0.9485546300832925,
+"grad_norm": 4.354365348815918,
+"learning_rate": 1.7315875407479032e-06,
+"loss": 3.7809,
+"step": 121
+},
+{
+"epoch": 0.9563939245467908,
+"grad_norm": 4.128818035125732,
+"learning_rate": 1.2731645278655445e-06,
+"loss": 3.8053,
+"step": 122
+},
+{
+"epoch": 0.964233219010289,
+"grad_norm": 4.223034858703613,
+"learning_rate": 8.847168995992916e-07,
+"loss": 3.5431,
+"step": 123
+},
+{
+"epoch": 0.9720725134737873,
+"grad_norm": 4.52358341217041,
+"learning_rate": 5.665199789862907e-07,
+"loss": 4.4289,
+"step": 124
+},
+{
+"epoch": 0.9799118079372856,
+"grad_norm": 3.970877170562744,
+"learning_rate": 3.1879929692498757e-07,
+"loss": 3.9163,
+"step": 125
+},
+{
+"epoch": 0.9877511024007839,
+"grad_norm": 4.14064359664917,
+"learning_rate": 1.4173043232380557e-07,
+"loss": 3.9214,
+"step": 126
+},
+{
+"epoch": 0.9955903968642822,
+"grad_norm": 3.948699712753296,
+"learning_rate": 3.5438887654737355e-08,
+"loss": 4.2029,
+"step": 127
+},
+{
+"epoch": 1.006859382655561,
+"grad_norm": 3.799189567565918,
+"learning_rate": 0.0,
+"loss": 3.387,
+"step": 128
 }
 ],
 "logging_steps": 1,
@@ -754,12 +950,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos":
+"total_flos": 7.203212659984957e+17,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null
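For completeness, a small sketch of how the per-step records added in this commit could be read back once the checkpoint is downloaded. It assumes the Trainer's usual layout, where these records sit under a "log_history" list inside trainer_state.json (that key name is not shown in the diff above), and that the file is read from the repository path shown in this commit.

import json

# Path follows the repository layout shown in this commit (an assumption about the local clone).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])  # 128 for this checkpoint
print("best_metric:", state["best_metric"])  # 0.24994200468063354

# Training records carry a "loss" key; evaluation records do not.
for record in state.get("log_history", []):
    if "loss" in record and record["step"] > 100:
        print(f"step {record['step']:3d}  loss {record['loss']:.4f}  lr {record['learning_rate']:.2e}")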