Training in progress, step 2136, checkpoint
Browse files- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +256 -4
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4903351912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ffed5f8c81d4bcc1c3949f7ae2640c2980c4515a7bf471d7277700e4a42dc62
|
3 |
size 4903351912
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4947570872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2776b66179020970a8f8df643b6376fa54d907b9a0ab7de3152df871e8519472
|
3 |
size 4947570872
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4962221464
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:637c9d5ccb29c2571a60e81aba1229d7c57a7860baf764cefb86807c1fd55c51
|
3 |
size 4962221464
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3670322200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6e6a180710afa8be7623704d675a1486d890f8acd593108fd30e06f8faf9d99
|
3 |
size 3670322200
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2216
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe0c40a4a813653ed70995efe5d8a8fed35d5d52cf0ca2d406ae22aa69dd62dc
|
3 |
size 2216
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:638959202e00ec8e922c9fefb3271344d643c48007a3ce5c5efbd2a02e4157e6
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -14707,6 +14707,258 @@
|
|
14707 |
"learning_rate": 3.106191336121222e-08,
|
14708 |
"loss": 1.1671,
|
14709 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14710 |
}
|
14711 |
],
|
14712 |
"logging_steps": 1,
|
@@ -14721,12 +14973,12 @@
|
|
14721 |
"should_evaluate": false,
|
14722 |
"should_log": false,
|
14723 |
"should_save": true,
|
14724 |
-
"should_training_stop":
|
14725 |
},
|
14726 |
"attributes": {}
|
14727 |
}
|
14728 |
},
|
14729 |
-
"total_flos": 6.
|
14730 |
"train_batch_size": 8,
|
14731 |
"trial_name": null,
|
14732 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2136,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
14707 |
"learning_rate": 3.106191336121222e-08,
|
14708 |
"loss": 1.1671,
|
14709 |
"step": 2100
|
14710 |
+
},
|
14711 |
+
{
|
14712 |
+
"epoch": 1.9672284644194757,
|
14713 |
+
"grad_norm": 0.0,
|
14714 |
+
"learning_rate": 2.936063551628454e-08,
|
14715 |
+
"loss": 1.1406,
|
14716 |
+
"step": 2101
|
14717 |
+
},
|
14718 |
+
{
|
14719 |
+
"epoch": 1.9681647940074907,
|
14720 |
+
"grad_norm": 0.0,
|
14721 |
+
"learning_rate": 2.7707234737970325e-08,
|
14722 |
+
"loss": 1.155,
|
14723 |
+
"step": 2102
|
14724 |
+
},
|
14725 |
+
{
|
14726 |
+
"epoch": 1.9691011235955056,
|
14727 |
+
"grad_norm": 0.0,
|
14728 |
+
"learning_rate": 2.6101714990083292e-08,
|
14729 |
+
"loss": 1.1179,
|
14730 |
+
"step": 2103
|
14731 |
+
},
|
14732 |
+
{
|
14733 |
+
"epoch": 1.9700374531835205,
|
14734 |
+
"grad_norm": 0.0,
|
14735 |
+
"learning_rate": 2.4544080121657877e-08,
|
14736 |
+
"loss": 1.1077,
|
14737 |
+
"step": 2104
|
14738 |
+
},
|
14739 |
+
{
|
14740 |
+
"epoch": 1.9709737827715355,
|
14741 |
+
"grad_norm": 0.0,
|
14742 |
+
"learning_rate": 2.3034333866922555e-08,
|
14743 |
+
"loss": 1.1038,
|
14744 |
+
"step": 2105
|
14745 |
+
},
|
14746 |
+
{
|
14747 |
+
"epoch": 1.9719101123595506,
|
14748 |
+
"grad_norm": 0.0,
|
14749 |
+
"learning_rate": 2.1572479845299865e-08,
|
14750 |
+
"loss": 1.1178,
|
14751 |
+
"step": 2106
|
14752 |
+
},
|
14753 |
+
{
|
14754 |
+
"epoch": 1.9728464419475655,
|
14755 |
+
"grad_norm": 0.0,
|
14756 |
+
"learning_rate": 2.0158521561404188e-08,
|
14757 |
+
"loss": 1.1605,
|
14758 |
+
"step": 2107
|
14759 |
+
},
|
14760 |
+
{
|
14761 |
+
"epoch": 1.9737827715355807,
|
14762 |
+
"grad_norm": 0.0,
|
14763 |
+
"learning_rate": 1.879246240501509e-08,
|
14764 |
+
"loss": 1.1151,
|
14765 |
+
"step": 2108
|
14766 |
+
},
|
14767 |
+
{
|
14768 |
+
"epoch": 1.9747191011235956,
|
14769 |
+
"grad_norm": 0.0,
|
14770 |
+
"learning_rate": 1.747430565108843e-08,
|
14771 |
+
"loss": 1.1017,
|
14772 |
+
"step": 2109
|
14773 |
+
},
|
14774 |
+
{
|
14775 |
+
"epoch": 1.9756554307116105,
|
14776 |
+
"grad_norm": 0.0,
|
14777 |
+
"learning_rate": 1.6204054459736385e-08,
|
14778 |
+
"loss": 1.1408,
|
14779 |
+
"step": 2110
|
14780 |
+
},
|
14781 |
+
{
|
14782 |
+
"epoch": 1.9765917602996255,
|
14783 |
+
"grad_norm": 0.0,
|
14784 |
+
"learning_rate": 1.4981711876227435e-08,
|
14785 |
+
"loss": 1.1019,
|
14786 |
+
"step": 2111
|
14787 |
+
},
|
14788 |
+
{
|
14789 |
+
"epoch": 1.9775280898876404,
|
14790 |
+
"grad_norm": 0.0,
|
14791 |
+
"learning_rate": 1.3807280830968606e-08,
|
14792 |
+
"loss": 1.1559,
|
14793 |
+
"step": 2112
|
14794 |
+
},
|
14795 |
+
{
|
14796 |
+
"epoch": 1.9784644194756553,
|
14797 |
+
"grad_norm": 0.0,
|
14798 |
+
"learning_rate": 1.2680764139509915e-08,
|
14799 |
+
"loss": 1.1168,
|
14800 |
+
"step": 2113
|
14801 |
+
},
|
14802 |
+
{
|
14803 |
+
"epoch": 1.9794007490636703,
|
14804 |
+
"grad_norm": 0.0,
|
14805 |
+
"learning_rate": 1.1602164502531043e-08,
|
14806 |
+
"loss": 1.1662,
|
14807 |
+
"step": 2114
|
14808 |
+
},
|
14809 |
+
{
|
14810 |
+
"epoch": 1.9803370786516854,
|
14811 |
+
"grad_norm": 0.0,
|
14812 |
+
"learning_rate": 1.0571484505839114e-08,
|
14813 |
+
"loss": 1.1558,
|
14814 |
+
"step": 2115
|
14815 |
+
},
|
14816 |
+
{
|
14817 |
+
"epoch": 1.9812734082397003,
|
14818 |
+
"grad_norm": 0.0,
|
14819 |
+
"learning_rate": 9.588726620357591e-09,
|
14820 |
+
"loss": 1.1725,
|
14821 |
+
"step": 2116
|
14822 |
+
},
|
14823 |
+
{
|
14824 |
+
"epoch": 1.9822097378277155,
|
14825 |
+
"grad_norm": 0.0,
|
14826 |
+
"learning_rate": 8.653893202124064e-09,
|
14827 |
+
"loss": 1.1049,
|
14828 |
+
"step": 2117
|
14829 |
+
},
|
14830 |
+
{
|
14831 |
+
"epoch": 1.9831460674157304,
|
14832 |
+
"grad_norm": 0.0,
|
14833 |
+
"learning_rate": 7.76698649228136e-09,
|
14834 |
+
"loss": 1.1635,
|
14835 |
+
"step": 2118
|
14836 |
+
},
|
14837 |
+
{
|
14838 |
+
"epoch": 1.9840823970037453,
|
14839 |
+
"grad_norm": 0.0,
|
14840 |
+
"learning_rate": 6.928008617077542e-09,
|
14841 |
+
"loss": 1.1132,
|
14842 |
+
"step": 2119
|
14843 |
+
},
|
14844 |
+
{
|
14845 |
+
"epoch": 1.9850187265917603,
|
14846 |
+
"grad_norm": 0.0,
|
14847 |
+
"learning_rate": 6.136961587852597e-09,
|
14848 |
+
"loss": 1.1769,
|
14849 |
+
"step": 2120
|
14850 |
+
},
|
14851 |
+
{
|
14852 |
+
"epoch": 1.9859550561797752,
|
14853 |
+
"grad_norm": 0.0,
|
14854 |
+
"learning_rate": 5.393847301042865e-09,
|
14855 |
+
"loss": 1.1298,
|
14856 |
+
"step": 2121
|
14857 |
+
},
|
14858 |
+
{
|
14859 |
+
"epoch": 1.9868913857677901,
|
14860 |
+
"grad_norm": 0.0,
|
14861 |
+
"learning_rate": 4.698667538169943e-09,
|
14862 |
+
"loss": 1.1547,
|
14863 |
+
"step": 2122
|
14864 |
+
},
|
14865 |
+
{
|
14866 |
+
"epoch": 1.9878277153558053,
|
14867 |
+
"grad_norm": 0.0,
|
14868 |
+
"learning_rate": 4.051423965838464e-09,
|
14869 |
+
"loss": 1.1608,
|
14870 |
+
"step": 2123
|
14871 |
+
},
|
14872 |
+
{
|
14873 |
+
"epoch": 1.9887640449438202,
|
14874 |
+
"grad_norm": 0.0,
|
14875 |
+
"learning_rate": 3.4521181357316523e-09,
|
14876 |
+
"loss": 1.1669,
|
14877 |
+
"step": 2124
|
14878 |
+
},
|
14879 |
+
{
|
14880 |
+
"epoch": 1.9897003745318353,
|
14881 |
+
"grad_norm": 0.0,
|
14882 |
+
"learning_rate": 2.9007514846113304e-09,
|
14883 |
+
"loss": 1.167,
|
14884 |
+
"step": 2125
|
14885 |
+
},
|
14886 |
+
{
|
14887 |
+
"epoch": 1.9906367041198503,
|
14888 |
+
"grad_norm": 0.0,
|
14889 |
+
"learning_rate": 2.397325334309031e-09,
|
14890 |
+
"loss": 1.1729,
|
14891 |
+
"step": 2126
|
14892 |
+
},
|
14893 |
+
{
|
14894 |
+
"epoch": 1.9915730337078652,
|
14895 |
+
"grad_norm": 0.0,
|
14896 |
+
"learning_rate": 1.941840891721558e-09,
|
14897 |
+
"loss": 1.1634,
|
14898 |
+
"step": 2127
|
14899 |
+
},
|
14900 |
+
{
|
14901 |
+
"epoch": 1.9925093632958801,
|
14902 |
+
"grad_norm": 0.0,
|
14903 |
+
"learning_rate": 1.53429924881765e-09,
|
14904 |
+
"loss": 1.1068,
|
14905 |
+
"step": 2128
|
14906 |
+
},
|
14907 |
+
{
|
14908 |
+
"epoch": 1.993445692883895,
|
14909 |
+
"grad_norm": 0.0,
|
14910 |
+
"learning_rate": 1.174701382626875e-09,
|
14911 |
+
"loss": 1.1238,
|
14912 |
+
"step": 2129
|
14913 |
+
},
|
14914 |
+
{
|
14915 |
+
"epoch": 1.99438202247191,
|
14916 |
+
"grad_norm": 0.0,
|
14917 |
+
"learning_rate": 8.63048155235191e-10,
|
14918 |
+
"loss": 1.113,
|
14919 |
+
"step": 2130
|
14920 |
+
},
|
14921 |
+
{
|
14922 |
+
"epoch": 1.9953183520599251,
|
14923 |
+
"grad_norm": 0.0,
|
14924 |
+
"learning_rate": 5.99340313798269e-10,
|
14925 |
+
"loss": 1.1078,
|
14926 |
+
"step": 2131
|
14927 |
+
},
|
14928 |
+
{
|
14929 |
+
"epoch": 1.99625468164794,
|
14930 |
+
"grad_norm": 0.0,
|
14931 |
+
"learning_rate": 3.8357849051484655e-10,
|
14932 |
+
"loss": 1.102,
|
14933 |
+
"step": 2132
|
14934 |
+
},
|
14935 |
+
{
|
14936 |
+
"epoch": 1.9971910112359552,
|
14937 |
+
"grad_norm": 0.0,
|
14938 |
+
"learning_rate": 2.1576320265337403e-10,
|
14939 |
+
"loss": 1.0998,
|
14940 |
+
"step": 2133
|
14941 |
+
},
|
14942 |
+
{
|
14943 |
+
"epoch": 1.9981273408239701,
|
14944 |
+
"grad_norm": 0.0,
|
14945 |
+
"learning_rate": 9.58948525253689e-11,
|
14946 |
+
"loss": 1.1516,
|
14947 |
+
"step": 2134
|
14948 |
+
},
|
14949 |
+
{
|
14950 |
+
"epoch": 1.999063670411985,
|
14951 |
+
"grad_norm": 0.0,
|
14952 |
+
"learning_rate": 2.3973727498738387e-11,
|
14953 |
+
"loss": 1.1345,
|
14954 |
+
"step": 2135
|
14955 |
+
},
|
14956 |
+
{
|
14957 |
+
"epoch": 2.0,
|
14958 |
+
"grad_norm": 0.0,
|
14959 |
+
"learning_rate": 0.0,
|
14960 |
+
"loss": 1.2175,
|
14961 |
+
"step": 2136
|
14962 |
}
|
14963 |
],
|
14964 |
"logging_steps": 1,
|
|
|
14973 |
"should_evaluate": false,
|
14974 |
"should_log": false,
|
14975 |
"should_save": true,
|
14976 |
+
"should_training_stop": true
|
14977 |
},
|
14978 |
"attributes": {}
|
14979 |
}
|
14980 |
},
|
14981 |
+
"total_flos": 6.987483782320226e+18,
|
14982 |
"train_batch_size": 8,
|
14983 |
"trial_name": null,
|
14984 |
"trial_params": null
|