Training in progress, step 4280, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1
 size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:149043e2c866159ce43cb83040a8df7ea2f79cc230e8dcb485ab8e3def1116c3
 size 1107079290
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6ffa9145f2f47a4ee9348b70f818bdbc6838c07fe673a33bbd23b4e0aa3723fd
 size 1000
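The three files above are Git LFS pointers, so only their SHA-256 digests change in this commit. A downloaded copy of the checkpoint can be verified against the new `oid sha256:` values with a short script; this is a minimal sketch, assuming the files have been fetched into a local `last-checkpoint/` directory (the paths themselves are not part of this commit):

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected digests copied from the LFS pointers in this commit.
expected = {
    "last-checkpoint/model.safetensors": "517491d8759942f27beaaca2325a4b97a206a4bc50d01b37f43e9dbfa7a419a1",
    "last-checkpoint/optimizer.pt": "149043e2c866159ce43cb83040a8df7ea2f79cc230e8dcb485ab8e3def1116c3",
    "last-checkpoint/scheduler.pt": "6ffa9145f2f47a4ee9348b70f818bdbc6838c07fe673a33bbd23b4e0aa3723fd",
}

for rel_path, oid in expected.items():
    actual = sha256_of(Path(rel_path))
    print(f"{rel_path}: {'ok' if actual == oid else 'MISMATCH'}")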
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9997956025345286,
   "eval_steps": 2000,
-  "global_step":
+  "global_step": 4280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,202 @@
       "eval_samples_per_second": 1678.827,
       "eval_steps_per_second": 52.47,
       "step": 4000
+    },
+    {
+      "epoch": 0.9367243846176307,
+      "grad_norm": 46.65625,
+      "learning_rate": 9.981704618557266e-07,
+      "loss": 86.0902,
+      "step": 4010
+    },
+    {
+      "epoch": 0.93906035565159,
+      "grad_norm": 44.65625,
+      "learning_rate": 9.98165899416464e-07,
+      "loss": 85.4592,
+      "step": 4020
+    },
+    {
+      "epoch": 0.9413963266855491,
+      "grad_norm": 44.34375,
+      "learning_rate": 9.981613369772013e-07,
+      "loss": 86.5728,
+      "step": 4030
+    },
+    {
+      "epoch": 0.9437322977195083,
+      "grad_norm": 46.03125,
+      "learning_rate": 9.98156774537939e-07,
+      "loss": 87.2485,
+      "step": 4040
+    },
+    {
+      "epoch": 0.9460682687534675,
+      "grad_norm": 48.28125,
+      "learning_rate": 9.981522120986765e-07,
+      "loss": 87.1623,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9484042397874266,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.981476496594138e-07,
+      "loss": 86.2034,
+      "step": 4060
+    },
+    {
+      "epoch": 0.9507402108213858,
+      "grad_norm": 48.25,
+      "learning_rate": 9.981430872201514e-07,
+      "loss": 86.5078,
+      "step": 4070
+    },
+    {
+      "epoch": 0.953076181855345,
+      "grad_norm": 44.53125,
+      "learning_rate": 9.981385247808888e-07,
+      "loss": 86.3279,
+      "step": 4080
+    },
+    {
+      "epoch": 0.9554121528893041,
+      "grad_norm": 45.6875,
+      "learning_rate": 9.981339623416264e-07,
+      "loss": 86.4747,
+      "step": 4090
+    },
+    {
+      "epoch": 0.9577481239232634,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.981293999023637e-07,
+      "loss": 85.3221,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9600840949572226,
+      "grad_norm": 47.15625,
+      "learning_rate": 9.981248374631013e-07,
+      "loss": 85.7835,
+      "step": 4110
+    },
+    {
+      "epoch": 0.9624200659911817,
+      "grad_norm": 45.96875,
+      "learning_rate": 9.981202750238387e-07,
+      "loss": 85.919,
+      "step": 4120
+    },
+    {
+      "epoch": 0.9647560370251409,
+      "grad_norm": 46.40625,
+      "learning_rate": 9.981157125845762e-07,
+      "loss": 86.6488,
+      "step": 4130
+    },
+    {
+      "epoch": 0.9670920080591001,
+      "grad_norm": 47.8125,
+      "learning_rate": 9.981111501453136e-07,
+      "loss": 86.7465,
+      "step": 4140
+    },
+    {
+      "epoch": 0.9694279790930592,
+      "grad_norm": 50.96875,
+      "learning_rate": 9.981065877060512e-07,
+      "loss": 85.8423,
+      "step": 4150
+    },
+    {
+      "epoch": 0.9717639501270184,
+      "grad_norm": 44.84375,
+      "learning_rate": 9.981020252667885e-07,
+      "loss": 86.4872,
+      "step": 4160
+    },
+    {
+      "epoch": 0.9740999211609777,
+      "grad_norm": 51.46875,
+      "learning_rate": 9.980974628275261e-07,
+      "loss": 86.9111,
+      "step": 4170
+    },
+    {
+      "epoch": 0.9764358921949368,
+      "grad_norm": 46.25,
+      "learning_rate": 9.980929003882635e-07,
+      "loss": 86.4476,
+      "step": 4180
+    },
+    {
+      "epoch": 0.978771863228896,
+      "grad_norm": 47.0625,
+      "learning_rate": 9.98088337949001e-07,
+      "loss": 86.3345,
+      "step": 4190
+    },
+    {
+      "epoch": 0.9811078342628552,
+      "grad_norm": 47.96875,
+      "learning_rate": 9.980837755097384e-07,
+      "loss": 87.4492,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9834438052968143,
+      "grad_norm": 47.53125,
+      "learning_rate": 9.98079213070476e-07,
+      "loss": 87.3175,
+      "step": 4210
+    },
+    {
+      "epoch": 0.9857797763307735,
+      "grad_norm": 47.84375,
+      "learning_rate": 9.980746506312134e-07,
+      "loss": 85.7159,
+      "step": 4220
+    },
+    {
+      "epoch": 0.9881157473647327,
+      "grad_norm": 50.5,
+      "learning_rate": 9.98070088191951e-07,
+      "loss": 85.7232,
+      "step": 4230
+    },
+    {
+      "epoch": 0.9904517183986918,
+      "grad_norm": 47.1875,
+      "learning_rate": 9.980655257526883e-07,
+      "loss": 86.1964,
+      "step": 4240
+    },
+    {
+      "epoch": 0.992787689432651,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980609633134259e-07,
+      "loss": 86.2977,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9951236604666102,
+      "grad_norm": 44.8125,
+      "learning_rate": 9.980564008741632e-07,
+      "loss": 85.6801,
+      "step": 4260
+    },
+    {
+      "epoch": 0.9974596315005694,
+      "grad_norm": 46.15625,
+      "learning_rate": 9.980518384349008e-07,
+      "loss": 85.8044,
+      "step": 4270
+    },
+    {
+      "epoch": 0.9997956025345286,
+      "grad_norm": 46.75,
+      "learning_rate": 9.980472759956384e-07,
+      "loss": 86.1971,
+      "step": 4280
     }
   ],
   "logging_steps": 10,
@@ -2837,12 +3033,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.1817578952753414e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
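The updated state marks the end of the scheduled run: `global_step` reaches 4280, `epoch` is roughly 0.9998, and `should_training_stop` flips to `true`. A downloaded copy of this checkpoint can be inspected directly from `trainer_state.json` with the standard library; this is a minimal sketch, and the local path is an assumption rather than part of the commit:

import json
from pathlib import Path

# Assumed local path to the checkpoint directory uploaded in this commit.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"])       # 4280
print("epoch:      ", round(state["epoch"], 4))   # ~0.9998

# log_history holds one dict per logging/eval event; the last one is step 4280.
last = state["log_history"][-1]
print(f"step {last['step']}: loss={last['loss']}, "
      f"grad_norm={last['grad_norm']}, lr={last['learning_rate']}")

If further training were wanted, the same directory could be passed to `transformers` via `Trainer.train(resume_from_checkpoint="last-checkpoint")`, assuming the original model and data pipeline are recreated; that setup is not shown in this commit.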