Training in progress, step 450000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c75b6a012ab82fb16d27738e0a95c3b1b8ef7e5d1f6ee0116a8c50549bb847f8
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4f1f1c1cb0bb3ff6487630da96ece6a563c0a58db594f3dd7e10d4cd4b3713
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -8806,11 +8806,211 @@
|
|
8806 |
"eval_samples_per_second": 1530.896,
|
8807 |
"eval_steps_per_second": 24.377,
|
8808 |
"step": 440000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8809 |
}
|
8810 |
],
|
8811 |
"max_steps": 500000,
|
8812 |
"num_train_epochs": 12,
|
8813 |
-
"total_flos": 1.
|
8814 |
"trial_name": null,
|
8815 |
"trial_params": null
|
8816 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.484866842191105,
|
5 |
+
"global_step": 450000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
8806 |
"eval_samples_per_second": 1530.896,
|
8807 |
"eval_steps_per_second": 24.377,
|
8808 |
"step": 440000
|
8809 |
+
},
|
8810 |
+
{
|
8811 |
+
"epoch": 10.26,
|
8812 |
+
"learning_rate": 2.1083388335824145e-05,
|
8813 |
+
"loss": 0.2526,
|
8814 |
+
"step": 440500
|
8815 |
+
},
|
8816 |
+
{
|
8817 |
+
"epoch": 10.28,
|
8818 |
+
"learning_rate": 2.0900255633978873e-05,
|
8819 |
+
"loss": 0.2525,
|
8820 |
+
"step": 441000
|
8821 |
+
},
|
8822 |
+
{
|
8823 |
+
"epoch": 10.28,
|
8824 |
+
"eval_loss": 0.23552300035953522,
|
8825 |
+
"eval_runtime": 1.4323,
|
8826 |
+
"eval_samples_per_second": 1534.608,
|
8827 |
+
"eval_steps_per_second": 24.436,
|
8828 |
+
"step": 441000
|
8829 |
+
},
|
8830 |
+
{
|
8831 |
+
"epoch": 10.29,
|
8832 |
+
"learning_rate": 2.0718589425453314e-05,
|
8833 |
+
"loss": 0.2527,
|
8834 |
+
"step": 441500
|
8835 |
+
},
|
8836 |
+
{
|
8837 |
+
"epoch": 10.3,
|
8838 |
+
"learning_rate": 2.0538391696920015e-05,
|
8839 |
+
"loss": 0.2521,
|
8840 |
+
"step": 442000
|
8841 |
+
},
|
8842 |
+
{
|
8843 |
+
"epoch": 10.3,
|
8844 |
+
"eval_loss": 0.23650765419006348,
|
8845 |
+
"eval_runtime": 1.4235,
|
8846 |
+
"eval_samples_per_second": 1544.133,
|
8847 |
+
"eval_steps_per_second": 24.588,
|
8848 |
+
"step": 442000
|
8849 |
+
},
|
8850 |
+
{
|
8851 |
+
"epoch": 10.31,
|
8852 |
+
"learning_rate": 2.035966441899249e-05,
|
8853 |
+
"loss": 0.2523,
|
8854 |
+
"step": 442500
|
8855 |
+
},
|
8856 |
+
{
|
8857 |
+
"epoch": 10.32,
|
8858 |
+
"learning_rate": 2.0182409546203555e-05,
|
8859 |
+
"loss": 0.2523,
|
8860 |
+
"step": 443000
|
8861 |
+
},
|
8862 |
+
{
|
8863 |
+
"epoch": 10.32,
|
8864 |
+
"eval_loss": 0.23561330139636993,
|
8865 |
+
"eval_runtime": 1.4199,
|
8866 |
+
"eval_samples_per_second": 1548.043,
|
8867 |
+
"eval_steps_per_second": 24.65,
|
8868 |
+
"step": 443000
|
8869 |
+
},
|
8870 |
+
{
|
8871 |
+
"epoch": 10.33,
|
8872 |
+
"learning_rate": 2.000662901698415e-05,
|
8873 |
+
"loss": 0.2521,
|
8874 |
+
"step": 443500
|
8875 |
+
},
|
8876 |
+
{
|
8877 |
+
"epoch": 10.35,
|
8878 |
+
"learning_rate": 1.983232475364195e-05,
|
8879 |
+
"loss": 0.2523,
|
8880 |
+
"step": 444000
|
8881 |
+
},
|
8882 |
+
{
|
8883 |
+
"epoch": 10.35,
|
8884 |
+
"eval_loss": 0.23662236332893372,
|
8885 |
+
"eval_runtime": 1.4145,
|
8886 |
+
"eval_samples_per_second": 1553.873,
|
8887 |
+
"eval_steps_per_second": 24.743,
|
8888 |
+
"step": 444000
|
8889 |
+
},
|
8890 |
+
{
|
8891 |
+
"epoch": 10.36,
|
8892 |
+
"learning_rate": 1.9659498662340474e-05,
|
8893 |
+
"loss": 0.2524,
|
8894 |
+
"step": 444500
|
8895 |
+
},
|
8896 |
+
{
|
8897 |
+
"epoch": 10.37,
|
8898 |
+
"learning_rate": 1.948815263307819e-05,
|
8899 |
+
"loss": 0.2519,
|
8900 |
+
"step": 445000
|
8901 |
+
},
|
8902 |
+
{
|
8903 |
+
"epoch": 10.37,
|
8904 |
+
"eval_loss": 0.23521380126476288,
|
8905 |
+
"eval_runtime": 1.4054,
|
8906 |
+
"eval_samples_per_second": 1563.94,
|
8907 |
+
"eval_steps_per_second": 24.903,
|
8908 |
+
"step": 445000
|
8909 |
+
},
|
8910 |
+
{
|
8911 |
+
"epoch": 10.38,
|
8912 |
+
"learning_rate": 1.9318288539667765e-05,
|
8913 |
+
"loss": 0.2524,
|
8914 |
+
"step": 445500
|
8915 |
+
},
|
8916 |
+
{
|
8917 |
+
"epoch": 10.39,
|
8918 |
+
"learning_rate": 1.914990823971574e-05,
|
8919 |
+
"loss": 0.2523,
|
8920 |
+
"step": 446000
|
8921 |
+
},
|
8922 |
+
{
|
8923 |
+
"epoch": 10.39,
|
8924 |
+
"eval_loss": 0.2357960045337677,
|
8925 |
+
"eval_runtime": 1.4368,
|
8926 |
+
"eval_samples_per_second": 1529.784,
|
8927 |
+
"eval_steps_per_second": 24.36,
|
8928 |
+
"step": 446000
|
8929 |
+
},
|
8930 |
+
{
|
8931 |
+
"epoch": 10.4,
|
8932 |
+
"learning_rate": 1.8983013574602096e-05,
|
8933 |
+
"loss": 0.2521,
|
8934 |
+
"step": 446500
|
8935 |
+
},
|
8936 |
+
{
|
8937 |
+
"epoch": 10.41,
|
8938 |
+
"learning_rate": 1.8817606369460156e-05,
|
8939 |
+
"loss": 0.2521,
|
8940 |
+
"step": 447000
|
8941 |
+
},
|
8942 |
+
{
|
8943 |
+
"epoch": 10.41,
|
8944 |
+
"eval_loss": 0.23412732779979706,
|
8945 |
+
"eval_runtime": 1.4282,
|
8946 |
+
"eval_samples_per_second": 1538.96,
|
8947 |
+
"eval_steps_per_second": 24.506,
|
8948 |
+
"step": 447000
|
8949 |
+
},
|
8950 |
+
{
|
8951 |
+
"epoch": 10.43,
|
8952 |
+
"learning_rate": 1.865368843315663e-05,
|
8953 |
+
"loss": 0.252,
|
8954 |
+
"step": 447500
|
8955 |
+
},
|
8956 |
+
{
|
8957 |
+
"epoch": 10.44,
|
8958 |
+
"learning_rate": 1.8491261558271762e-05,
|
8959 |
+
"loss": 0.2519,
|
8960 |
+
"step": 448000
|
8961 |
+
},
|
8962 |
+
{
|
8963 |
+
"epoch": 10.44,
|
8964 |
+
"eval_loss": 0.23749284446239471,
|
8965 |
+
"eval_runtime": 1.4343,
|
8966 |
+
"eval_samples_per_second": 1532.461,
|
8967 |
+
"eval_steps_per_second": 24.402,
|
8968 |
+
"step": 448000
|
8969 |
+
},
|
8970 |
+
{
|
8971 |
+
"epoch": 10.45,
|
8972 |
+
"learning_rate": 1.833032752107986e-05,
|
8973 |
+
"loss": 0.2518,
|
8974 |
+
"step": 448500
|
8975 |
+
},
|
8976 |
+
{
|
8977 |
+
"epoch": 10.46,
|
8978 |
+
"learning_rate": 1.817088808152978e-05,
|
8979 |
+
"loss": 0.2519,
|
8980 |
+
"step": 449000
|
8981 |
+
},
|
8982 |
+
{
|
8983 |
+
"epoch": 10.46,
|
8984 |
+
"eval_loss": 0.2349672168493271,
|
8985 |
+
"eval_runtime": 1.4127,
|
8986 |
+
"eval_samples_per_second": 1555.927,
|
8987 |
+
"eval_steps_per_second": 24.776,
|
8988 |
+
"step": 449000
|
8989 |
+
},
|
8990 |
+
{
|
8991 |
+
"epoch": 10.47,
|
8992 |
+
"learning_rate": 1.801294498322569e-05,
|
8993 |
+
"loss": 0.2517,
|
8994 |
+
"step": 449500
|
8995 |
+
},
|
8996 |
+
{
|
8997 |
+
"epoch": 10.48,
|
8998 |
+
"learning_rate": 1.7856499953407978e-05,
|
8999 |
+
"loss": 0.2519,
|
9000 |
+
"step": 450000
|
9001 |
+
},
|
9002 |
+
{
|
9003 |
+
"epoch": 10.48,
|
9004 |
+
"eval_loss": 0.2356027066707611,
|
9005 |
+
"eval_runtime": 1.4379,
|
9006 |
+
"eval_samples_per_second": 1528.604,
|
9007 |
+
"eval_steps_per_second": 24.341,
|
9008 |
+
"step": 450000
|
9009 |
}
|
9010 |
],
|
9011 |
"max_steps": 500000,
|
9012 |
"num_train_epochs": 12,
|
9013 |
+
"total_flos": 1.4376896274943432e+22,
|
9014 |
"trial_name": null,
|
9015 |
"trial_params": null
|
9016 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78b24f59351cac08b9824b3f79acd0f1dc0a89b48a8f25dc820166f982b53fa7
|
3 |
size 102501541
|