Training in progress, step 460000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:583b1f1834e02218d89c3b697d47dd796ee6e2f54da76dbfa4b2b4a392308637
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc778d00dc1434fe3ece5ed1e4f0a69b755dfb4b5125022a7e4273f579b7722
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9006,11 +9006,211 @@
|
|
9006 |
"eval_samples_per_second": 1528.604,
|
9007 |
"eval_steps_per_second": 24.341,
|
9008 |
"step": 450000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9009 |
}
|
9010 |
],
|
9011 |
"max_steps": 500000,
|
9012 |
"num_train_epochs": 12,
|
9013 |
-
"total_flos": 1.
|
9014 |
"trial_name": null,
|
9015 |
"trial_params": null
|
9016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.717863883128684,
|
5 |
+
"global_step": 460000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9006 |
"eval_samples_per_second": 1528.604,
|
9007 |
"eval_steps_per_second": 24.341,
|
9008 |
"step": 450000
|
9009 |
+
},
|
9010 |
+
{
|
9011 |
+
"epoch": 10.5,
|
9012 |
+
"learning_rate": 1.770155470293445e-05,
|
9013 |
+
"loss": 0.2519,
|
9014 |
+
"step": 450500
|
9015 |
+
},
|
9016 |
+
{
|
9017 |
+
"epoch": 10.51,
|
9018 |
+
"learning_rate": 1.7548110926261522e-05,
|
9019 |
+
"loss": 0.2518,
|
9020 |
+
"step": 451000
|
9021 |
+
},
|
9022 |
+
{
|
9023 |
+
"epoch": 10.51,
|
9024 |
+
"eval_loss": 0.23447871208190918,
|
9025 |
+
"eval_runtime": 1.4307,
|
9026 |
+
"eval_samples_per_second": 1536.276,
|
9027 |
+
"eval_steps_per_second": 24.463,
|
9028 |
+
"step": 451000
|
9029 |
+
},
|
9030 |
+
{
|
9031 |
+
"epoch": 10.52,
|
9032 |
+
"learning_rate": 1.7396170301425777e-05,
|
9033 |
+
"loss": 0.2518,
|
9034 |
+
"step": 451500
|
9035 |
+
},
|
9036 |
+
{
|
9037 |
+
"epoch": 10.53,
|
9038 |
+
"learning_rate": 1.7245734490025544e-05,
|
9039 |
+
"loss": 0.252,
|
9040 |
+
"step": 452000
|
9041 |
+
},
|
9042 |
+
{
|
9043 |
+
"epoch": 10.53,
|
9044 |
+
"eval_loss": 0.23647595942020416,
|
9045 |
+
"eval_runtime": 1.4161,
|
9046 |
+
"eval_samples_per_second": 1552.16,
|
9047 |
+
"eval_steps_per_second": 24.716,
|
9048 |
+
"step": 452000
|
9049 |
+
},
|
9050 |
+
{
|
9051 |
+
"epoch": 10.54,
|
9052 |
+
"learning_rate": 1.7096805137202738e-05,
|
9053 |
+
"loss": 0.2518,
|
9054 |
+
"step": 452500
|
9055 |
+
},
|
9056 |
+
{
|
9057 |
+
"epoch": 10.55,
|
9058 |
+
"learning_rate": 1.6949383871624917e-05,
|
9059 |
+
"loss": 0.2517,
|
9060 |
+
"step": 453000
|
9061 |
+
},
|
9062 |
+
{
|
9063 |
+
"epoch": 10.55,
|
9064 |
+
"eval_loss": 0.23766835033893585,
|
9065 |
+
"eval_runtime": 1.4533,
|
9066 |
+
"eval_samples_per_second": 1512.402,
|
9067 |
+
"eval_steps_per_second": 24.083,
|
9068 |
+
"step": 453000
|
9069 |
+
},
|
9070 |
+
{
|
9071 |
+
"epoch": 10.57,
|
9072 |
+
"learning_rate": 1.6803472305467368e-05,
|
9073 |
+
"loss": 0.2515,
|
9074 |
+
"step": 453500
|
9075 |
+
},
|
9076 |
+
{
|
9077 |
+
"epoch": 10.58,
|
9078 |
+
"learning_rate": 1.665907203439568e-05,
|
9079 |
+
"loss": 0.2516,
|
9080 |
+
"step": 454000
|
9081 |
+
},
|
9082 |
+
{
|
9083 |
+
"epoch": 10.58,
|
9084 |
+
"eval_loss": 0.23621916770935059,
|
9085 |
+
"eval_runtime": 1.476,
|
9086 |
+
"eval_samples_per_second": 1489.127,
|
9087 |
+
"eval_steps_per_second": 23.712,
|
9088 |
+
"step": 454000
|
9089 |
+
},
|
9090 |
+
{
|
9091 |
+
"epoch": 10.59,
|
9092 |
+
"learning_rate": 1.6516184637548058e-05,
|
9093 |
+
"loss": 0.2515,
|
9094 |
+
"step": 454500
|
9095 |
+
},
|
9096 |
+
{
|
9097 |
+
"epoch": 10.6,
|
9098 |
+
"learning_rate": 1.6374811677518142e-05,
|
9099 |
+
"loss": 0.2517,
|
9100 |
+
"step": 455000
|
9101 |
+
},
|
9102 |
+
{
|
9103 |
+
"epoch": 10.6,
|
9104 |
+
"eval_loss": 0.23542185127735138,
|
9105 |
+
"eval_runtime": 1.4176,
|
9106 |
+
"eval_samples_per_second": 1550.543,
|
9107 |
+
"eval_steps_per_second": 24.69,
|
9108 |
+
"step": 455000
|
9109 |
+
},
|
9110 |
+
{
|
9111 |
+
"epoch": 10.61,
|
9112 |
+
"learning_rate": 1.6234954700338025e-05,
|
9113 |
+
"loss": 0.2516,
|
9114 |
+
"step": 455500
|
9115 |
+
},
|
9116 |
+
{
|
9117 |
+
"epoch": 10.62,
|
9118 |
+
"learning_rate": 1.6096615235461148e-05,
|
9119 |
+
"loss": 0.2515,
|
9120 |
+
"step": 456000
|
9121 |
+
},
|
9122 |
+
{
|
9123 |
+
"epoch": 10.62,
|
9124 |
+
"eval_loss": 0.2339724451303482,
|
9125 |
+
"eval_runtime": 1.3908,
|
9126 |
+
"eval_samples_per_second": 1580.393,
|
9127 |
+
"eval_steps_per_second": 25.165,
|
9128 |
+
"step": 456000
|
9129 |
+
},
|
9130 |
+
{
|
9131 |
+
"epoch": 10.64,
|
9132 |
+
"learning_rate": 1.59597947957458e-05,
|
9133 |
+
"loss": 0.2519,
|
9134 |
+
"step": 456500
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 10.65,
|
9138 |
+
"learning_rate": 1.5824494877438344e-05,
|
9139 |
+
"loss": 0.2515,
|
9140 |
+
"step": 457000
|
9141 |
+
},
|
9142 |
+
{
|
9143 |
+
"epoch": 10.65,
|
9144 |
+
"eval_loss": 0.23481862246990204,
|
9145 |
+
"eval_runtime": 1.4326,
|
9146 |
+
"eval_samples_per_second": 1534.248,
|
9147 |
+
"eval_steps_per_second": 24.431,
|
9148 |
+
"step": 457000
|
9149 |
+
},
|
9150 |
+
{
|
9151 |
+
"epoch": 10.66,
|
9152 |
+
"learning_rate": 1.569071696015702e-05,
|
9153 |
+
"loss": 0.2513,
|
9154 |
+
"step": 457500
|
9155 |
+
},
|
9156 |
+
{
|
9157 |
+
"epoch": 10.67,
|
9158 |
+
"learning_rate": 1.555846250687569e-05,
|
9159 |
+
"loss": 0.2517,
|
9160 |
+
"step": 458000
|
9161 |
+
},
|
9162 |
+
{
|
9163 |
+
"epoch": 10.67,
|
9164 |
+
"eval_loss": 0.23701262474060059,
|
9165 |
+
"eval_runtime": 1.4718,
|
9166 |
+
"eval_samples_per_second": 1493.443,
|
9167 |
+
"eval_steps_per_second": 23.781,
|
9168 |
+
"step": 458000
|
9169 |
+
},
|
9170 |
+
{
|
9171 |
+
"epoch": 10.68,
|
9172 |
+
"learning_rate": 1.542773296390789e-05,
|
9173 |
+
"loss": 0.2518,
|
9174 |
+
"step": 458500
|
9175 |
+
},
|
9176 |
+
{
|
9177 |
+
"epoch": 10.69,
|
9178 |
+
"learning_rate": 1.5298529760890945e-05,
|
9179 |
+
"loss": 0.2514,
|
9180 |
+
"step": 459000
|
9181 |
+
},
|
9182 |
+
{
|
9183 |
+
"epoch": 10.69,
|
9184 |
+
"eval_loss": 0.23413069546222687,
|
9185 |
+
"eval_runtime": 1.4496,
|
9186 |
+
"eval_samples_per_second": 1516.277,
|
9187 |
+
"eval_steps_per_second": 24.145,
|
9188 |
+
"step": 459000
|
9189 |
+
},
|
9190 |
+
{
|
9191 |
+
"epoch": 10.71,
|
9192 |
+
"learning_rate": 1.5170854310770376e-05,
|
9193 |
+
"loss": 0.251,
|
9194 |
+
"step": 459500
|
9195 |
+
},
|
9196 |
+
{
|
9197 |
+
"epoch": 10.72,
|
9198 |
+
"learning_rate": 1.5044708009784457e-05,
|
9199 |
+
"loss": 0.2514,
|
9200 |
+
"step": 460000
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 10.72,
|
9204 |
+
"eval_loss": 0.2361001968383789,
|
9205 |
+
"eval_runtime": 1.4246,
|
9206 |
+
"eval_samples_per_second": 1542.873,
|
9207 |
+
"eval_steps_per_second": 24.568,
|
9208 |
+
"step": 460000
|
9209 |
}
|
9210 |
],
|
9211 |
"max_steps": 500000,
|
9212 |
"num_train_epochs": 12,
|
9213 |
+
"total_flos": 1.4696383413717932e+22,
|
9214 |
"trial_name": null,
|
9215 |
"trial_params": null
|
9216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b1ac07a8fd5b7f18f94228f9f3e9b88d618a44073c792161d704c98526bced7
|
3 |
size 102501541
|