Training in progress, step 370000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df6d3d3f9674103740b8f59e2a1f3f36fbba555fa4f14347ef60833e8c7c8d0f
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f631839d0a0f079647209a72e733a12f2b0a5a840f484d4d4c8016cacc9ffbe
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7206,11 +7206,211 @@
|
|
7206 |
"eval_samples_per_second": 1504.148,
|
7207 |
"eval_steps_per_second": 23.951,
|
7208 |
"step": 360000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7209 |
}
|
7210 |
],
|
7211 |
"max_steps": 500000,
|
7212 |
"num_train_epochs": 12,
|
7213 |
-
"total_flos": 1.
|
7214 |
"trial_name": null,
|
7215 |
"trial_params": null
|
7216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.620890514690464,
|
5 |
+
"global_step": 370000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7206 |
"eval_samples_per_second": 1504.148,
|
7207 |
"eval_steps_per_second": 23.951,
|
7208 |
"step": 360000
|
7209 |
+
},
|
7210 |
+
{
|
7211 |
+
"epoch": 8.4,
|
7212 |
+
"learning_rate": 6.746049527150238e-05,
|
7213 |
+
"loss": 0.2596,
|
7214 |
+
"step": 360500
|
7215 |
+
},
|
7216 |
+
{
|
7217 |
+
"epoch": 8.41,
|
7218 |
+
"learning_rate": 6.707871421704209e-05,
|
7219 |
+
"loss": 0.2596,
|
7220 |
+
"step": 361000
|
7221 |
+
},
|
7222 |
+
{
|
7223 |
+
"epoch": 8.41,
|
7224 |
+
"eval_loss": 0.24351955950260162,
|
7225 |
+
"eval_runtime": 1.4786,
|
7226 |
+
"eval_samples_per_second": 1486.571,
|
7227 |
+
"eval_steps_per_second": 23.672,
|
7228 |
+
"step": 361000
|
7229 |
+
},
|
7230 |
+
{
|
7231 |
+
"epoch": 8.42,
|
7232 |
+
"learning_rate": 6.669789465567683e-05,
|
7233 |
+
"loss": 0.2593,
|
7234 |
+
"step": 361500
|
7235 |
+
},
|
7236 |
+
{
|
7237 |
+
"epoch": 8.43,
|
7238 |
+
"learning_rate": 6.631804075198838e-05,
|
7239 |
+
"loss": 0.2595,
|
7240 |
+
"step": 362000
|
7241 |
+
},
|
7242 |
+
{
|
7243 |
+
"epoch": 8.43,
|
7244 |
+
"eval_loss": 0.2438839077949524,
|
7245 |
+
"eval_runtime": 1.4636,
|
7246 |
+
"eval_samples_per_second": 1501.733,
|
7247 |
+
"eval_steps_per_second": 23.913,
|
7248 |
+
"step": 362000
|
7249 |
+
},
|
7250 |
+
{
|
7251 |
+
"epoch": 8.45,
|
7252 |
+
"learning_rate": 6.593915665999816e-05,
|
7253 |
+
"loss": 0.2593,
|
7254 |
+
"step": 362500
|
7255 |
+
},
|
7256 |
+
{
|
7257 |
+
"epoch": 8.46,
|
7258 |
+
"learning_rate": 6.55612465231219e-05,
|
7259 |
+
"loss": 0.2593,
|
7260 |
+
"step": 363000
|
7261 |
+
},
|
7262 |
+
{
|
7263 |
+
"epoch": 8.46,
|
7264 |
+
"eval_loss": 0.24523594975471497,
|
7265 |
+
"eval_runtime": 1.4077,
|
7266 |
+
"eval_samples_per_second": 1561.38,
|
7267 |
+
"eval_steps_per_second": 24.863,
|
7268 |
+
"step": 363000
|
7269 |
+
},
|
7270 |
+
{
|
7271 |
+
"epoch": 8.47,
|
7272 |
+
"learning_rate": 6.518431447412434e-05,
|
7273 |
+
"loss": 0.2593,
|
7274 |
+
"step": 363500
|
7275 |
+
},
|
7276 |
+
{
|
7277 |
+
"epoch": 8.48,
|
7278 |
+
"learning_rate": 6.480836463507392e-05,
|
7279 |
+
"loss": 0.2593,
|
7280 |
+
"step": 364000
|
7281 |
+
},
|
7282 |
+
{
|
7283 |
+
"epoch": 8.48,
|
7284 |
+
"eval_loss": 0.24123093485832214,
|
7285 |
+
"eval_runtime": 1.4526,
|
7286 |
+
"eval_samples_per_second": 1513.098,
|
7287 |
+
"eval_steps_per_second": 24.094,
|
7288 |
+
"step": 364000
|
7289 |
+
},
|
7290 |
+
{
|
7291 |
+
"epoch": 8.49,
|
7292 |
+
"learning_rate": 6.443340111729786e-05,
|
7293 |
+
"loss": 0.259,
|
7294 |
+
"step": 364500
|
7295 |
+
},
|
7296 |
+
{
|
7297 |
+
"epoch": 8.5,
|
7298 |
+
"learning_rate": 6.405942802133713e-05,
|
7299 |
+
"loss": 0.2592,
|
7300 |
+
"step": 365000
|
7301 |
+
},
|
7302 |
+
{
|
7303 |
+
"epoch": 8.5,
|
7304 |
+
"eval_loss": 0.24300608038902283,
|
7305 |
+
"eval_runtime": 1.5063,
|
7306 |
+
"eval_samples_per_second": 1459.248,
|
7307 |
+
"eval_steps_per_second": 23.236,
|
7308 |
+
"step": 365000
|
7309 |
+
},
|
7310 |
+
{
|
7311 |
+
"epoch": 8.52,
|
7312 |
+
"learning_rate": 6.36864494369016e-05,
|
7313 |
+
"loss": 0.259,
|
7314 |
+
"step": 365500
|
7315 |
+
},
|
7316 |
+
{
|
7317 |
+
"epoch": 8.53,
|
7318 |
+
"learning_rate": 6.331446944282534e-05,
|
7319 |
+
"loss": 0.2587,
|
7320 |
+
"step": 366000
|
7321 |
+
},
|
7322 |
+
{
|
7323 |
+
"epoch": 8.53,
|
7324 |
+
"eval_loss": 0.24051520228385925,
|
7325 |
+
"eval_runtime": 1.4527,
|
7326 |
+
"eval_samples_per_second": 1513.01,
|
7327 |
+
"eval_steps_per_second": 24.093,
|
7328 |
+
"step": 366000
|
7329 |
+
},
|
7330 |
+
{
|
7331 |
+
"epoch": 8.54,
|
7332 |
+
"learning_rate": 6.294349210702188e-05,
|
7333 |
+
"loss": 0.2589,
|
7334 |
+
"step": 366500
|
7335 |
+
},
|
7336 |
+
{
|
7337 |
+
"epoch": 8.55,
|
7338 |
+
"learning_rate": 6.257352148643998e-05,
|
7339 |
+
"loss": 0.2591,
|
7340 |
+
"step": 367000
|
7341 |
+
},
|
7342 |
+
{
|
7343 |
+
"epoch": 8.55,
|
7344 |
+
"eval_loss": 0.2446797788143158,
|
7345 |
+
"eval_runtime": 1.4184,
|
7346 |
+
"eval_samples_per_second": 1549.604,
|
7347 |
+
"eval_steps_per_second": 24.675,
|
7348 |
+
"step": 367000
|
7349 |
+
},
|
7350 |
+
{
|
7351 |
+
"epoch": 8.56,
|
7352 |
+
"learning_rate": 6.220456162701908e-05,
|
7353 |
+
"loss": 0.2589,
|
7354 |
+
"step": 367500
|
7355 |
+
},
|
7356 |
+
{
|
7357 |
+
"epoch": 8.57,
|
7358 |
+
"learning_rate": 6.183661656364515e-05,
|
7359 |
+
"loss": 0.2586,
|
7360 |
+
"step": 368000
|
7361 |
+
},
|
7362 |
+
{
|
7363 |
+
"epoch": 8.57,
|
7364 |
+
"eval_loss": 0.24278691411018372,
|
7365 |
+
"eval_runtime": 1.4508,
|
7366 |
+
"eval_samples_per_second": 1515.055,
|
7367 |
+
"eval_steps_per_second": 24.125,
|
7368 |
+
"step": 368000
|
7369 |
+
},
|
7370 |
+
{
|
7371 |
+
"epoch": 8.59,
|
7372 |
+
"learning_rate": 6.146969032010631e-05,
|
7373 |
+
"loss": 0.2589,
|
7374 |
+
"step": 368500
|
7375 |
+
},
|
7376 |
+
{
|
7377 |
+
"epoch": 8.6,
|
7378 |
+
"learning_rate": 6.110378690904928e-05,
|
7379 |
+
"loss": 0.259,
|
7380 |
+
"step": 369000
|
7381 |
+
},
|
7382 |
+
{
|
7383 |
+
"epoch": 8.6,
|
7384 |
+
"eval_loss": 0.24149444699287415,
|
7385 |
+
"eval_runtime": 1.4509,
|
7386 |
+
"eval_samples_per_second": 1514.912,
|
7387 |
+
"eval_steps_per_second": 24.123,
|
7388 |
+
"step": 369000
|
7389 |
+
},
|
7390 |
+
{
|
7391 |
+
"epoch": 8.61,
|
7392 |
+
"learning_rate": 6.073891033193507e-05,
|
7393 |
+
"loss": 0.259,
|
7394 |
+
"step": 369500
|
7395 |
+
},
|
7396 |
+
{
|
7397 |
+
"epoch": 8.62,
|
7398 |
+
"learning_rate": 6.037506457899553e-05,
|
7399 |
+
"loss": 0.2588,
|
7400 |
+
"step": 370000
|
7401 |
+
},
|
7402 |
+
{
|
7403 |
+
"epoch": 8.62,
|
7404 |
+
"eval_loss": 0.23884567618370056,
|
7405 |
+
"eval_runtime": 1.4545,
|
7406 |
+
"eval_samples_per_second": 1511.147,
|
7407 |
+
"eval_steps_per_second": 24.063,
|
7408 |
+
"step": 370000
|
7409 |
}
|
7410 |
],
|
7411 |
"max_steps": 500000,
|
7412 |
"num_train_epochs": 12,
|
7413 |
+
"total_flos": 1.1821004158729246e+22,
|
7414 |
"trial_name": null,
|
7415 |
"trial_params": null
|
7416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e10781209da6498c21422e9093158ac9bd1d4e98d2f25e9bf1fc4805ab12c7d1
|
3 |
size 102501541
|