Training in progress, epoch 75, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559424792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaef454aca5a90004a1f7fa735006aaf07a09b80a0c6a02c48a5c9954a9c62a8
|
3 |
size 559424792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1118926970
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6db55a8256108f51c16b7c04a0dda48b0840ae7fedf6f98128494c32336c189e
|
3 |
size 1118926970
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95d7a84ac41b5d0b0849140ed4b55c3d3d4144a4583408c6d56207542a111683
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e895d61aa04b2cc52d453ad8ef1da3d7dbc854daaaeb69662e0cdbedb748f6ab
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7354,6 +7354,105 @@
|
|
7354 |
"eval_samples_per_second": 29.56,
|
7355 |
"eval_steps_per_second": 3.715,
|
7356 |
"step": 96570
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7357 |
}
|
7358 |
],
|
7359 |
"logging_steps": 100,
|
@@ -7368,7 +7467,7 @@
|
|
7368 |
"early_stopping_threshold": 0.0
|
7369 |
},
|
7370 |
"attributes": {
|
7371 |
-
"early_stopping_patience_counter":
|
7372 |
}
|
7373 |
},
|
7374 |
"TrainerControl": {
|
@@ -7382,7 +7481,7 @@
|
|
7382 |
"attributes": {}
|
7383 |
}
|
7384 |
},
|
7385 |
-
"total_flos": 4.
|
7386 |
"train_batch_size": 8,
|
7387 |
"trial_name": null,
|
7388 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
+
"epoch": 75.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 97875,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7354 |
"eval_samples_per_second": 29.56,
|
7355 |
"eval_steps_per_second": 3.715,
|
7356 |
"step": 96570
|
7357 |
+
},
|
7358 |
+
{
|
7359 |
+
"epoch": 74.02298850574712,
|
7360 |
+
"grad_norm": 1.6064398288726807,
|
7361 |
+
"learning_rate": 3.7543103448275864e-06,
|
7362 |
+
"loss": 11.8828,
|
7363 |
+
"step": 96600
|
7364 |
+
},
|
7365 |
+
{
|
7366 |
+
"epoch": 74.09961685823755,
|
7367 |
+
"grad_norm": 2.088803768157959,
|
7368 |
+
"learning_rate": 3.7064176245210733e-06,
|
7369 |
+
"loss": 11.7576,
|
7370 |
+
"step": 96700
|
7371 |
+
},
|
7372 |
+
{
|
7373 |
+
"epoch": 74.17624521072797,
|
7374 |
+
"grad_norm": 1.5417454242706299,
|
7375 |
+
"learning_rate": 3.6585249042145593e-06,
|
7376 |
+
"loss": 11.9239,
|
7377 |
+
"step": 96800
|
7378 |
+
},
|
7379 |
+
{
|
7380 |
+
"epoch": 74.25287356321839,
|
7381 |
+
"grad_norm": 1.5983319282531738,
|
7382 |
+
"learning_rate": 3.610632183908046e-06,
|
7383 |
+
"loss": 11.8119,
|
7384 |
+
"step": 96900
|
7385 |
+
},
|
7386 |
+
{
|
7387 |
+
"epoch": 74.32950191570882,
|
7388 |
+
"grad_norm": 3.7642099857330322,
|
7389 |
+
"learning_rate": 3.5627394636015326e-06,
|
7390 |
+
"loss": 11.8259,
|
7391 |
+
"step": 97000
|
7392 |
+
},
|
7393 |
+
{
|
7394 |
+
"epoch": 74.40613026819923,
|
7395 |
+
"grad_norm": 1.5149072408676147,
|
7396 |
+
"learning_rate": 3.5148467432950195e-06,
|
7397 |
+
"loss": 11.9898,
|
7398 |
+
"step": 97100
|
7399 |
+
},
|
7400 |
+
{
|
7401 |
+
"epoch": 74.48275862068965,
|
7402 |
+
"grad_norm": 0.9915036559104919,
|
7403 |
+
"learning_rate": 3.4669540229885055e-06,
|
7404 |
+
"loss": 11.7665,
|
7405 |
+
"step": 97200
|
7406 |
+
},
|
7407 |
+
{
|
7408 |
+
"epoch": 74.55938697318008,
|
7409 |
+
"grad_norm": 1.2745176553726196,
|
7410 |
+
"learning_rate": 3.4190613026819924e-06,
|
7411 |
+
"loss": 11.9657,
|
7412 |
+
"step": 97300
|
7413 |
+
},
|
7414 |
+
{
|
7415 |
+
"epoch": 74.6360153256705,
|
7416 |
+
"grad_norm": 2.390751600265503,
|
7417 |
+
"learning_rate": 3.3711685823754793e-06,
|
7418 |
+
"loss": 11.6856,
|
7419 |
+
"step": 97400
|
7420 |
+
},
|
7421 |
+
{
|
7422 |
+
"epoch": 74.71264367816092,
|
7423 |
+
"grad_norm": 2.2279295921325684,
|
7424 |
+
"learning_rate": 3.3232758620689653e-06,
|
7425 |
+
"loss": 11.7551,
|
7426 |
+
"step": 97500
|
7427 |
+
},
|
7428 |
+
{
|
7429 |
+
"epoch": 74.78927203065135,
|
7430 |
+
"grad_norm": 1.8389006853103638,
|
7431 |
+
"learning_rate": 3.275383141762452e-06,
|
7432 |
+
"loss": 12.0037,
|
7433 |
+
"step": 97600
|
7434 |
+
},
|
7435 |
+
{
|
7436 |
+
"epoch": 74.86590038314176,
|
7437 |
+
"grad_norm": 1.4288936853408813,
|
7438 |
+
"learning_rate": 3.2274904214559387e-06,
|
7439 |
+
"loss": 12.0561,
|
7440 |
+
"step": 97700
|
7441 |
+
},
|
7442 |
+
{
|
7443 |
+
"epoch": 74.94252873563218,
|
7444 |
+
"grad_norm": 1.037800669670105,
|
7445 |
+
"learning_rate": 3.1795977011494255e-06,
|
7446 |
+
"loss": 11.9257,
|
7447 |
+
"step": 97800
|
7448 |
+
},
|
7449 |
+
{
|
7450 |
+
"epoch": 75.0,
|
7451 |
+
"eval_loss": 12.724896430969238,
|
7452 |
+
"eval_runtime": 44.1538,
|
7453 |
+
"eval_samples_per_second": 29.556,
|
7454 |
+
"eval_steps_per_second": 3.714,
|
7455 |
+
"step": 97875
|
7456 |
}
|
7457 |
],
|
7458 |
"logging_steps": 100,
|
|
|
7467 |
"early_stopping_threshold": 0.0
|
7468 |
},
|
7469 |
"attributes": {
|
7470 |
+
"early_stopping_patience_counter": 7
|
7471 |
}
|
7472 |
},
|
7473 |
"TrainerControl": {
|
|
|
7481 |
"attributes": {}
|
7482 |
}
|
7483 |
},
|
7484 |
+
"total_flos": 4.560071444651981e+16,
|
7485 |
"train_batch_size": 8,
|
7486 |
"trial_name": null,
|
7487 |
"trial_params": null
|