Training in progress, epoch 76, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559424792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4af379246575accc58f6abfac00aee925ce6b65883c8632448c83ee50ddfd07
|
3 |
size 559424792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1118926970
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:930083a4131ce65cdc8b356487bf1b813c159fa19cc01194a70d602dd9b94b36
|
3 |
size 1118926970
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:914809037b3f88a1a2608685ab6ce1391a78e990c3ce33067466cc03d6a8480d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f90a182f176031c61cd117d508af79c4ff26bf3b95484e9bbe4017a087414d71
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7453,6 +7453,105 @@
|
|
7453 |
"eval_samples_per_second": 29.556,
|
7454 |
"eval_steps_per_second": 3.714,
|
7455 |
"step": 97875
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7456 |
}
|
7457 |
],
|
7458 |
"logging_steps": 100,
|
@@ -7467,7 +7566,7 @@
|
|
7467 |
"early_stopping_threshold": 0.0
|
7468 |
},
|
7469 |
"attributes": {
|
7470 |
-
"early_stopping_patience_counter":
|
7471 |
}
|
7472 |
},
|
7473 |
"TrainerControl": {
|
@@ -7481,7 +7580,7 @@
|
|
7481 |
"attributes": {}
|
7482 |
}
|
7483 |
},
|
7484 |
-
"total_flos": 4.
|
7485 |
"train_batch_size": 8,
|
7486 |
"trial_name": null,
|
7487 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
+
"epoch": 76.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 99180,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7453 |
"eval_samples_per_second": 29.556,
|
7454 |
"eval_steps_per_second": 3.714,
|
7455 |
"step": 97875
|
7456 |
+
},
|
7457 |
+
{
|
7458 |
+
"epoch": 75.01915708812261,
|
7459 |
+
"grad_norm": 0.9783554673194885,
|
7460 |
+
"learning_rate": 3.1317049808429124e-06,
|
7461 |
+
"loss": 11.7455,
|
7462 |
+
"step": 97900
|
7463 |
+
},
|
7464 |
+
{
|
7465 |
+
"epoch": 75.09578544061303,
|
7466 |
+
"grad_norm": 1.4434301853179932,
|
7467 |
+
"learning_rate": 3.0838122605363985e-06,
|
7468 |
+
"loss": 11.99,
|
7469 |
+
"step": 98000
|
7470 |
+
},
|
7471 |
+
{
|
7472 |
+
"epoch": 75.17241379310344,
|
7473 |
+
"grad_norm": 1.2560200691223145,
|
7474 |
+
"learning_rate": 3.035919540229885e-06,
|
7475 |
+
"loss": 11.8445,
|
7476 |
+
"step": 98100
|
7477 |
+
},
|
7478 |
+
{
|
7479 |
+
"epoch": 75.24904214559388,
|
7480 |
+
"grad_norm": 1.123687982559204,
|
7481 |
+
"learning_rate": 2.988026819923372e-06,
|
7482 |
+
"loss": 11.8894,
|
7483 |
+
"step": 98200
|
7484 |
+
},
|
7485 |
+
{
|
7486 |
+
"epoch": 75.32567049808429,
|
7487 |
+
"grad_norm": 1.2393250465393066,
|
7488 |
+
"learning_rate": 2.9401340996168583e-06,
|
7489 |
+
"loss": 11.7591,
|
7490 |
+
"step": 98300
|
7491 |
+
},
|
7492 |
+
{
|
7493 |
+
"epoch": 75.40229885057471,
|
7494 |
+
"grad_norm": 2.023070812225342,
|
7495 |
+
"learning_rate": 2.892241379310345e-06,
|
7496 |
+
"loss": 11.7083,
|
7497 |
+
"step": 98400
|
7498 |
+
},
|
7499 |
+
{
|
7500 |
+
"epoch": 75.47892720306514,
|
7501 |
+
"grad_norm": 1.7746585607528687,
|
7502 |
+
"learning_rate": 2.8443486590038316e-06,
|
7503 |
+
"loss": 12.0237,
|
7504 |
+
"step": 98500
|
7505 |
+
},
|
7506 |
+
{
|
7507 |
+
"epoch": 75.55555555555556,
|
7508 |
+
"grad_norm": 1.6215800046920776,
|
7509 |
+
"learning_rate": 2.796455938697318e-06,
|
7510 |
+
"loss": 11.8271,
|
7511 |
+
"step": 98600
|
7512 |
+
},
|
7513 |
+
{
|
7514 |
+
"epoch": 75.63218390804597,
|
7515 |
+
"grad_norm": 2.3727614879608154,
|
7516 |
+
"learning_rate": 2.7490421455938698e-06,
|
7517 |
+
"loss": 11.9133,
|
7518 |
+
"step": 98700
|
7519 |
+
},
|
7520 |
+
{
|
7521 |
+
"epoch": 75.7088122605364,
|
7522 |
+
"grad_norm": 1.562569260597229,
|
7523 |
+
"learning_rate": 2.7011494252873562e-06,
|
7524 |
+
"loss": 11.8886,
|
7525 |
+
"step": 98800
|
7526 |
+
},
|
7527 |
+
{
|
7528 |
+
"epoch": 75.78544061302682,
|
7529 |
+
"grad_norm": 0.8996521830558777,
|
7530 |
+
"learning_rate": 2.653256704980843e-06,
|
7531 |
+
"loss": 11.6606,
|
7532 |
+
"step": 98900
|
7533 |
+
},
|
7534 |
+
{
|
7535 |
+
"epoch": 75.86206896551724,
|
7536 |
+
"grad_norm": 1.6331411600112915,
|
7537 |
+
"learning_rate": 2.6053639846743296e-06,
|
7538 |
+
"loss": 12.057,
|
7539 |
+
"step": 99000
|
7540 |
+
},
|
7541 |
+
{
|
7542 |
+
"epoch": 75.93869731800767,
|
7543 |
+
"grad_norm": 1.2690104246139526,
|
7544 |
+
"learning_rate": 2.5574712643678165e-06,
|
7545 |
+
"loss": 11.9791,
|
7546 |
+
"step": 99100
|
7547 |
+
},
|
7548 |
+
{
|
7549 |
+
"epoch": 76.0,
|
7550 |
+
"eval_loss": 12.717323303222656,
|
7551 |
+
"eval_runtime": 44.1546,
|
7552 |
+
"eval_samples_per_second": 29.555,
|
7553 |
+
"eval_steps_per_second": 3.714,
|
7554 |
+
"step": 99180
|
7555 |
}
|
7556 |
],
|
7557 |
"logging_steps": 100,
|
|
|
7566 |
"early_stopping_threshold": 0.0
|
7567 |
},
|
7568 |
"attributes": {
|
7569 |
+
"early_stopping_patience_counter": 8
|
7570 |
}
|
7571 |
},
|
7572 |
"TrainerControl": {
|
|
|
7580 |
"attributes": {}
|
7581 |
}
|
7582 |
},
|
7583 |
+
"total_flos": 4.620861214306099e+16,
|
7584 |
"train_batch_size": 8,
|
7585 |
"trial_name": null,
|
7586 |
"trial_params": null
|