Training in progress, step 15500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 438032472
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9087679e21c72a03308c1f1042fa388a26aa895e37052d6ba8f2760f8ed9d62c
|
3 |
size 438032472
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 876185978
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:179ba7759c2da654ccc0155ea7d3cd89eacd0e0a80f00b4769a3bda069f1542b
|
3 |
size 876185978
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71854c462585f75923baf32808b59ce1d40f6f83c6c3ee96dd1e08a419477550
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02efed2160d416430cfdc4b1c07b94736d779e3a16a8f0c18ef61810e8e70171
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.47014790773391724,
|
3 |
"best_model_checkpoint": "results/checkpoint-14000",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -457,6 +457,21 @@
|
|
457 |
"eval_samples_per_second": 233.634,
|
458 |
"eval_steps_per_second": 3.662,
|
459 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
}
|
461 |
],
|
462 |
"logging_steps": 500,
|
@@ -476,7 +491,7 @@
|
|
476 |
"attributes": {}
|
477 |
}
|
478 |
},
|
479 |
-
"total_flos": 1.
|
480 |
"train_batch_size": 32,
|
481 |
"trial_name": null,
|
482 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.47014790773391724,
|
3 |
"best_model_checkpoint": "results/checkpoint-14000",
|
4 |
+
"epoch": 1.0311335816923897,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 15500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
457 |
"eval_samples_per_second": 233.634,
|
458 |
"eval_steps_per_second": 3.662,
|
459 |
"step": 15000
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 1.0311335816923897,
|
463 |
+
"grad_norm": 5.288357257843018,
|
464 |
+
"learning_rate": 3.281444030512684e-05,
|
465 |
+
"loss": 0.3673,
|
466 |
+
"step": 15500
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"epoch": 1.0311335816923897,
|
470 |
+
"eval_loss": 0.47548824548721313,
|
471 |
+
"eval_runtime": 40.9898,
|
472 |
+
"eval_samples_per_second": 253.722,
|
473 |
+
"eval_steps_per_second": 3.977,
|
474 |
+
"step": 15500
|
475 |
}
|
476 |
],
|
477 |
"logging_steps": 500,
|
|
|
491 |
"attributes": {}
|
492 |
}
|
493 |
},
|
494 |
+
"total_flos": 1.305248890611794e+17,
|
495 |
"train_batch_size": 32,
|
496 |
"trial_name": null,
|
497 |
"trial_params": null
|