Training in progress, step 51900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e17ce21e8ae5702d27ee13ccfe698b9519f9191b1c9caff5644e90a5bb5c25d9
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a17a30230270a34456dc1ba21c1ead1f7aa39a64e33c3faa6efbf4b18d99cb5f
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dde4f5875f81547bdef6920d87fd6a84a4cb8fc9273b6febfeb843f0442e8a66
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4a50bec4064f680866f4e14ae49d80472966a4af2720d1f7bf3be3feeaef2f0
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5317f1b977bf0c5dd9e58209a5a3433f99d034b727592196ceb70ddd3bde3377
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 21.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8247,6 +8247,70 @@
|
|
8247 |
"eval_samples_per_second": 24.206,
|
8248 |
"eval_steps_per_second": 3.026,
|
8249 |
"step": 51500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8250 |
}
|
8251 |
],
|
8252 |
"logging_steps": 100,
|
@@ -8254,7 +8318,7 @@
|
|
8254 |
"num_input_tokens_seen": 0,
|
8255 |
"num_train_epochs": 30,
|
8256 |
"save_steps": 100,
|
8257 |
-
"total_flos": 5.
|
8258 |
"train_batch_size": 8,
|
8259 |
"trial_name": null,
|
8260 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 21.848031993264577,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 51900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8247 |
"eval_samples_per_second": 24.206,
|
8248 |
"eval_steps_per_second": 3.026,
|
8249 |
"step": 51500
|
8250 |
+
},
|
8251 |
+
{
|
8252 |
+
"epoch": 21.72,
|
8253 |
+
"grad_norm": 5.330277919769287,
|
8254 |
+
"learning_rate": 1.3903180212014135e-05,
|
8255 |
+
"loss": 0.3529,
|
8256 |
+
"step": 51600
|
8257 |
+
},
|
8258 |
+
{
|
8259 |
+
"epoch": 21.72,
|
8260 |
+
"eval_cer": 0.34077421688631654,
|
8261 |
+
"eval_loss": 2.4329657554626465,
|
8262 |
+
"eval_runtime": 422.4059,
|
8263 |
+
"eval_samples_per_second": 22.438,
|
8264 |
+
"eval_steps_per_second": 2.805,
|
8265 |
+
"step": 51600
|
8266 |
+
},
|
8267 |
+
{
|
8268 |
+
"epoch": 21.76,
|
8269 |
+
"grad_norm": 2.12868595123291,
|
8270 |
+
"learning_rate": 1.3833215547703182e-05,
|
8271 |
+
"loss": 0.3733,
|
8272 |
+
"step": 51700
|
8273 |
+
},
|
8274 |
+
{
|
8275 |
+
"epoch": 21.76,
|
8276 |
+
"eval_cer": 0.34047358335614564,
|
8277 |
+
"eval_loss": 2.4534361362457275,
|
8278 |
+
"eval_runtime": 382.1988,
|
8279 |
+
"eval_samples_per_second": 24.799,
|
8280 |
+
"eval_steps_per_second": 3.1,
|
8281 |
+
"step": 51700
|
8282 |
+
},
|
8283 |
+
{
|
8284 |
+
"epoch": 21.81,
|
8285 |
+
"grad_norm": 1.6962841749191284,
|
8286 |
+
"learning_rate": 1.3762544169611308e-05,
|
8287 |
+
"loss": 0.3523,
|
8288 |
+
"step": 51800
|
8289 |
+
},
|
8290 |
+
{
|
8291 |
+
"epoch": 21.81,
|
8292 |
+
"eval_cer": 0.3399554182472332,
|
8293 |
+
"eval_loss": 2.549654483795166,
|
8294 |
+
"eval_runtime": 415.7902,
|
8295 |
+
"eval_samples_per_second": 22.795,
|
8296 |
+
"eval_steps_per_second": 2.85,
|
8297 |
+
"step": 51800
|
8298 |
+
},
|
8299 |
+
{
|
8300 |
+
"epoch": 21.85,
|
8301 |
+
"grad_norm": 1.762255072593689,
|
8302 |
+
"learning_rate": 1.3691872791519436e-05,
|
8303 |
+
"loss": 0.3523,
|
8304 |
+
"step": 51900
|
8305 |
+
},
|
8306 |
+
{
|
8307 |
+
"epoch": 21.85,
|
8308 |
+
"eval_cer": 0.33849624574713544,
|
8309 |
+
"eval_loss": 2.5730228424072266,
|
8310 |
+
"eval_runtime": 391.5055,
|
8311 |
+
"eval_samples_per_second": 24.209,
|
8312 |
+
"eval_steps_per_second": 3.027,
|
8313 |
+
"step": 51900
|
8314 |
}
|
8315 |
],
|
8316 |
"logging_steps": 100,
|
|
|
8318 |
"num_input_tokens_seen": 0,
|
8319 |
"num_train_epochs": 30,
|
8320 |
"save_steps": 100,
|
8321 |
+
"total_flos": 5.6872995302509294e+20,
|
8322 |
"train_batch_size": 8,
|
8323 |
"trial_name": null,
|
8324 |
"trial_params": null
|