Training in progress, step 8500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b51ed093e6c15bc18f94991971206e757e14980a9c5a628c37f62695938205d
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d240111503baf208b3258d8cba674443ef0ea2639039331de2ec6efe7692b794
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66b55bdbc49a9bb2c273056a2b2153ade12b910b7c8f359f9751f5d74fa18c9c
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19c12ed0af5993f0a876a6b88522655ebb04b9cbe7b4cb312f3eaf595675e1bf
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b67e153873510a6ad6dd38628964444102a91c7ee710a17268ca1406a3ff007
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1335,6 +1335,38 @@
|
|
1335 |
"eval_samples_per_second": 26.546,
|
1336 |
"eval_steps_per_second": 3.319,
|
1337 |
"step": 8300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1338 |
}
|
1339 |
],
|
1340 |
"logging_steps": 100,
|
@@ -1342,7 +1374,7 @@
|
|
1342 |
"num_input_tokens_seen": 0,
|
1343 |
"num_train_epochs": 30,
|
1344 |
"save_steps": 100,
|
1345 |
-
"total_flos": 9.
|
1346 |
"train_batch_size": 8,
|
1347 |
"trial_name": null,
|
1348 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.5781940644074934,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 8500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1335 |
"eval_samples_per_second": 26.546,
|
1336 |
"eval_steps_per_second": 3.319,
|
1337 |
"step": 8300
|
1338 |
+
},
|
1339 |
+
{
|
1340 |
+
"epoch": 3.54,
|
1341 |
+
"grad_norm": 1.3938627243041992,
|
1342 |
+
"learning_rate": 4.442332155477032e-05,
|
1343 |
+
"loss": 2.1757,
|
1344 |
+
"step": 8400
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"epoch": 3.54,
|
1348 |
+
"eval_cer": 0.5164395213327597,
|
1349 |
+
"eval_loss": 1.6015843152999878,
|
1350 |
+
"eval_runtime": 375.0822,
|
1351 |
+
"eval_samples_per_second": 25.269,
|
1352 |
+
"eval_steps_per_second": 3.159,
|
1353 |
+
"step": 8400
|
1354 |
+
},
|
1355 |
+
{
|
1356 |
+
"epoch": 3.58,
|
1357 |
+
"grad_norm": 7.331507682800293,
|
1358 |
+
"learning_rate": 4.4352650176678445e-05,
|
1359 |
+
"loss": 2.3463,
|
1360 |
+
"step": 8500
|
1361 |
+
},
|
1362 |
+
{
|
1363 |
+
"epoch": 3.58,
|
1364 |
+
"eval_cer": 0.4924132806695084,
|
1365 |
+
"eval_loss": 3.185905933380127,
|
1366 |
+
"eval_runtime": 357.5938,
|
1367 |
+
"eval_samples_per_second": 26.505,
|
1368 |
+
"eval_steps_per_second": 3.314,
|
1369 |
+
"step": 8500
|
1370 |
}
|
1371 |
],
|
1372 |
"logging_steps": 100,
|
|
|
1374 |
"num_input_tokens_seen": 0,
|
1375 |
"num_train_epochs": 30,
|
1376 |
"save_steps": 100,
|
1377 |
+
"total_flos": 9.318809128668452e+19,
|
1378 |
"train_batch_size": 8,
|
1379 |
"trial_name": null,
|
1380 |
"trial_params": null
|