Training in progress, step 14700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8576ccb82b21bcc2b8addf4916f2f16bf5fac1bf334cb43ac6adf88efbcf106
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:650672fda6e5ee3003c98bce94c3d3b2333fe3a6cbc8b34be4d9defa333723aa
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23908ca485378ebcbd5221e3f9a4dfc2825be11cae4030a2167b67baabd7bf90
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44e3479363f6845648909d8b217e2aa3767b3ef51cc2decc24f7fd96a7818af1
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73db81e910b9ea3c99b0a41dadeb749a85548b72d68b5ef751adf848d3f6ccc4
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2295,6 +2295,70 @@
|
|
2295 |
"eval_samples_per_second": 26.237,
|
2296 |
"eval_steps_per_second": 3.28,
|
2297 |
"step": 14300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2298 |
}
|
2299 |
],
|
2300 |
"logging_steps": 100,
|
@@ -2302,7 +2366,7 @@
|
|
2302 |
"num_input_tokens_seen": 0,
|
2303 |
"num_train_epochs": 30,
|
2304 |
"save_steps": 100,
|
2305 |
-
"total_flos": 1.
|
2306 |
"train_batch_size": 8,
|
2307 |
"trial_name": null,
|
2308 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.188170911387076,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 14700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2295 |
"eval_samples_per_second": 26.237,
|
2296 |
"eval_steps_per_second": 3.28,
|
2297 |
"step": 14300
|
2298 |
+
},
|
2299 |
+
{
|
2300 |
+
"epoch": 6.06,
|
2301 |
+
"grad_norm": 2.6403534412384033,
|
2302 |
+
"learning_rate": 4.018445229681979e-05,
|
2303 |
+
"loss": 1.4007,
|
2304 |
+
"step": 14400
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 6.06,
|
2308 |
+
"eval_cer": 0.4687561104376051,
|
2309 |
+
"eval_loss": 2.6799304485321045,
|
2310 |
+
"eval_runtime": 377.8259,
|
2311 |
+
"eval_samples_per_second": 25.086,
|
2312 |
+
"eval_steps_per_second": 3.136,
|
2313 |
+
"step": 14400
|
2314 |
+
},
|
2315 |
+
{
|
2316 |
+
"epoch": 6.1,
|
2317 |
+
"grad_norm": 16.431684494018555,
|
2318 |
+
"learning_rate": 4.011378091872792e-05,
|
2319 |
+
"loss": 1.3673,
|
2320 |
+
"step": 14500
|
2321 |
+
},
|
2322 |
+
{
|
2323 |
+
"epoch": 6.1,
|
2324 |
+
"eval_cer": 0.4627141097336827,
|
2325 |
+
"eval_loss": 2.739257335662842,
|
2326 |
+
"eval_runtime": 365.917,
|
2327 |
+
"eval_samples_per_second": 25.902,
|
2328 |
+
"eval_steps_per_second": 3.238,
|
2329 |
+
"step": 14500
|
2330 |
+
},
|
2331 |
+
{
|
2332 |
+
"epoch": 6.15,
|
2333 |
+
"grad_norm": 1.2545260190963745,
|
2334 |
+
"learning_rate": 4.0043109540636045e-05,
|
2335 |
+
"loss": 1.398,
|
2336 |
+
"step": 14600
|
2337 |
+
},
|
2338 |
+
{
|
2339 |
+
"epoch": 6.15,
|
2340 |
+
"eval_cer": 0.46276054905948144,
|
2341 |
+
"eval_loss": 2.623035430908203,
|
2342 |
+
"eval_runtime": 380.5058,
|
2343 |
+
"eval_samples_per_second": 24.909,
|
2344 |
+
"eval_steps_per_second": 3.114,
|
2345 |
+
"step": 14600
|
2346 |
+
},
|
2347 |
+
{
|
2348 |
+
"epoch": 6.19,
|
2349 |
+
"grad_norm": 4.757970333099365,
|
2350 |
+
"learning_rate": 3.9972438162544173e-05,
|
2351 |
+
"loss": 1.7177,
|
2352 |
+
"step": 14700
|
2353 |
+
},
|
2354 |
+
{
|
2355 |
+
"epoch": 6.19,
|
2356 |
+
"eval_cer": 0.4671600641351531,
|
2357 |
+
"eval_loss": 3.1201841831207275,
|
2358 |
+
"eval_runtime": 364.6269,
|
2359 |
+
"eval_samples_per_second": 25.994,
|
2360 |
+
"eval_steps_per_second": 3.25,
|
2361 |
+
"step": 14700
|
2362 |
}
|
2363 |
],
|
2364 |
"logging_steps": 100,
|
|
|
2366 |
"num_input_tokens_seen": 0,
|
2367 |
"num_train_epochs": 30,
|
2368 |
"save_steps": 100,
|
2369 |
+
"total_flos": 1.6107102446089286e+20,
|
2370 |
"train_batch_size": 8,
|
2371 |
"trial_name": null,
|
2372 |
"trial_params": null
|