ashanhr commited on
Commit
5bd50fa
·
verified ·
1 Parent(s): 118d272

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cb9185b5a4178ca9b95ea9a03e81f42aab94937d5536d0aee20a7d50113c74e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b51ed093e6c15bc18f94991971206e757e14980a9c5a628c37f62695938205d
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f20f2e893a5805f94e67a072ca607e87d6bd1f6bf961c5d10cf2c83fc0eca10
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d240111503baf208b3258d8cba674443ef0ea2639039331de2ec6efe7692b794
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:483b010e40619bd56631da53658bb649fe7d4dd6c1a8c3d654abffab621a3b4b
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b55bdbc49a9bb2c273056a2b2153ade12b910b7c8f359f9751f5d74fa18c9c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb949cb9ef5d042fa95175eb1cd636c4986ae56e852d369f16efd31f056bea0
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c12ed0af5993f0a876a6b88522655ebb04b9cbe7b4cb312f3eaf595675e1bf
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f59710310815d68188af9ff723493a6950c9e2340e3b280d58445c624870566
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b67e153873510a6ad6dd38628964444102a91c7ee710a17268ca1406a3ff007
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.4940012628920227,
5
  "eval_steps": 100,
6
- "global_step": 8300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1335,6 +1335,38 @@
1335
  "eval_samples_per_second": 26.546,
1336
  "eval_steps_per_second": 3.319,
1337
  "step": 8300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1338
  }
1339
  ],
1340
  "logging_steps": 100,
@@ -1342,7 +1374,7 @@
1342
  "num_input_tokens_seen": 0,
1343
  "num_train_epochs": 30,
1344
  "save_steps": 100,
1345
- "total_flos": 9.10041827788282e+19,
1346
  "train_batch_size": 8,
1347
  "trial_name": null,
1348
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5781940644074934,
5
  "eval_steps": 100,
6
+ "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1335
  "eval_samples_per_second": 26.546,
1336
  "eval_steps_per_second": 3.319,
1337
  "step": 8300
1338
+ },
1339
+ {
1340
+ "epoch": 3.54,
1341
+ "grad_norm": 1.3938627243041992,
1342
+ "learning_rate": 4.442332155477032e-05,
1343
+ "loss": 2.1757,
1344
+ "step": 8400
1345
+ },
1346
+ {
1347
+ "epoch": 3.54,
1348
+ "eval_cer": 0.5164395213327597,
1349
+ "eval_loss": 1.6015843152999878,
1350
+ "eval_runtime": 375.0822,
1351
+ "eval_samples_per_second": 25.269,
1352
+ "eval_steps_per_second": 3.159,
1353
+ "step": 8400
1354
+ },
1355
+ {
1356
+ "epoch": 3.58,
1357
+ "grad_norm": 7.331507682800293,
1358
+ "learning_rate": 4.4352650176678445e-05,
1359
+ "loss": 2.3463,
1360
+ "step": 8500
1361
+ },
1362
+ {
1363
+ "epoch": 3.58,
1364
+ "eval_cer": 0.4924132806695084,
1365
+ "eval_loss": 3.185905933380127,
1366
+ "eval_runtime": 357.5938,
1367
+ "eval_samples_per_second": 26.505,
1368
+ "eval_steps_per_second": 3.314,
1369
+ "step": 8500
1370
  }
1371
  ],
1372
  "logging_steps": 100,
 
1374
  "num_input_tokens_seen": 0,
1375
  "num_train_epochs": 30,
1376
  "save_steps": 100,
1377
+ "total_flos": 9.318809128668452e+19,
1378
  "train_batch_size": 8,
1379
  "trial_name": null,
1380
  "trial_params": null