ashanhr committed on
Commit
c2add65
·
verified ·
1 Parent(s): c615d8e

Training in progress, step 8300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:408c6cd9a698b16001a43197f1c14eb97815dc79cf0faf065cdd9fae5f5d7a0c
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb9185b5a4178ca9b95ea9a03e81f42aab94937d5536d0aee20a7d50113c74e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29518292804cc1bbadfb0c2d47c708a9ad2a4899919ddbb39ca62659b229c095
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f20f2e893a5805f94e67a072ca607e87d6bd1f6bf961c5d10cf2c83fc0eca10
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c599e77c37f648f3f925b89128d4d5cf7e01fb20ae88a5649c4f9e9edbed6487
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483b010e40619bd56631da53658bb649fe7d4dd6c1a8c3d654abffab621a3b4b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2427345fe81496661e617176cf30f03a179e764f54042a1de82a0b0e5e040e18
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb949cb9ef5d042fa95175eb1cd636c4986ae56e852d369f16efd31f056bea0
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4541330612b9d0a7a65c65620d7604d28710aaed0289ff9c20765e43c3bdf6a7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f59710310815d68188af9ff723493a6950c9e2340e3b280d58445c624870566
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.4098084613765525,
5
  "eval_steps": 100,
6
- "global_step": 8100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1303,6 +1303,38 @@
1303
  "eval_samples_per_second": 26.451,
1304
  "eval_steps_per_second": 3.307,
1305
  "step": 8100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1306
  }
1307
  ],
1308
  "logging_steps": 100,
@@ -1310,7 +1342,7 @@
1310
  "num_input_tokens_seen": 0,
1311
  "num_train_epochs": 30,
1312
  "save_steps": 100,
1313
- "total_flos": 8.881279509355615e+19,
1314
  "train_batch_size": 8,
1315
  "trial_name": null,
1316
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.4940012628920227,
5
  "eval_steps": 100,
6
+ "global_step": 8300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1303
  "eval_samples_per_second": 26.451,
1304
  "eval_steps_per_second": 3.307,
1305
  "step": 8100
1306
+ },
1307
+ {
1308
+ "epoch": 3.45,
1309
+ "grad_norm": 3.6079511642456055,
1310
+ "learning_rate": 4.456466431095407e-05,
1311
+ "loss": 1.7713,
1312
+ "step": 8200
1313
+ },
1314
+ {
1315
+ "epoch": 3.45,
1316
+ "eval_cer": 0.495703140276094,
1317
+ "eval_loss": 2.7158584594726562,
1318
+ "eval_runtime": 373.6599,
1319
+ "eval_samples_per_second": 25.365,
1320
+ "eval_steps_per_second": 3.171,
1321
+ "step": 8200
1322
+ },
1323
+ {
1324
+ "epoch": 3.49,
1325
+ "grad_norm": 3.969160318374634,
1326
+ "learning_rate": 4.4493992932862195e-05,
1327
+ "loss": 3.3072,
1328
+ "step": 8300
1329
+ },
1330
+ {
1331
+ "epoch": 3.49,
1332
+ "eval_cer": 0.49535606742012434,
1333
+ "eval_loss": 1.9461767673492432,
1334
+ "eval_runtime": 357.0354,
1335
+ "eval_samples_per_second": 26.546,
1336
+ "eval_steps_per_second": 3.319,
1337
+ "step": 8300
1338
  }
1339
  ],
1340
  "logging_steps": 100,
 
1342
  "num_input_tokens_seen": 0,
1343
  "num_train_epochs": 30,
1344
  "save_steps": 100,
1345
+ "total_flos": 9.10041827788282e+19,
1346
  "train_batch_size": 8,
1347
  "trial_name": null,
1348
  "trial_params": null