ashanhr commited on
Commit
762136a
1 Parent(s): c0e4562

Training in progress, step 51900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6879e7a47865a2d9dbdc1dee441becb941a61f7ae9db55a4342ae07a816d700
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e17ce21e8ae5702d27ee13ccfe698b9519f9191b1c9caff5644e90a5bb5c25d9
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab07935c87ce3f230af1aca71f7d354f2ae99539d4922cd9ff951ce52ecc827f
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17a30230270a34456dc1ba21c1ead1f7aa39a64e33c3faa6efbf4b18d99cb5f
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691a9b667c584e2397167aa221b255d9341ff38ca1d8535cfe09f55be3c23054
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde4f5875f81547bdef6920d87fd6a84a4cb8fc9273b6febfeb843f0442e8a66
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6da86477a7e38c974647d1441bec2689395584cdc4189de87cdd2736695ba37f
3
- size 14631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a50bec4064f680866f4e14ae49d80472966a4af2720d1f7bf3be3feeaef2f0
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a0508acb91372d3d8167376986d8fc4095af16c3a72cecf2777e3dd527abab5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5317f1b977bf0c5dd9e58209a5a3433f99d034b727592196ceb70ddd3bde3377
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.679646390233636,
5
  "eval_steps": 100,
6
- "global_step": 51500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8247,6 +8247,70 @@
8247
  "eval_samples_per_second": 24.206,
8248
  "eval_steps_per_second": 3.026,
8249
  "step": 51500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8250
  }
8251
  ],
8252
  "logging_steps": 100,
@@ -8254,7 +8318,7 @@
8254
  "num_input_tokens_seen": 0,
8255
  "num_train_epochs": 30,
8256
  "save_steps": 100,
8257
- "total_flos": 5.643614886293618e+20,
8258
  "train_batch_size": 8,
8259
  "trial_name": null,
8260
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.848031993264577,
5
  "eval_steps": 100,
6
+ "global_step": 51900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8247
  "eval_samples_per_second": 24.206,
8248
  "eval_steps_per_second": 3.026,
8249
  "step": 51500
8250
+ },
8251
+ {
8252
+ "epoch": 21.72,
8253
+ "grad_norm": 5.330277919769287,
8254
+ "learning_rate": 1.3903180212014135e-05,
8255
+ "loss": 0.3529,
8256
+ "step": 51600
8257
+ },
8258
+ {
8259
+ "epoch": 21.72,
8260
+ "eval_cer": 0.34077421688631654,
8261
+ "eval_loss": 2.4329657554626465,
8262
+ "eval_runtime": 422.4059,
8263
+ "eval_samples_per_second": 22.438,
8264
+ "eval_steps_per_second": 2.805,
8265
+ "step": 51600
8266
+ },
8267
+ {
8268
+ "epoch": 21.76,
8269
+ "grad_norm": 2.12868595123291,
8270
+ "learning_rate": 1.3833215547703182e-05,
8271
+ "loss": 0.3733,
8272
+ "step": 51700
8273
+ },
8274
+ {
8275
+ "epoch": 21.76,
8276
+ "eval_cer": 0.34047358335614564,
8277
+ "eval_loss": 2.4534361362457275,
8278
+ "eval_runtime": 382.1988,
8279
+ "eval_samples_per_second": 24.799,
8280
+ "eval_steps_per_second": 3.1,
8281
+ "step": 51700
8282
+ },
8283
+ {
8284
+ "epoch": 21.81,
8285
+ "grad_norm": 1.6962841749191284,
8286
+ "learning_rate": 1.3762544169611308e-05,
8287
+ "loss": 0.3523,
8288
+ "step": 51800
8289
+ },
8290
+ {
8291
+ "epoch": 21.81,
8292
+ "eval_cer": 0.3399554182472332,
8293
+ "eval_loss": 2.549654483795166,
8294
+ "eval_runtime": 415.7902,
8295
+ "eval_samples_per_second": 22.795,
8296
+ "eval_steps_per_second": 2.85,
8297
+ "step": 51800
8298
+ },
8299
+ {
8300
+ "epoch": 21.85,
8301
+ "grad_norm": 1.762255072593689,
8302
+ "learning_rate": 1.3691872791519436e-05,
8303
+ "loss": 0.3523,
8304
+ "step": 51900
8305
+ },
8306
+ {
8307
+ "epoch": 21.85,
8308
+ "eval_cer": 0.33849624574713544,
8309
+ "eval_loss": 2.5730228424072266,
8310
+ "eval_runtime": 391.5055,
8311
+ "eval_samples_per_second": 24.209,
8312
+ "eval_steps_per_second": 3.027,
8313
+ "step": 51900
8314
  }
8315
  ],
8316
  "logging_steps": 100,
 
8318
  "num_input_tokens_seen": 0,
8319
  "num_train_epochs": 30,
8320
  "save_steps": 100,
8321
+ "total_flos": 5.6872995302509294e+20,
8322
  "train_batch_size": 8,
8323
  "trial_name": null,
8324
  "trial_params": null