ashanhr commited on
Commit
31b928f
1 Parent(s): 570bf67

Training in progress, step 27300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8e542af8d76367f68d7013ad25645c5c0bb83d400a9c28ba347486b7e1af191
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15296cc7fc7c22faf537cf2d8c8189212cd4050aec0cc26ecd5ca6b2b31430c
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76ef8497c6e127aeccc2e106755bbf32a546fa8428fa8775d2e9e8d8e5652691
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb178d8b8052b34bfc9ce5171bddbd008d7445958fd6793e6c78c28460f94e3
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e7d171ad28335cab6bd4c1150438a645740b385e44943066cf6cf9cdad1d0a8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616fbff97ed13f412ac2118b023cc68e97d2cdaa667165e80291c9b722839e9c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b0fd6f9494bd763f598141a559ed76c4d8dfcfafe1cb83a86b361df0b818dea
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1fa851642b5df9872122f6cd49ffbd3855aa0c135f0547c13649b70bce71d4
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945cfe3d07d63b571472084df19a5a6f95aade9bf666b3cf8938678e9d425fd2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1556b06bd9834de95519bce0dfbc04d14a4931ce96de22038d246f85d978575d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.408124605346243,
5
  "eval_steps": 100,
6
- "global_step": 27100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4343,6 +4343,38 @@
4343
  "eval_samples_per_second": 25.753,
4344
  "eval_steps_per_second": 3.22,
4345
  "step": 27100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4346
  }
4347
  ],
4348
  "logging_steps": 100,
@@ -4350,7 +4382,7 @@
4350
  "num_input_tokens_seen": 0,
4351
  "num_train_epochs": 30,
4352
  "save_steps": 100,
4353
- "total_flos": 2.96989846970687e+20,
4354
  "train_batch_size": 8,
4355
  "trial_name": null,
4356
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.492317406861714,
5
  "eval_steps": 100,
6
+ "global_step": 27300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4343
  "eval_samples_per_second": 25.753,
4344
  "eval_steps_per_second": 3.22,
4345
  "step": 27100
4346
+ },
4347
+ {
4348
+ "epoch": 11.45,
4349
+ "grad_norm": 1.9599921703338623,
4350
+ "learning_rate": 3.1141342756183745e-05,
4351
+ "loss": 1.1358,
4352
+ "step": 27200
4353
+ },
4354
+ {
4355
+ "epoch": 11.45,
4356
+ "eval_cer": 0.4173404442532556,
4357
+ "eval_loss": 1.598440170288086,
4358
+ "eval_runtime": 388.8994,
4359
+ "eval_samples_per_second": 24.371,
4360
+ "eval_steps_per_second": 3.047,
4361
+ "step": 27200
4362
+ },
4363
+ {
4364
+ "epoch": 11.49,
4365
+ "grad_norm": 1.7773711681365967,
4366
+ "learning_rate": 3.107067137809187e-05,
4367
+ "loss": 1.1567,
4368
+ "step": 27300
4369
+ },
4370
+ {
4371
+ "epoch": 11.49,
4372
+ "eval_cer": 0.4288060693754644,
4373
+ "eval_loss": 3.0100185871124268,
4374
+ "eval_runtime": 392.5508,
4375
+ "eval_samples_per_second": 24.145,
4376
+ "eval_steps_per_second": 3.019,
4377
+ "step": 27300
4378
  }
4379
  ],
4380
  "logging_steps": 100,
 
4382
  "num_input_tokens_seen": 0,
4383
  "num_train_epochs": 30,
4384
  "save_steps": 100,
4385
+ "total_flos": 2.992071149315134e+20,
4386
  "train_batch_size": 8,
4387
  "trial_name": null,
4388
  "trial_params": null