ashanhr commited on
Commit
924c9d1
1 Parent(s): c4c55c9

Training in progress, step 53500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00ef485d074352aca5556b0497b480b64f5a597428f78f4530b27b6743398719
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e35b78386d3b49c89f85ffffe06aae621443cf1b932a8092b4db090fbd3ec96
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43478402ec9b98f774cd2d2f4bfd0486130906c01dad84a75d6574a44e13c030
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:874666ed33022a022c5899409f513db7f4eb2c26c88a84553937a6e405f29378
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b1c06a4215a8a7ee627724ddb5dc01d00b82023b0098d64dc5a28a3f3f9a399
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81361d862eaf160b9e170db03019275ae10369ec53a2433cdd4bf4a722d8c87d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b75e5a022877b84c4104618a8d23ec7b90fdc7aadaa0cd12f67cfded1c78a671
3
- size 14631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d5783c2395d6ceb319348029dab347862318433ddfe999f17859fa9858fbd44
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23380186d0e4c7eec5667700cc9af150fab32a4e15bb4b1323f659a444bfb4c0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc7f8ff46b15057ea0b7cff87e35527c6b328af1a3f89ae33ca25b7b381ad498
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.43738160387287,
5
  "eval_steps": 100,
6
- "global_step": 53300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8535,6 +8535,38 @@
8535
  "eval_samples_per_second": 24.471,
8536
  "eval_steps_per_second": 3.06,
8537
  "step": 53300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8538
  }
8539
  ],
8540
  "logging_steps": 100,
@@ -8542,7 +8574,7 @@
8542
  "num_input_tokens_seen": 0,
8543
  "num_train_epochs": 30,
8544
  "save_steps": 100,
8545
- "total_flos": 5.840653170107694e+20,
8546
  "train_batch_size": 8,
8547
  "trial_name": null,
8548
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.52157440538834,
5
  "eval_steps": 100,
6
+ "global_step": 53500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8535
  "eval_samples_per_second": 24.471,
8536
  "eval_steps_per_second": 3.06,
8537
  "step": 53300
8538
+ },
8539
+ {
8540
+ "epoch": 22.48,
8541
+ "grad_norm": 2.225783586502075,
8542
+ "learning_rate": 1.2631802120141343e-05,
8543
+ "loss": 0.2854,
8544
+ "step": 53400
8545
+ },
8546
+ {
8547
+ "epoch": 22.48,
8548
+ "eval_cer": 0.33407228892104335,
8549
+ "eval_loss": 2.5062761306762695,
8550
+ "eval_runtime": 421.3473,
8551
+ "eval_samples_per_second": 22.495,
8552
+ "eval_steps_per_second": 2.812,
8553
+ "step": 53400
8554
+ },
8555
+ {
8556
+ "epoch": 22.52,
8557
+ "grad_norm": 9.648059844970703,
8558
+ "learning_rate": 1.256113074204947e-05,
8559
+ "loss": 0.2985,
8560
+ "step": 53500
8561
+ },
8562
+ {
8563
+ "epoch": 22.52,
8564
+ "eval_cer": 0.3380709592898205,
8565
+ "eval_loss": 2.483365774154663,
8566
+ "eval_runtime": 392.706,
8567
+ "eval_samples_per_second": 24.135,
8568
+ "eval_steps_per_second": 3.018,
8569
+ "step": 53500
8570
  }
8571
  ],
8572
  "logging_steps": 100,
 
8574
  "num_input_tokens_seen": 0,
8575
  "num_train_epochs": 30,
8576
  "save_steps": 100,
8577
+ "total_flos": 5.862232878225574e+20,
8578
  "train_batch_size": 8,
8579
  "trial_name": null,
8580
  "trial_params": null