ashanhr commited on
Commit
93e8835
·
verified ·
1 Parent(s): 4251560

Training in progress, step 53900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e35b78386d3b49c89f85ffffe06aae621443cf1b932a8092b4db090fbd3ec96
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b2dcef9f26179151435e50d8e64e0973728607ca53d2c9bca9890649ccae83
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:874666ed33022a022c5899409f513db7f4eb2c26c88a84553937a6e405f29378
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631fa048ad4716521ddf3c742f0d06469909e5075a79096a62d3bf501816a346
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81361d862eaf160b9e170db03019275ae10369ec53a2433cdd4bf4a722d8c87d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0e6a675d71cea3e043d398f0c5c226166bd97c3d96321b395a156a2148869be
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d5783c2395d6ceb319348029dab347862318433ddfe999f17859fa9858fbd44
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5210adbdeeb201664228e6f6260e1e39bb443611ef0bc349a9a68a41921abb3
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc7f8ff46b15057ea0b7cff87e35527c6b328af1a3f89ae33ca25b7b381ad498
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f72bd9fcf066733ad01f4170825cb13eef2cf7fb9d9def60eafcaf568f587f6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.52157440538834,
5
  "eval_steps": 100,
6
- "global_step": 53500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8567,6 +8567,70 @@
8567
  "eval_samples_per_second": 24.135,
8568
  "eval_steps_per_second": 3.018,
8569
  "step": 53500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8570
  }
8571
  ],
8572
  "logging_steps": 100,
@@ -8574,7 +8638,7 @@
8574
  "num_input_tokens_seen": 0,
8575
  "num_train_epochs": 30,
8576
  "save_steps": 100,
8577
- "total_flos": 5.862232878225574e+20,
8578
  "train_batch_size": 8,
8579
  "trial_name": null,
8580
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.68996000841928,
5
  "eval_steps": 100,
6
+ "global_step": 53900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8567
  "eval_samples_per_second": 24.135,
8568
  "eval_steps_per_second": 3.018,
8569
  "step": 53500
8570
+ },
8571
+ {
8572
+ "epoch": 22.56,
8573
+ "grad_norm": 1.8772177696228027,
8574
+ "learning_rate": 1.2490459363957597e-05,
8575
+ "loss": 0.3069,
8576
+ "step": 53600
8577
+ },
8578
+ {
8579
+ "epoch": 22.56,
8580
+ "eval_cer": 0.3383080442688984,
8581
+ "eval_loss": 2.481276035308838,
8582
+ "eval_runtime": 425.0385,
8583
+ "eval_samples_per_second": 22.299,
8584
+ "eval_steps_per_second": 2.788,
8585
+ "step": 53600
8586
+ },
8587
+ {
8588
+ "epoch": 22.61,
8589
+ "grad_norm": 1.4407734870910645,
8590
+ "learning_rate": 1.2419787985865725e-05,
8591
+ "loss": 0.3018,
8592
+ "step": 53700
8593
+ },
8594
+ {
8595
+ "epoch": 22.61,
8596
+ "eval_cer": 0.3348739783348324,
8597
+ "eval_loss": 2.4129292964935303,
8598
+ "eval_runtime": 399.5718,
8599
+ "eval_samples_per_second": 23.72,
8600
+ "eval_steps_per_second": 2.966,
8601
+ "step": 53700
8602
+ },
8603
+ {
8604
+ "epoch": 22.65,
8605
+ "grad_norm": 5.736636638641357,
8606
+ "learning_rate": 1.2349116607773852e-05,
8607
+ "loss": 0.2925,
8608
+ "step": 53800
8609
+ },
8610
+ {
8611
+ "epoch": 22.65,
8612
+ "eval_cer": 0.3356121191975284,
8613
+ "eval_loss": 2.4841222763061523,
8614
+ "eval_runtime": 427.1982,
8615
+ "eval_samples_per_second": 22.186,
8616
+ "eval_steps_per_second": 2.774,
8617
+ "step": 53800
8618
+ },
8619
+ {
8620
+ "epoch": 22.69,
8621
+ "grad_norm": 6.094464302062988,
8622
+ "learning_rate": 1.227844522968198e-05,
8623
+ "loss": 0.2981,
8624
+ "step": 53900
8625
+ },
8626
+ {
8627
+ "epoch": 22.69,
8628
+ "eval_cer": 0.3367071096163623,
8629
+ "eval_loss": 2.4353983402252197,
8630
+ "eval_runtime": 389.5123,
8631
+ "eval_samples_per_second": 24.333,
8632
+ "eval_steps_per_second": 3.042,
8633
+ "step": 53900
8634
  }
8635
  ],
8636
  "logging_steps": 100,
 
8638
  "num_input_tokens_seen": 0,
8639
  "num_train_epochs": 30,
8640
  "save_steps": 100,
8641
+ "total_flos": 5.906211157631806e+20,
8642
  "train_batch_size": 8,
8643
  "trial_name": null,
8644
  "trial_params": null