ashanhr committed on
Commit
1f4e33b
1 Parent(s): ba3ac19

Training in progress, step 29100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d251d0c72ed5a7685017bde159d15ce6766dc5be3bb7e7e581c05a02a62c04c5
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd39bafad8df5816ff6b08c12be7dca52286baccaa654ea2978e0d3f02bf4e5
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b752e7177166bebeb1530b608aac17f572602cfe388058cc6e1fe0d959654324
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fdf718b07a591ed57ff7ce95664e7e067c38f3eb310eba7c991b75dbb1fafa
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be5846d525ecdc166b3d6f60d5b1bd254c1dd87056dcca60cac2b041ad05f37b
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737d384bbb23eea51775050fd71e06d427c4606907f23670a2074c1709a9001f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:855244fb6d2f7e56e44886d31c5782f973bfa784c99f0334fee935eb6b3be025
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a57ee799b5913dee6e5865779e9212480500a11279764d115cea4398001ef0
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e7befc489c14232ad92a4c0e54914d7e1f33b1fa2d6656378fe60d0f3de637
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3333223a097f72436e52b6f08ce4e75389646a8070023e25dcc110bc5f88e5d8
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.165859818985476,
5
  "eval_steps": 100,
6
- "global_step": 28900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4631,6 +4631,38 @@
4631
  "eval_samples_per_second": 25.45,
4632
  "eval_steps_per_second": 3.182,
4633
  "step": 28900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4634
  }
4635
  ],
4636
  "logging_steps": 100,
@@ -4638,7 +4670,7 @@
4638
  "num_input_tokens_seen": 0,
4639
  "num_train_epochs": 30,
4640
  "save_steps": 100,
4641
- "total_flos": 3.16704104262254e+20,
4642
  "train_batch_size": 8,
4643
  "trial_name": null,
4644
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.250052620500947,
5
  "eval_steps": 100,
6
+ "global_step": 29100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4631
  "eval_samples_per_second": 25.45,
4632
  "eval_steps_per_second": 3.182,
4633
  "step": 28900
4634
+ },
4635
+ {
4636
+ "epoch": 12.21,
4637
+ "grad_norm": 15.632731437683105,
4638
+ "learning_rate": 2.9869964664310958e-05,
4639
+ "loss": 1.0708,
4640
+ "step": 29000
4641
+ },
4642
+ {
4643
+ "epoch": 12.21,
4644
+ "eval_cer": 0.41651920143913024,
4645
+ "eval_loss": 2.2883496284484863,
4646
+ "eval_runtime": 388.5509,
4647
+ "eval_samples_per_second": 24.393,
4648
+ "eval_steps_per_second": 3.05,
4649
+ "step": 29000
4650
+ },
4651
+ {
4652
+ "epoch": 12.25,
4653
+ "grad_norm": 6.1355180740356445,
4654
+ "learning_rate": 2.9799293286219082e-05,
4655
+ "loss": 1.0688,
4656
+ "step": 29100
4657
+ },
4658
+ {
4659
+ "epoch": 12.25,
4660
+ "eval_cer": 0.4098588244495718,
4661
+ "eval_loss": 2.406083822250366,
4662
+ "eval_runtime": 374.5529,
4663
+ "eval_samples_per_second": 25.305,
4664
+ "eval_steps_per_second": 3.164,
4665
+ "step": 29100
4666
  }
4667
  ],
4668
  "logging_steps": 100,
 
4670
  "num_input_tokens_seen": 0,
4671
  "num_train_epochs": 30,
4672
  "save_steps": 100,
4673
+ "total_flos": 3.1890627031696395e+20,
4674
  "train_batch_size": 8,
4675
  "trial_name": null,
4676
  "trial_params": null