ashanhr commited on
Commit
6c0988c
1 Parent(s): 38cfba6

Training in progress, step 34900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9867f224290cad13cee2149e3f5b88b51d71f4cd5c80186c5e74ad5b9c9aa605
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b206f2369d31cae35a08aab2df578e781b6943ef77ce53d8ba5787f71a665d5
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:353be69f812937f23a9ca7a7a09cb32160785e43f6ae776dd247459ea7615c12
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ae2198c7d773d1f56253a07cd8ae7a4c209a916ae2f8d8eda3a288ec2b9d3a1
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82dcac1bc761474f881c627c4621b78ab1a57da749763717e9a164523afda69f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9be35650088eb24a02be1e6a194809f0cc95f8e622d969c1b31c9e1e0a7e851
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33684118cae1307b6ed1a910d74ee2b4d35f0d7a91b6c16c2caf9ca77105918c
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab3843d78d7e496fe9b5af38e4b4d8927ff90626a6cfdc3d3f661a4b13faa85
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce84586ac34d9d46ffac97deffc1538ae04a68c67811b91ea3d3bec0fe9c9a7a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a9054242e692acef1ddb101099ef9d20fe1d70541a460911930073f47b77157
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.60745106293412,
5
  "eval_steps": 100,
6
- "global_step": 34700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5559,6 +5559,38 @@
5559
  "eval_samples_per_second": 25.925,
5560
  "eval_steps_per_second": 3.241,
5561
  "step": 34700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5562
  }
5563
  ],
5564
  "logging_steps": 100,
@@ -5566,7 +5598,7 @@
5566
  "num_input_tokens_seen": 0,
5567
  "num_train_epochs": 30,
5568
  "save_steps": 100,
5569
- "total_flos": 3.8025780817217e+20,
5570
  "train_batch_size": 8,
5571
  "trial_name": null,
5572
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.69164386444959,
5
  "eval_steps": 100,
6
+ "global_step": 34900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5559
  "eval_samples_per_second": 25.925,
5560
  "eval_steps_per_second": 3.241,
5561
  "step": 34700
5562
+ },
5563
+ {
5564
+ "epoch": 14.65,
5565
+ "grad_norm": 3.3934333324432373,
5566
+ "learning_rate": 2.5771731448763254e-05,
5567
+ "loss": 0.9375,
5568
+ "step": 34800
5569
+ },
5570
+ {
5571
+ "epoch": 14.65,
5572
+ "eval_cer": 0.3903005357631692,
5573
+ "eval_loss": 1.944000005722046,
5574
+ "eval_runtime": 404.5333,
5575
+ "eval_samples_per_second": 23.429,
5576
+ "eval_steps_per_second": 2.929,
5577
+ "step": 34800
5578
+ },
5579
+ {
5580
+ "epoch": 14.69,
5581
+ "grad_norm": 2.0570108890533447,
5582
+ "learning_rate": 2.5701060070671378e-05,
5583
+ "loss": 0.9158,
5584
+ "step": 34900
5585
+ },
5586
+ {
5587
+ "epoch": 14.69,
5588
+ "eval_cer": 0.3864998435727973,
5589
+ "eval_loss": 1.8147820234298706,
5590
+ "eval_runtime": 366.6552,
5591
+ "eval_samples_per_second": 25.85,
5592
+ "eval_steps_per_second": 3.232,
5593
+ "step": 34900
5594
  }
5595
  ],
5596
  "logging_steps": 100,
 
5598
  "num_input_tokens_seen": 0,
5599
  "num_train_epochs": 30,
5600
  "save_steps": 100,
5601
+ "total_flos": 3.824337597929555e+20,
5602
  "train_batch_size": 8,
5603
  "trial_name": null,
5604
  "trial_params": null