ashanhr commited on
Commit
89f3ba8
1 Parent(s): 8a8e855

Training in progress, step 9900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35ca73b672d977391aef537183758e87218bc9b1f338667515331d5bba2decd1
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5744380703f87f970b57582712646d1195547ea2deb809f1e728c012a141e65
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41e1f8481ef4b2fa8757c7e2080dad3a9464c495f675c86fe8a2b21eb55e1af4
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1dfdf11a2a9cfabe064ab18caf2d667bf041175304c131085bc56d0b7260ec8
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72978f2549bdc36afbaafc26e6e615c11be366a830f1f901381cd993ed7e7933
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9b7d365602a80d5be1805125aa374b02c292b64a53e40e7d4a6fc4b2c32c3b5
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04636db759aef65e061d0a81cea15a49cf6becde8fb0cfb685f69da500fa6b33
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe42a9a6d1a2aa6e8b3204aaff4e79e9911c21ae2de7ab694db56cf080fb011
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad3c21cfca6b96706a9c82298c8390f24827b562f6561d40e5e026d6ae4092b3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e024ad4d2c469d51bc12b39f389559da78567c0b22ee2b58dd4cac9872e0c9b3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.083350873500316,
5
  "eval_steps": 100,
6
- "global_step": 9700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1559,6 +1559,38 @@
1559
  "eval_samples_per_second": 26.17,
1560
  "eval_steps_per_second": 3.272,
1561
  "step": 9700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1562
  }
1563
  ],
1564
  "logging_steps": 100,
@@ -1566,7 +1598,7 @@
1566
  "num_input_tokens_seen": 0,
1567
  "num_train_epochs": 30,
1568
  "save_steps": 100,
1569
- "total_flos": 1.0629240419863537e+20,
1570
  "train_batch_size": 8,
1571
  "trial_name": null,
1572
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.167543675015786,
5
  "eval_steps": 100,
6
+ "global_step": 9900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1559
  "eval_samples_per_second": 26.17,
1560
  "eval_steps_per_second": 3.272,
1561
  "step": 9700
1562
+ },
1563
+ {
1564
+ "epoch": 4.13,
1565
+ "grad_norm": 27.368144989013672,
1566
+ "learning_rate": 4.343462897526502e-05,
1567
+ "loss": 1.6976,
1568
+ "step": 9800
1569
+ },
1570
+ {
1571
+ "epoch": 4.13,
1572
+ "eval_cer": 0.4800359782566188,
1573
+ "eval_loss": 2.5932295322418213,
1574
+ "eval_runtime": 371.4743,
1575
+ "eval_samples_per_second": 25.515,
1576
+ "eval_steps_per_second": 3.19,
1577
+ "step": 9800
1578
+ },
1579
+ {
1580
+ "epoch": 4.17,
1581
+ "grad_norm": 4.671775817871094,
1582
+ "learning_rate": 4.336395759717315e-05,
1583
+ "loss": 1.7712,
1584
+ "step": 9900
1585
+ },
1586
+ {
1587
+ "epoch": 4.17,
1588
+ "eval_cer": 0.4871509718039967,
1589
+ "eval_loss": 2.37919282913208,
1590
+ "eval_runtime": 357.2849,
1591
+ "eval_samples_per_second": 26.528,
1592
+ "eval_steps_per_second": 3.317,
1593
+ "step": 9900
1594
  }
1595
  ],
1596
  "logging_steps": 100,
 
1598
  "num_input_tokens_seen": 0,
1599
  "num_train_epochs": 30,
1600
  "save_steps": 100,
1601
+ "total_flos": 1.0845390616473847e+20,
1602
  "train_batch_size": 8,
1603
  "trial_name": null,
1604
  "trial_params": null