ashanhr committed on
Commit
64ba9c6
·
verified ·
1 Parent(s): 89f3ba8

Training in progress, step 10100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5744380703f87f970b57582712646d1195547ea2deb809f1e728c012a141e65
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0152ae7665e14b7649e1603869eac2627c4b46968a4140b59e840be45c462043
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1dfdf11a2a9cfabe064ab18caf2d667bf041175304c131085bc56d0b7260ec8
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ca09b9d6895bf99912a3b620ebf15f2699cd4d85234bae948db7af35d3805b
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9b7d365602a80d5be1805125aa374b02c292b64a53e40e7d4a6fc4b2c32c3b5
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f7c21bc62e1590ba7d33964a61cd28c89e814d09700786d96a59dfd2d2f37a
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbe42a9a6d1a2aa6e8b3204aaff4e79e9911c21ae2de7ab694db56cf080fb011
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac36883a60e8fdddb046d3d099aec879515eeb495fb66554d35da33f561e776
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e024ad4d2c469d51bc12b39f389559da78567c0b22ee2b58dd4cac9872e0c9b3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00379d1a32b555abafe33a1610f228542dd2b56a05a75c6d377908358a81a67
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.167543675015786,
5
  "eval_steps": 100,
6
- "global_step": 9900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1591,6 +1591,38 @@
1591
  "eval_samples_per_second": 26.528,
1592
  "eval_steps_per_second": 3.317,
1593
  "step": 9900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
  }
1595
  ],
1596
  "logging_steps": 100,
@@ -1598,7 +1630,7 @@
1598
  "num_input_tokens_seen": 0,
1599
  "num_train_epochs": 30,
1600
  "save_steps": 100,
1601
- "total_flos": 1.0845390616473847e+20,
1602
  "train_batch_size": 8,
1603
  "trial_name": null,
1604
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.251736476531256,
5
  "eval_steps": 100,
6
+ "global_step": 10100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1591
  "eval_samples_per_second": 26.528,
1592
  "eval_steps_per_second": 3.317,
1593
  "step": 9900
1594
+ },
1595
+ {
1596
+ "epoch": 4.21,
1597
+ "grad_norm": 2.77481746673584,
1598
+ "learning_rate": 4.329328621908127e-05,
1599
+ "loss": 1.45,
1600
+ "step": 10000
1601
+ },
1602
+ {
1603
+ "epoch": 4.21,
1604
+ "eval_cer": 0.49421952602557584,
1605
+ "eval_loss": 1.6775341033935547,
1606
+ "eval_runtime": 376.5454,
1607
+ "eval_samples_per_second": 25.171,
1608
+ "eval_steps_per_second": 3.147,
1609
+ "step": 10000
1610
+ },
1611
+ {
1612
+ "epoch": 4.25,
1613
+ "grad_norm": 7.3151140213012695,
1614
+ "learning_rate": 4.32226148409894e-05,
1615
+ "loss": 1.5163,
1616
+ "step": 10100
1617
+ },
1618
+ {
1619
+ "epoch": 4.25,
1620
+ "eval_cer": 0.49458370810683977,
1621
+ "eval_loss": 2.463066577911377,
1622
+ "eval_runtime": 349.9183,
1623
+ "eval_samples_per_second": 27.086,
1624
+ "eval_steps_per_second": 3.387,
1625
+ "step": 10100
1626
  }
1627
  ],
1628
  "logging_steps": 100,
 
1630
  "num_input_tokens_seen": 0,
1631
  "num_train_epochs": 30,
1632
  "save_steps": 100,
1633
+ "total_flos": 1.1062718286020878e+20,
1634
  "train_batch_size": 8,
1635
  "trial_name": null,
1636
  "trial_params": null