ashanhr committed on
Commit
672d3d9
1 Parent(s): 57b35b5

Training in progress, step 12700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e66cbff19b835e3a4efcd7383600618ff3c16476f0e6cd4eea579eddd1014bc
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c6e9580fb798101735047d737cee85a6bb1061fad515187b2725aaa63bfedfc
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc5198d3d1c6fadab280de2b6cc23fa58362393be6da3bf2b3a8b16408384ea7
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e97e01dc64ab5637bb404489fce4bc374a9d40ac15f27ef1b88fa4b9f545c28
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55d601ccae8eead4998f520c0fb37f262fdc4af9674f4598e11e05776a91d57d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b5eb8171ce3b32f1473da5c445fd724fddce4624bb8f3de388a3614ee04629
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0247765af733f816c82cb6abb0bd3fc92237d543333cca727d37eb273ded1c69
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d6c38bb1634ce2d5111d8f0da0ee0f0eeb90e7348bc821241c2773d71d06af6
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f487427df0aa8bfe421a7abbb408985bddad611dfcbca3d7c23ae5d19832455
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e2d9a4da1dca4e718c671213c3210706697de61ee8df6908344ec05bce806c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.262050094716901,
5
  "eval_steps": 100,
6
- "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2007,6 +2007,38 @@
2007
  "eval_samples_per_second": 24.817,
2008
  "eval_steps_per_second": 3.103,
2009
  "step": 12500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2010
  }
2011
  ],
2012
  "logging_steps": 100,
@@ -2014,7 +2046,7 @@
2014
  "num_input_tokens_seen": 0,
2015
  "num_train_epochs": 30,
2016
  "save_steps": 100,
2017
- "total_flos": 1.3698617507925189e+20,
2018
  "train_batch_size": 8,
2019
  "trial_name": null,
2020
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.346242896232372,
5
  "eval_steps": 100,
6
+ "global_step": 12700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2007
  "eval_samples_per_second": 24.817,
2008
  "eval_steps_per_second": 3.103,
2009
  "step": 12500
2010
+ },
2011
+ {
2012
+ "epoch": 5.3,
2013
+ "grad_norm": 9.076600074768066,
2014
+ "learning_rate": 4.1455830388692577e-05,
2015
+ "loss": 2.0271,
2016
+ "step": 12600
2017
+ },
2018
+ {
2019
+ "epoch": 5.3,
2020
+ "eval_cer": 0.48266591060185365,
2021
+ "eval_loss": 2.858898639678955,
2022
+ "eval_runtime": 405.4609,
2023
+ "eval_samples_per_second": 23.376,
2024
+ "eval_steps_per_second": 2.923,
2025
+ "step": 12600
2026
+ },
2027
+ {
2028
+ "epoch": 5.35,
2029
+ "grad_norm": 1.6710706949234009,
2030
+ "learning_rate": 4.138515901060071e-05,
2031
+ "loss": 1.7331,
2032
+ "step": 12700
2033
+ },
2034
+ {
2035
+ "epoch": 5.35,
2036
+ "eval_cer": 0.501048551093035,
2037
+ "eval_loss": 2.9208521842956543,
2038
+ "eval_runtime": 361.6906,
2039
+ "eval_samples_per_second": 26.205,
2040
+ "eval_steps_per_second": 3.276,
2041
+ "step": 12700
2042
  }
2043
  ],
2044
  "logging_steps": 100,
 
2046
  "num_input_tokens_seen": 0,
2047
  "num_train_epochs": 30,
2048
  "save_steps": 100,
2049
+ "total_flos": 1.3921302086472671e+20,
2050
  "train_batch_size": 8,
2051
  "trial_name": null,
2052
  "trial_params": null