ashanhr commited on
Commit
3b172b7
·
verified ·
1 Parent(s): df82e8b

Training in progress, step 31700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b873a74cd85a7444c0192282714f2d098858bb6f1a3a0918b2e959c631252ae
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1945bb6ddf30b2e4e4d92dc6ee4fc7d055d8084fe3ccfb116c609ff34fb39a9
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9847471a8fd7c6857174d23c423cd8a42f9fdb4816150a44f8df1411bd9e1c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968c0b29c790917699e87f69bf4aa6ac1b07a8edffd5fe57d7f83fe35c794971
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25a848c08828cafb3a66f0504b00fb96895d1acc816cd999813ceecd40249ca
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5480abc34a2c091a74e506fa88c8f91b9508337e6b6f219e4137a8ec9418527a
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c23ac0726bb090165d240f34770fc857f9071db19378988c8da075c49c106078
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d752b29aed24741788ac40b8b42ea9e26aed84562cc2ec34fd9be16f1fa0b9
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d8f86a3c26a655dfe015c220ca87fec81bde322514d5aa3c29e06276681b938
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acfed987dc1065bc7e53e8a29458596da758156c2105dbe2e31f48b243130117
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.260366238686592,
5
  "eval_steps": 100,
6
- "global_step": 31500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5047,6 +5047,38 @@
5047
  "eval_samples_per_second": 25.115,
5048
  "eval_steps_per_second": 3.14,
5049
  "step": 31500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5050
  }
5051
  ],
5052
  "logging_steps": 100,
@@ -5054,7 +5086,7 @@
5054
  "num_input_tokens_seen": 0,
5055
  "num_train_epochs": 30,
5056
  "save_steps": 100,
5057
- "total_flos": 3.451975086227115e+20,
5058
  "train_batch_size": 8,
5059
  "trial_name": null,
5060
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.344559040202062,
5
  "eval_steps": 100,
6
+ "global_step": 31700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5047
  "eval_samples_per_second": 25.115,
5048
  "eval_steps_per_second": 3.14,
5049
  "step": 31500
5050
+ },
5051
+ {
5052
+ "epoch": 13.3,
5053
+ "grad_norm": 2.9201929569244385,
5054
+ "learning_rate": 2.8033215547703178e-05,
5055
+ "loss": 0.9921,
5056
+ "step": 31600
5057
+ },
5058
+ {
5059
+ "epoch": 13.3,
5060
+ "eval_cer": 0.40619256188651204,
5061
+ "eval_loss": 1.6753425598144531,
5062
+ "eval_runtime": 390.1339,
5063
+ "eval_samples_per_second": 24.294,
5064
+ "eval_steps_per_second": 3.037,
5065
+ "step": 31600
5066
+ },
5067
+ {
5068
+ "epoch": 13.34,
5069
+ "grad_norm": 3.985402822494507,
5070
+ "learning_rate": 2.796254416961131e-05,
5071
+ "loss": 0.9951,
5072
+ "step": 31700
5073
+ },
5074
+ {
5075
+ "epoch": 13.34,
5076
+ "eval_cer": 0.4047969379375073,
5077
+ "eval_loss": 1.8533233404159546,
5078
+ "eval_runtime": 373.1578,
5079
+ "eval_samples_per_second": 25.399,
5080
+ "eval_steps_per_second": 3.176,
5081
+ "step": 31700
5082
  }
5083
  ],
5084
  "logging_steps": 100,
 
5086
  "num_input_tokens_seen": 0,
5087
  "num_train_epochs": 30,
5088
  "save_steps": 100,
5089
+ "total_flos": 3.473760721513529e+20,
5090
  "train_batch_size": 8,
5091
  "trial_name": null,
5092
  "trial_params": null