ashanhr commited on
Commit
d008ce4
·
verified ·
1 Parent(s): 5f07e85

Training in progress, step 49500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74a357b98e4ee095ed45472c47587cbff53b11bba8ee52364cf747aec2edad57
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4e034f2e43c0fca95585a1c74b8ba288002aeef4e94f7f869a1f6d24f8b7d8
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1045be07891224de41ffc49353bbd9c71f37abedb35e78847a85b94a93414d1f
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4f8cab60168f6324315199235ff245062dddd3799c0d85af85527ecee3fe8a
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab63e00126966b51e82c6cfc37d4154dc70a54f57af4954047f6d1bfa10482e3
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94ac800ef4ed1b5dbe2e129011c90053617c45f04b68159ae45c8bd542d485ca
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3413234ec77271dbb44be6798264935b5bc29bd7a383e5efca859e1b414dc0d
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a109c2513b60d30766caef58a6a250c617ed41bd13414a63f31b8ac63646d1d0
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:776174e0cdc8de84863dc2bc871e616ade46ecb971d9ed338d192083a8322ec2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f46b18b91d6715a6101674870a673d34999d2c470954bab9b6ef70b9b1b09992
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.75352557356346,
5
  "eval_steps": 100,
6
- "global_step": 49300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7895,6 +7895,38 @@
7895
  "eval_samples_per_second": 24.101,
7896
  "eval_steps_per_second": 3.013,
7897
  "step": 49300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7898
  }
7899
  ],
7900
  "logging_steps": 100,
@@ -7902,7 +7934,7 @@
7902
  "num_input_tokens_seen": 0,
7903
  "num_train_epochs": 30,
7904
  "save_steps": 100,
7905
- "total_flos": 5.4016158411070066e+20,
7906
  "train_batch_size": 8,
7907
  "trial_name": null,
7908
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.837718375078932,
5
  "eval_steps": 100,
6
+ "global_step": 49500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7895
  "eval_samples_per_second": 24.101,
7896
  "eval_steps_per_second": 3.013,
7897
  "step": 49300
7898
+ },
7899
+ {
7900
+ "epoch": 20.8,
7901
+ "grad_norm": 3.681830406188965,
7902
+ "learning_rate": 1.5457950530035336e-05,
7903
+ "loss": 0.4141,
7904
+ "step": 49400
7905
+ },
7906
+ {
7907
+ "epoch": 20.8,
7908
+ "eval_cer": 0.3429764185991944,
7909
+ "eval_loss": 3.280654191970825,
7910
+ "eval_runtime": 408.0304,
7911
+ "eval_samples_per_second": 23.229,
7912
+ "eval_steps_per_second": 2.904,
7913
+ "step": 49400
7914
+ },
7915
+ {
7916
+ "epoch": 20.84,
7917
+ "grad_norm": 3.107179641723633,
7918
+ "learning_rate": 1.5387279151943464e-05,
7919
+ "loss": 0.4127,
7920
+ "step": 49500
7921
+ },
7922
+ {
7923
+ "epoch": 20.84,
7924
+ "eval_cer": 0.34173233350279614,
7925
+ "eval_loss": 2.8979499340057373,
7926
+ "eval_runtime": 387.9855,
7927
+ "eval_samples_per_second": 24.429,
7928
+ "eval_steps_per_second": 3.054,
7929
+ "step": 49500
7930
  }
7931
  ],
7932
  "logging_steps": 100,
 
7934
  "num_input_tokens_seen": 0,
7935
  "num_train_epochs": 30,
7936
  "save_steps": 100,
7937
+ "total_flos": 5.423486958333301e+20,
7938
  "train_batch_size": 8,
7939
  "trial_name": null,
7940
  "trial_params": null