ashanhr commited on
Commit
edb457d
1 Parent(s): 38eea55

Training in progress, step 49300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e998f8468cc7c346dc4ab88bf0c2ea3d0619ac332532e3a814afb1ae30e5137
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74a357b98e4ee095ed45472c47587cbff53b11bba8ee52364cf747aec2edad57
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f75452f5c6dacef05179ac21358b2dfe0213c1b720af143f8e2325f3b80e9b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1045be07891224de41ffc49353bbd9c71f37abedb35e78847a85b94a93414d1f
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f320ce414b15cc9e8e335856b46b099587922107eefcdd05c2003fd1b73a4a1
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab63e00126966b51e82c6cfc37d4154dc70a54f57af4954047f6d1bfa10482e3
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbf00c1e2916dc93efcaa39d2802c751ace54d5e0176b36b9c71c02426bcbae3
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3413234ec77271dbb44be6798264935b5bc29bd7a383e5efca859e1b414dc0d
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3526b76b5bb1c00b0ec05511da6eda21f6c0133130e655d44d777a0153ed64a2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776174e0cdc8de84863dc2bc871e616ade46ecb971d9ed338d192083a8322ec2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.66933277204799,
5
  "eval_steps": 100,
6
- "global_step": 49100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7863,6 +7863,38 @@
7863
  "eval_samples_per_second": 24.202,
7864
  "eval_steps_per_second": 3.026,
7865
  "step": 49100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7866
  }
7867
  ],
7868
  "logging_steps": 100,
@@ -7870,7 +7902,7 @@
7870
  "num_input_tokens_seen": 0,
7871
  "num_train_epochs": 30,
7872
  "save_steps": 100,
7873
- "total_flos": 5.379768925487233e+20,
7874
  "train_batch_size": 8,
7875
  "trial_name": null,
7876
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.75352557356346,
5
  "eval_steps": 100,
6
+ "global_step": 49300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7863
  "eval_samples_per_second": 24.202,
7864
  "eval_steps_per_second": 3.026,
7865
  "step": 49100
7866
+ },
7867
+ {
7868
+ "epoch": 20.71,
7869
+ "grad_norm": 5.6570048332214355,
7870
+ "learning_rate": 1.5599293286219083e-05,
7871
+ "loss": 0.4067,
7872
+ "step": 49200
7873
+ },
7874
+ {
7875
+ "epoch": 20.71,
7876
+ "eval_cer": 0.3445235814008056,
7877
+ "eval_loss": 3.190380811691284,
7878
+ "eval_runtime": 410.6709,
7879
+ "eval_samples_per_second": 23.079,
7880
+ "eval_steps_per_second": 2.886,
7881
+ "step": 49200
7882
+ },
7883
+ {
7884
+ "epoch": 20.75,
7885
+ "grad_norm": 6.448336124420166,
7886
+ "learning_rate": 1.5528621908127208e-05,
7887
+ "loss": 0.428,
7888
+ "step": 49300
7889
+ },
7890
+ {
7891
+ "epoch": 20.75,
7892
+ "eval_cer": 0.3451126275859372,
7893
+ "eval_loss": 3.366471529006958,
7894
+ "eval_runtime": 393.2567,
7895
+ "eval_samples_per_second": 24.101,
7896
+ "eval_steps_per_second": 3.013,
7897
+ "step": 49300
7898
  }
7899
  ],
7900
  "logging_steps": 100,
 
7902
  "num_input_tokens_seen": 0,
7903
  "num_train_epochs": 30,
7904
  "save_steps": 100,
7905
+ "total_flos": 5.4016158411070066e+20,
7906
  "train_batch_size": 8,
7907
  "trial_name": null,
7908
  "trial_params": null