ashanhr commited on
Commit
6c3048c
·
verified ·
1 Parent(s): 120a79d

Training in progress, step 55500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f38a0345160eafc7a2bff16ecb3b832e4bed866ef2900dd0baf0b2f9d5d0eb0
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3be30c038fe83e2c45066c8865edcd4f97638d7c9497d22507519438f766b9e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c019cff8f9be930c3fc0d37425de55b6c57a313e24c2bb3dc34b7f2486efdfbe
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc37f0c081dea232ed47687524d26d35478631d3af4618df338fc236e166a184
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7e4a457495ababe0eb08547a34b3470fd36b5930b98f33eb20ef364970e0c83
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5479170340108cb57e7920fc9709a1e6e98ddda8596cfda43bc01b7188a5ccc2
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f106411af63d8226ffb8f49ed683b71c1bcf027a8df03274802b22a6f8fda8f
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e547d0244ac6c17f72a2e2c794d7af278410f2b12b34a7dbb487ec9d9fcdcb
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5aa0f94c39f4406774a86e68d3997d19d329feb34dcd618fc1dc95189d45b9f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30e334ce630adfe0f015710ad2e5f2148b3a474ea9fa476144c76c38fe45245
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.279309619027572,
5
  "eval_steps": 100,
6
- "global_step": 55300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8855,6 +8855,38 @@
8855
  "eval_samples_per_second": 24.017,
8856
  "eval_steps_per_second": 3.003,
8857
  "step": 55300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8858
  }
8859
  ],
8860
  "logging_steps": 100,
@@ -8862,7 +8894,7 @@
8862
  "num_input_tokens_seen": 0,
8863
  "num_train_epochs": 30,
8864
  "save_steps": 100,
8865
- "total_flos": 6.060174194718131e+20,
8866
  "train_batch_size": 8,
8867
  "trial_name": null,
8868
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.363502420543043,
5
  "eval_steps": 100,
6
+ "global_step": 55500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8855
  "eval_samples_per_second": 24.017,
8856
  "eval_steps_per_second": 3.003,
8857
  "step": 55300
8858
+ },
8859
+ {
8860
+ "epoch": 23.32,
8861
+ "grad_norm": 2.5659921169281006,
8862
+ "learning_rate": 1.1218374558303888e-05,
8863
+ "loss": 0.2612,
8864
+ "step": 55400
8865
+ },
8866
+ {
8867
+ "epoch": 23.32,
8868
+ "eval_cer": 0.3338303155918814,
8869
+ "eval_loss": 2.4699885845184326,
8870
+ "eval_runtime": 423.6808,
8871
+ "eval_samples_per_second": 22.371,
8872
+ "eval_steps_per_second": 2.797,
8873
+ "step": 55400
8874
+ },
8875
+ {
8876
+ "epoch": 23.36,
8877
+ "grad_norm": 1.429592490196228,
8878
+ "learning_rate": 1.1147703180212014e-05,
8879
+ "loss": 0.2702,
8880
+ "step": 55500
8881
+ },
8882
+ {
8883
+ "epoch": 23.36,
8884
+ "eval_cer": 0.33553879394626723,
8885
+ "eval_loss": 2.7780275344848633,
8886
+ "eval_runtime": 398.5381,
8887
+ "eval_samples_per_second": 23.782,
8888
+ "eval_steps_per_second": 2.973,
8889
+ "step": 55500
8890
  }
8891
  ],
8892
  "logging_steps": 100,
 
8894
  "num_input_tokens_seen": 0,
8895
  "num_train_epochs": 30,
8896
  "save_steps": 100,
8897
+ "total_flos": 6.081637645988172e+20,
8898
  "train_batch_size": 8,
8899
  "trial_name": null,
8900
  "trial_params": null