ashanhr commited on
Commit
169fccc
1 Parent(s): 5c21166

Training in progress, step 17100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87bc7c877cdfbe54847b102871e2d4073e1e3b3e77f4628403c62609d4215ee0
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9e8f1cd682c600b8cb11187fc1ab5ae248a9db42188a27ec84ab8dd6b176e3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fa0adf8ce583fad0205dd992aeb22f4bb05366efc58cf6b0feae933acf7733a
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e55ffbe26a558cd4d44bf377902f7de44a99d96212bf6909a06d0b6c973855f0
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1057f87e50786d1aca543245abd8aa5218ae3920b5ed10328f1cc6e8d05b8fb8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d85e4e6b570f463ab7d57a8eb340d363fee4aedfddaa988b4c8532024ed27e4d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70bc272cae15954849873b08592843a4166415a3756dbd7282f8116aed448377
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9edfa95c867c886482be8a9997b5cdbca3a08cde04092b0ac7f65fec6704c015
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5e32e74e7ac82af1d3f98e8ce057551c88fd1fff091ef003d7c5821593146b8
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c97c09909b76b35ea84168af91d3198272a70d988945ee6a69678b1b1242e4e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.1142917280572515,
5
  "eval_steps": 100,
6
- "global_step": 16900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2711,6 +2711,38 @@
2711
  "eval_samples_per_second": 25.986,
2712
  "eval_steps_per_second": 3.249,
2713
  "step": 16900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2714
  }
2715
  ],
2716
  "logging_steps": 100,
@@ -2718,7 +2750,7 @@
2718
  "num_input_tokens_seen": 0,
2719
  "num_train_epochs": 30,
2720
  "save_steps": 100,
2721
- "total_flos": 1.8525968673735387e+20,
2722
  "train_batch_size": 8,
2723
  "trial_name": null,
2724
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.198484529572721,
5
  "eval_steps": 100,
6
+ "global_step": 17100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2711
  "eval_samples_per_second": 25.986,
2712
  "eval_steps_per_second": 3.249,
2713
  "step": 16900
2714
+ },
2715
+ {
2716
+ "epoch": 7.16,
2717
+ "grad_norm": 4.391873359680176,
2718
+ "learning_rate": 3.834699646643109e-05,
2719
+ "loss": 1.3334,
2720
+ "step": 17000
2721
+ },
2722
+ {
2723
+ "epoch": 7.16,
2724
+ "eval_cer": 0.4652682726526143,
2725
+ "eval_loss": 2.7629072666168213,
2726
+ "eval_runtime": 380.8752,
2727
+ "eval_samples_per_second": 24.885,
2728
+ "eval_steps_per_second": 3.111,
2729
+ "step": 17000
2730
+ },
2731
+ {
2732
+ "epoch": 7.2,
2733
+ "grad_norm": 7.738865375518799,
2734
+ "learning_rate": 3.827632508833923e-05,
2735
+ "loss": 1.4858,
2736
+ "step": 17100
2737
+ },
2738
+ {
2739
+ "epoch": 7.2,
2740
+ "eval_cer": 0.4701737319619882,
2741
+ "eval_loss": 2.189877986907959,
2742
+ "eval_runtime": 363.6078,
2743
+ "eval_samples_per_second": 26.067,
2744
+ "eval_steps_per_second": 3.259,
2745
+ "step": 17100
2746
  }
2747
  ],
2748
  "logging_steps": 100,
 
2750
  "num_input_tokens_seen": 0,
2751
  "num_train_epochs": 30,
2752
  "save_steps": 100,
2753
+ "total_flos": 1.8747010341512125e+20,
2754
  "train_batch_size": 8,
2755
  "trial_name": null,
2756
  "trial_params": null