ashanhr committed on
Commit
1513abb
1 Parent(s): a57f13c

Training in progress, step 5400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526f37a1fd8614e33d688ca0c95a0b4bdb2270b3c6287907514481f46e211e72
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a9af9605c3906a1b6201f31fe2e2fefb03a67c40a338ba51cff95ed8f7b20e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ce64a952a4cce525f3a0cb0ed6d5e65ba07369000da2ff59f3ff55e450ce479
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2774d795ce38b6d916deba57ca044f1e6fc174438a05dde9c1651a651c9e878
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c379ec78bc415dd3c67bb2c638a8e23762a3c06fee68b3ab33b1391af723912b
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63825d3eaa200eddbf762aeb8547833820295db590b26732e0e85a911b0d5d91
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:268b899628f3fbd3f8e86b05216b622b8e7edbffdb48136b8a16736e8c90ff48
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e3f4a2dab0aeb0cd0b06645f79437bf62a24263c82bcbe99dbb49bfb3319ca7
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:784d3c121c9f359553146d9fe14f9140da42065b084f4fa423f15dfc626b4e3b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dfb0d0b3b5a880ecd881c57b956364f90897bc6653e089f1f8e6e7eff0c817d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.1890128394022312,
5
  "eval_steps": 100,
6
- "global_step": 5200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -839,6 +839,38 @@
839
  "eval_samples_per_second": 26.241,
840
  "eval_steps_per_second": 3.281,
841
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
  }
843
  ],
844
  "logging_steps": 100,
@@ -846,7 +878,7 @@
846
  "num_input_tokens_seen": 0,
847
  "num_train_epochs": 30,
848
  "save_steps": 100,
849
- "total_flos": 5.703634893009586e+19,
850
  "train_batch_size": 8,
851
  "trial_name": null,
852
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.2732056409177015,
5
  "eval_steps": 100,
6
+ "global_step": 5400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
839
  "eval_samples_per_second": 26.241,
840
  "eval_steps_per_second": 3.281,
841
  "step": 5200
842
+ },
843
+ {
844
+ "epoch": 2.23,
845
+ "grad_norm": 2.109961986541748,
846
+ "learning_rate": 4.661342756183746e-05,
847
+ "loss": 2.4585,
848
+ "step": 5300
849
+ },
850
+ {
851
+ "epoch": 2.23,
852
+ "eval_cer": 0.5222273278323101,
853
+ "eval_loss": 1.6703613996505737,
854
+ "eval_runtime": 374.7053,
855
+ "eval_samples_per_second": 25.295,
856
+ "eval_steps_per_second": 3.162,
857
+ "step": 5300
858
+ },
859
+ {
860
+ "epoch": 2.27,
861
+ "grad_norm": 3.7831201553344727,
862
+ "learning_rate": 4.654275618374558e-05,
863
+ "loss": 1.6107,
864
+ "step": 5400
865
+ },
866
+ {
867
+ "epoch": 2.27,
868
+ "eval_cer": 0.5470601462594346,
869
+ "eval_loss": 2.0870959758758545,
870
+ "eval_runtime": 367.5169,
871
+ "eval_samples_per_second": 25.789,
872
+ "eval_steps_per_second": 3.224,
873
+ "step": 5400
874
  }
875
  ],
876
  "logging_steps": 100,
 
878
  "num_input_tokens_seen": 0,
879
  "num_train_epochs": 30,
880
  "save_steps": 100,
881
+ "total_flos": 5.92170964075638e+19,
882
  "train_batch_size": 8,
883
  "trial_name": null,
884
  "trial_params": null