ashanhr commited on
Commit
8ef00d8
1 Parent(s): bc1a94c

Training in progress, step 5800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a9af9605c3906a1b6201f31fe2e2fefb03a67c40a338ba51cff95ed8f7b20e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057f9570a6cc72688abe71630c100e57d8ed5e79a107a7c1dbf260b46e1fd4da
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2774d795ce38b6d916deba57ca044f1e6fc174438a05dde9c1651a651c9e878
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36620432df2b5bec3be20dd101be03752eb30e228a15f087605a4b1b3817ad40
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63825d3eaa200eddbf762aeb8547833820295db590b26732e0e85a911b0d5d91
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d7b821883cd1ae362dfa89c4e8eef945c5104a2fd3f8917f33805e64a31688a
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e3f4a2dab0aeb0cd0b06645f79437bf62a24263c82bcbe99dbb49bfb3319ca7
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a3b30c167fde20793f59d220cb75d7e62231451f8337cafebba1ca6c36c689
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dfb0d0b3b5a880ecd881c57b956364f90897bc6653e089f1f8e6e7eff0c817d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f36e4a30c4e4159533881d5d1202edb3c351c6c5b1afb1031c411b06ae9325
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.2732056409177015,
5
  "eval_steps": 100,
6
- "global_step": 5400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -871,6 +871,70 @@
871
  "eval_samples_per_second": 25.789,
872
  "eval_steps_per_second": 3.224,
873
  "step": 5400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  }
875
  ],
876
  "logging_steps": 100,
@@ -878,7 +942,7 @@
878
  "num_input_tokens_seen": 0,
879
  "num_train_epochs": 30,
880
  "save_steps": 100,
881
- "total_flos": 5.92170964075638e+19,
882
  "train_batch_size": 8,
883
  "trial_name": null,
884
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.4415912439486425,
5
  "eval_steps": 100,
6
+ "global_step": 5800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
871
  "eval_samples_per_second": 25.789,
872
  "eval_steps_per_second": 3.224,
873
  "step": 5400
874
+ },
875
+ {
876
+ "epoch": 2.32,
877
+ "grad_norm": 12.008744239807129,
878
+ "learning_rate": 4.6472084805653715e-05,
879
+ "loss": 2.3883,
880
+ "step": 5500
881
+ },
882
+ {
883
+ "epoch": 2.32,
884
+ "eval_cer": 0.5516771929138478,
885
+ "eval_loss": 3.582747459411621,
886
+ "eval_runtime": 376.938,
887
+ "eval_samples_per_second": 25.145,
888
+ "eval_steps_per_second": 3.144,
889
+ "step": 5500
890
+ },
891
+ {
892
+ "epoch": 2.36,
893
+ "grad_norm": 2.4807026386260986,
894
+ "learning_rate": 4.6401413427561844e-05,
895
+ "loss": 2.1991,
896
+ "step": 5600
897
+ },
898
+ {
899
+ "epoch": 2.36,
900
+ "eval_cer": 0.5373152203668218,
901
+ "eval_loss": 3.5865066051483154,
902
+ "eval_runtime": 360.1614,
903
+ "eval_samples_per_second": 26.316,
904
+ "eval_steps_per_second": 3.29,
905
+ "step": 5600
906
+ },
907
+ {
908
+ "epoch": 2.4,
909
+ "grad_norm": 6.418155670166016,
910
+ "learning_rate": 4.6330742049469965e-05,
911
+ "loss": 2.3905,
912
+ "step": 5700
913
+ },
914
+ {
915
+ "epoch": 2.4,
916
+ "eval_cer": 0.527748719252278,
917
+ "eval_loss": 2.6406588554382324,
918
+ "eval_runtime": 379.816,
919
+ "eval_samples_per_second": 24.954,
920
+ "eval_steps_per_second": 3.12,
921
+ "step": 5700
922
+ },
923
+ {
924
+ "epoch": 2.44,
925
+ "grad_norm": 2.4806301593780518,
926
+ "learning_rate": 4.626007067137809e-05,
927
+ "loss": 3.3218,
928
+ "step": 5800
929
+ },
930
+ {
931
+ "epoch": 2.44,
932
+ "eval_cer": 0.5217727112744907,
933
+ "eval_loss": 1.8109639883041382,
934
+ "eval_runtime": 351.8213,
935
+ "eval_samples_per_second": 26.94,
936
+ "eval_steps_per_second": 3.368,
937
+ "step": 5800
938
  }
939
  ],
940
  "logging_steps": 100,
 
942
  "num_input_tokens_seen": 0,
943
  "num_train_epochs": 30,
944
  "save_steps": 100,
945
+ "total_flos": 6.356688358918305e+19,
946
  "train_batch_size": 8,
947
  "trial_name": null,
948
  "trial_params": null