ashanhr committed on
Commit
1990d51
1 Parent(s): 1ffcb64

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:057f9570a6cc72688abe71630c100e57d8ed5e79a107a7c1dbf260b46e1fd4da
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d64164218ceb70df3188e5d55bcfa4289cb63f7182305fcc19b63af2b99185
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36620432df2b5bec3be20dd101be03752eb30e228a15f087605a4b1b3817ad40
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c5f7365e986a4df25beb4c71b990dd0017fde5984b8e75111d186387f61b95
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d7b821883cd1ae362dfa89c4e8eef945c5104a2fd3f8917f33805e64a31688a
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc61d44fe8afd95895c257c0e8778313394a3453a26e03b583759623c7260b89
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29a3b30c167fde20793f59d220cb75d7e62231451f8337cafebba1ca6c36c689
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f69fe8240e55c5995426573757e5623029e976dcfdb9639d983a89e4307225f9
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72f36e4a30c4e4159533881d5d1202edb3c351c6c5b1afb1031c411b06ae9325
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe5c154bf4966b68a2d37446afad8d6e4131f9298c56c1acc1015e0b966ad66
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.4415912439486425,
5
  "eval_steps": 100,
6
- "global_step": 5800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -935,6 +935,38 @@
935
  "eval_samples_per_second": 26.94,
936
  "eval_steps_per_second": 3.368,
937
  "step": 5800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
938
  }
939
  ],
940
  "logging_steps": 100,
@@ -942,7 +974,7 @@
942
  "num_input_tokens_seen": 0,
943
  "num_train_epochs": 30,
944
  "save_steps": 100,
945
- "total_flos": 6.356688358918305e+19,
946
  "train_batch_size": 8,
947
  "trial_name": null,
948
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5257840454641127,
5
  "eval_steps": 100,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
935
  "eval_samples_per_second": 26.94,
936
  "eval_steps_per_second": 3.368,
937
  "step": 5800
938
+ },
939
+ {
940
+ "epoch": 2.48,
941
+ "grad_norm": 2.4776511192321777,
942
+ "learning_rate": 4.618939929328622e-05,
943
+ "loss": 2.3876,
944
+ "step": 5900
945
+ },
946
+ {
947
+ "epoch": 2.48,
948
+ "eval_cer": 0.5089310156036134,
949
+ "eval_loss": 3.134875774383545,
950
+ "eval_runtime": 372.2436,
951
+ "eval_samples_per_second": 25.462,
952
+ "eval_steps_per_second": 3.183,
953
+ "step": 5900
954
+ },
955
+ {
956
+ "epoch": 2.53,
957
+ "grad_norm": 9.97956657409668,
958
+ "learning_rate": 4.611872791519435e-05,
959
+ "loss": 4.1527,
960
+ "step": 6000
961
+ },
962
+ {
963
+ "epoch": 2.53,
964
+ "eval_cer": 0.4981815337687224,
965
+ "eval_loss": 1.7425427436828613,
966
+ "eval_runtime": 363.658,
967
+ "eval_samples_per_second": 26.063,
968
+ "eval_steps_per_second": 3.259,
969
+ "step": 6000
970
  }
971
  ],
972
  "logging_steps": 100,
 
974
  "num_input_tokens_seen": 0,
975
  "num_train_epochs": 30,
976
  "save_steps": 100,
977
+ "total_flos": 6.577336385232916e+19,
978
  "train_batch_size": 8,
979
  "trial_name": null,
980
  "trial_params": null