ashanhr commited on
Commit
3ab6ee1
·
verified ·
1 Parent(s): 810c49d

Training in progress, step 12900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c6e9580fb798101735047d737cee85a6bb1061fad515187b2725aaa63bfedfc
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c6b54d077cb667a4cfbe738582da2ecd39159286c41485b7c6efd53cb7bc1a
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e97e01dc64ab5637bb404489fce4bc374a9d40ac15f27ef1b88fa4b9f545c28
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45da60abe68c6f65c5ae53251ba8b983302267f69d34edaa7bf0ebf95d54ac3a
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23b5eb8171ce3b32f1473da5c445fd724fddce4624bb8f3de388a3614ee04629
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a67ddb96ec88d7be4083d93ecc9ee80924bab46ac10dc058c48085c087713d8
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d6c38bb1634ce2d5111d8f0da0ee0f0eeb90e7348bc821241c2773d71d06af6
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b3ac36e7fbd4b3c080b9441c4f2b70acea99b1b8d1002061ebda98ec5266a7e
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e2d9a4da1dca4e718c671213c3210706697de61ee8df6908344ec05bce806c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c402405be7dbbda177a1e84950f8dcd4bc4f3baf7fe67d401f484f341b04e122
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.346242896232372,
5
  "eval_steps": 100,
6
- "global_step": 12700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2039,6 +2039,38 @@
2039
  "eval_samples_per_second": 26.205,
2040
  "eval_steps_per_second": 3.276,
2041
  "step": 12700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2042
  }
2043
  ],
2044
  "logging_steps": 100,
@@ -2046,7 +2078,7 @@
2046
  "num_input_tokens_seen": 0,
2047
  "num_train_epochs": 30,
2048
  "save_steps": 100,
2049
- "total_flos": 1.3921302086472671e+20,
2050
  "train_batch_size": 8,
2051
  "trial_name": null,
2052
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.430435697747843,
5
  "eval_steps": 100,
6
+ "global_step": 12900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2039
  "eval_samples_per_second": 26.205,
2040
  "eval_steps_per_second": 3.276,
2041
  "step": 12700
2042
+ },
2043
+ {
2044
+ "epoch": 5.39,
2045
+ "grad_norm": 1.902030348777771,
2046
+ "learning_rate": 4.131448763250883e-05,
2047
+ "loss": 2.1405,
2048
+ "step": 12800
2049
+ },
2050
+ {
2051
+ "epoch": 5.39,
2052
+ "eval_cer": 0.4769636502287748,
2053
+ "eval_loss": 2.750805139541626,
2054
+ "eval_runtime": 383.778,
2055
+ "eval_samples_per_second": 24.697,
2056
+ "eval_steps_per_second": 3.088,
2057
+ "step": 12800
2058
+ },
2059
+ {
2060
+ "epoch": 5.43,
2061
+ "grad_norm": 5.2548041343688965,
2062
+ "learning_rate": 4.124381625441696e-05,
2063
+ "loss": 1.967,
2064
+ "step": 12900
2065
+ },
2066
+ {
2067
+ "epoch": 5.43,
2068
+ "eval_cer": 0.4813216143287318,
2069
+ "eval_loss": 2.39349627494812,
2070
+ "eval_runtime": 366.8876,
2071
+ "eval_samples_per_second": 25.834,
2072
+ "eval_steps_per_second": 3.23,
2073
+ "step": 12900
2074
  }
2075
  ],
2076
  "logging_steps": 100,
 
2078
  "num_input_tokens_seen": 0,
2079
  "num_train_epochs": 30,
2080
  "save_steps": 100,
2081
+ "total_flos": 1.4138922393346692e+20,
2082
  "train_batch_size": 8,
2083
  "trial_name": null,
2084
  "trial_params": null