ashanhr commited on
Commit
35e73c9
·
verified ·
1 Parent(s): ecbda97

Training in progress, step 13100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c6b54d077cb667a4cfbe738582da2ecd39159286c41485b7c6efd53cb7bc1a
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15272146b88a824069cf989c514d7a86851658753712d4906dff19411477f5a3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45da60abe68c6f65c5ae53251ba8b983302267f69d34edaa7bf0ebf95d54ac3a
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28dee3eb1b5dbc17cd7055dd49b59c3229cd41008819761a6fe2d0873e23dc31
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a67ddb96ec88d7be4083d93ecc9ee80924bab46ac10dc058c48085c087713d8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1686978d9f8b694b0c18e4cc3de858b2d92028154846e1a833d9c377f88ed652
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b3ac36e7fbd4b3c080b9441c4f2b70acea99b1b8d1002061ebda98ec5266a7e
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f78bc01501a673d4e6bb87f06a245f60bff276cc0939cfea2b3049bc720ada
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c402405be7dbbda177a1e84950f8dcd4bc4f3baf7fe67d401f484f341b04e122
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945f2872409982395f4d4eaed58eeb0f905ab3c39b6e7047ea7eac693a1e3901
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.430435697747843,
5
  "eval_steps": 100,
6
- "global_step": 12900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2071,6 +2071,38 @@
2071
  "eval_samples_per_second": 25.834,
2072
  "eval_steps_per_second": 3.23,
2073
  "step": 12900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2074
  }
2075
  ],
2076
  "logging_steps": 100,
@@ -2078,7 +2110,7 @@
2078
  "num_input_tokens_seen": 0,
2079
  "num_train_epochs": 30,
2080
  "save_steps": 100,
2081
- "total_flos": 1.4138922393346692e+20,
2082
  "train_batch_size": 8,
2083
  "trial_name": null,
2084
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.514628499263313,
5
  "eval_steps": 100,
6
+ "global_step": 13100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2071
  "eval_samples_per_second": 25.834,
2072
  "eval_steps_per_second": 3.23,
2073
  "step": 12900
2074
+ },
2075
+ {
2076
+ "epoch": 5.47,
2077
+ "grad_norm": 3.4912497997283936,
2078
+ "learning_rate": 4.1173144876325096e-05,
2079
+ "loss": 2.5075,
2080
+ "step": 13000
2081
+ },
2082
+ {
2083
+ "epoch": 5.47,
2084
+ "eval_cer": 0.48205731101638577,
2085
+ "eval_loss": 2.9853949546813965,
2086
+ "eval_runtime": 382.1207,
2087
+ "eval_samples_per_second": 24.804,
2088
+ "eval_steps_per_second": 3.101,
2089
+ "step": 13000
2090
+ },
2091
+ {
2092
+ "epoch": 5.51,
2093
+ "grad_norm": 3.671482801437378,
2094
+ "learning_rate": 4.110247349823322e-05,
2095
+ "loss": 2.3712,
2096
+ "step": 13100
2097
+ },
2098
+ {
2099
+ "epoch": 5.51,
2100
+ "eval_cer": 0.47388154550076256,
2101
+ "eval_loss": 2.6945626735687256,
2102
+ "eval_runtime": 374.115,
2103
+ "eval_samples_per_second": 25.334,
2104
+ "eval_steps_per_second": 3.167,
2105
+ "step": 13100
2106
  }
2107
  ],
2108
  "logging_steps": 100,
 
2110
  "num_input_tokens_seen": 0,
2111
  "num_train_epochs": 30,
2112
  "save_steps": 100,
2113
+ "total_flos": 1.4359858237396415e+20,
2114
  "train_batch_size": 8,
2115
  "trial_name": null,
2116
  "trial_params": null