ashanhr commited on
Commit
88f9c32
1 Parent(s): 2ddce50

Training in progress, step 13300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15272146b88a824069cf989c514d7a86851658753712d4906dff19411477f5a3
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64bc0a1397bc980ff597684a085c93f8dd2fe2b7b54d58711479664a09d54c3
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28dee3eb1b5dbc17cd7055dd49b59c3229cd41008819761a6fe2d0873e23dc31
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1ceb3845f2cb7eceaae674378edb7dd24738c83638a474995504a3ff2267a2
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1686978d9f8b694b0c18e4cc3de858b2d92028154846e1a833d9c377f88ed652
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a32194dd689a72f849b21ab9782143a146208cc1d25018113d6d979749680f1
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90f78bc01501a673d4e6bb87f06a245f60bff276cc0939cfea2b3049bc720ada
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9853e2c9f15d5d445435ee3d094c763606e84907256808b6d8c015e81b32d039
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945f2872409982395f4d4eaed58eeb0f905ab3c39b6e7047ea7eac693a1e3901
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1558a03e21b124dc4cf9c782c9fd3ddda4c5e0f9d6236b7ad5d4b4ead1dac474
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.514628499263313,
5
  "eval_steps": 100,
6
- "global_step": 13100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2103,6 +2103,38 @@
2103
  "eval_samples_per_second": 25.334,
2104
  "eval_steps_per_second": 3.167,
2105
  "step": 13100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2106
  }
2107
  ],
2108
  "logging_steps": 100,
@@ -2110,7 +2142,7 @@
2110
  "num_input_tokens_seen": 0,
2111
  "num_train_epochs": 30,
2112
  "save_steps": 100,
2113
- "total_flos": 1.4359858237396415e+20,
2114
  "train_batch_size": 8,
2115
  "trial_name": null,
2116
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.598821300778783,
5
  "eval_steps": 100,
6
+ "global_step": 13300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2103
  "eval_samples_per_second": 25.334,
2104
  "eval_steps_per_second": 3.167,
2105
  "step": 13100
2106
+ },
2107
+ {
2108
+ "epoch": 5.56,
2109
+ "grad_norm": 3.400202512741089,
2110
+ "learning_rate": 4.1031802120141345e-05,
2111
+ "loss": 2.035,
2112
+ "step": 13200
2113
+ },
2114
+ {
2115
+ "epoch": 5.56,
2116
+ "eval_cer": 0.4721559579210825,
2117
+ "eval_loss": 2.723193645477295,
2118
+ "eval_runtime": 442.6463,
2119
+ "eval_samples_per_second": 21.412,
2120
+ "eval_steps_per_second": 2.677,
2121
+ "step": 13200
2122
+ },
2123
+ {
2124
+ "epoch": 5.6,
2125
+ "grad_norm": 28.22166633605957,
2126
+ "learning_rate": 4.0961130742049467e-05,
2127
+ "loss": 2.583,
2128
+ "step": 13300
2129
+ },
2130
+ {
2131
+ "epoch": 5.6,
2132
+ "eval_cer": 0.4679446443236479,
2133
+ "eval_loss": 2.848484516143799,
2134
+ "eval_runtime": 385.7533,
2135
+ "eval_samples_per_second": 24.57,
2136
+ "eval_steps_per_second": 3.072,
2137
+ "step": 13300
2138
  }
2139
  ],
2140
  "logging_steps": 100,
 
2142
  "num_input_tokens_seen": 0,
2143
  "num_train_epochs": 30,
2144
  "save_steps": 100,
2145
+ "total_flos": 1.4577321342657543e+20,
2146
  "train_batch_size": 8,
2147
  "trial_name": null,
2148
  "trial_params": null