ashanhr commited on
Commit
9014b72
1 Parent(s): dc0658a

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64bc0a1397bc980ff597684a085c93f8dd2fe2b7b54d58711479664a09d54c3
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b62a1eed64814fb09932d72e4b158e825506d75ad15f298d9e787f0552502f
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd1ceb3845f2cb7eceaae674378edb7dd24738c83638a474995504a3ff2267a2
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a47b4b5e03532ead7e8fc7c3e923c4911144dbd2504a20958ebe8635de704ed1
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a32194dd689a72f849b21ab9782143a146208cc1d25018113d6d979749680f1
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78a5cb22af6a2e357b345873f02b48b612899f785e45880c5d3859603cec97ca
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9853e2c9f15d5d445435ee3d094c763606e84907256808b6d8c015e81b32d039
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e72175515cc54637e49cc1705761942275c0209bb6b05ae32b0a8db952e415f
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1558a03e21b124dc4cf9c782c9fd3ddda4c5e0f9d6236b7ad5d4b4ead1dac474
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a4b19e6e6a244eb7dd17c9a36301bdbe54a114d743b96df3cd2e7dd5ae72aa2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.598821300778783,
5
  "eval_steps": 100,
6
- "global_step": 13300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2135,6 +2135,38 @@
2135
  "eval_samples_per_second": 24.57,
2136
  "eval_steps_per_second": 3.072,
2137
  "step": 13300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2138
  }
2139
  ],
2140
  "logging_steps": 100,
@@ -2142,7 +2174,7 @@
2142
  "num_input_tokens_seen": 0,
2143
  "num_train_epochs": 30,
2144
  "save_steps": 100,
2145
- "total_flos": 1.4577321342657543e+20,
2146
  "train_batch_size": 8,
2147
  "trial_name": null,
2148
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.683014102294254,
5
  "eval_steps": 100,
6
+ "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2135
  "eval_samples_per_second": 24.57,
2136
  "eval_steps_per_second": 3.072,
2137
  "step": 13300
2138
+ },
2139
+ {
2140
+ "epoch": 5.64,
2141
+ "grad_norm": 2.6984710693359375,
2142
+ "learning_rate": 4.08904593639576e-05,
2143
+ "loss": 1.4041,
2144
+ "step": 13400
2145
+ },
2146
+ {
2147
+ "epoch": 5.64,
2148
+ "eval_cer": 0.46995620038324665,
2149
+ "eval_loss": 2.8926751613616943,
2150
+ "eval_runtime": 379.5665,
2151
+ "eval_samples_per_second": 24.971,
2152
+ "eval_steps_per_second": 3.122,
2153
+ "step": 13400
2154
+ },
2155
+ {
2156
+ "epoch": 5.68,
2157
+ "grad_norm": 28.124893188476562,
2158
+ "learning_rate": 4.081978798586573e-05,
2159
+ "loss": 1.3813,
2160
+ "step": 13500
2161
+ },
2162
+ {
2163
+ "epoch": 5.68,
2164
+ "eval_cer": 0.453462907199562,
2165
+ "eval_loss": 3.064070701599121,
2166
+ "eval_runtime": 364.1898,
2167
+ "eval_samples_per_second": 26.025,
2168
+ "eval_steps_per_second": 3.254,
2169
+ "step": 13500
2170
  }
2171
  ],
2172
  "logging_steps": 100,
 
2174
  "num_input_tokens_seen": 0,
2175
  "num_train_epochs": 30,
2176
  "save_steps": 100,
2177
+ "total_flos": 1.4794168929030072e+20,
2178
  "train_batch_size": 8,
2179
  "trial_name": null,
2180
  "trial_params": null