Rakhman16 commited on
Commit
32c91f4
·
verified ·
1 Parent(s): 1c42d8a

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3866dd2a6475ce2b7385f181f047f2dde98048fd09569a5804ff351fd5e9912f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0426db5bf32594814a8c37e618d6398246b9271f06265c61e6b718a3b1d4b11f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fa541d3192bd288c7edb3660d558e142b5e462f59453619d7373f84590e81f7
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109669df17b5f6b02118f596a85fbd94dfbe94bfa26ddbed9e0a32a469bb37b2
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fe8dfec12d6b343eb28ee2b43b75b45c3ae2d185dfc75ecd757d9ec7c567e31
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ddcf15b11b5d2f3a427728cce5454c9ef039ba8bfd4e438abdef47917e7a39
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2504d9c9451078f79b98032c170c402f3ffeb92d11496a90427e835bd48cd055
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff57c48e4ef354be063370e4a408d39b39e1d0cde79b3adacb1e6416a8aedc46
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.7797173732335825,
5
  "eval_steps": 500,
6
- "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -200,6 +200,13 @@
200
  "learning_rate": 8.89443059019119e-07,
201
  "loss": 0.3117,
202
  "step": 11500
 
 
 
 
 
 
 
203
  }
204
  ],
205
  "logging_steps": 500,
@@ -219,7 +226,7 @@
219
  "attributes": {}
220
  }
221
  },
222
- "total_flos": 2.800962713419776e+16,
223
  "train_batch_size": 4,
224
  "trial_name": null,
225
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.987531172069826,
5
  "eval_steps": 500,
6
+ "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
200
  "learning_rate": 8.89443059019119e-07,
201
  "loss": 0.3117,
202
  "step": 11500
203
+ },
204
+ {
205
+ "epoch": 4.987531172069826,
206
+ "grad_norm": 0.9830431342124939,
207
+ "learning_rate": 5.8187863674147975e-08,
208
+ "loss": 0.3083,
209
+ "step": 12000
210
  }
211
  ],
212
  "logging_steps": 500,
 
226
  "attributes": {}
227
  }
228
  },
229
+ "total_flos": 2.922754291531776e+16,
230
  "train_batch_size": 4,
231
  "trial_name": null,
232
  "trial_params": null