Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891558696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e05637d9fe00567351aebe30b8907548391539066a69466b08d62fb0de2c8b6a
|
3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783272762
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd2dde3b7d6cb9a958c80e4da86c9ac7e84d7b0aad33d337c26e27372676e0e8
|
3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac6d446adeddd129c374743386b9fda911e1104accc0a9ad12d81db0a9913ff
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e468fb4a523a6bf7dace3eac71fcc8bc1ed6b95078548573228e864e9505bcd
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.08158940076828003,
|
3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-12500",
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2269,6 +2269,84 @@
|
|
2269 |
"eval_samples_per_second": 17.144,
|
2270 |
"eval_steps_per_second": 2.143,
|
2271 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2272 |
}
|
2273 |
],
|
2274 |
"logging_steps": 50,
|
@@ -2288,7 +2366,7 @@
|
|
2288 |
"attributes": {}
|
2289 |
}
|
2290 |
},
|
2291 |
-
"total_flos": 7.
|
2292 |
"train_batch_size": 8,
|
2293 |
"trial_name": null,
|
2294 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.08158940076828003,
|
3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-12500",
|
4 |
+
"epoch": 2.4,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 15000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2269 |
"eval_samples_per_second": 17.144,
|
2270 |
"eval_steps_per_second": 2.143,
|
2271 |
"step": 14500
|
2272 |
+
},
|
2273 |
+
{
|
2274 |
+
"epoch": 2.328,
|
2275 |
+
"grad_norm": 7419.63623046875,
|
2276 |
+
"learning_rate": 6.72e-06,
|
2277 |
+
"loss": 0.0527,
|
2278 |
+
"step": 14550
|
2279 |
+
},
|
2280 |
+
{
|
2281 |
+
"epoch": 2.336,
|
2282 |
+
"grad_norm": 6152.6513671875,
|
2283 |
+
"learning_rate": 6.64e-06,
|
2284 |
+
"loss": 0.048,
|
2285 |
+
"step": 14600
|
2286 |
+
},
|
2287 |
+
{
|
2288 |
+
"epoch": 2.344,
|
2289 |
+
"grad_norm": 6703.68994140625,
|
2290 |
+
"learning_rate": 6.560000000000001e-06,
|
2291 |
+
"loss": 0.0537,
|
2292 |
+
"step": 14650
|
2293 |
+
},
|
2294 |
+
{
|
2295 |
+
"epoch": 2.352,
|
2296 |
+
"grad_norm": 8612.31640625,
|
2297 |
+
"learning_rate": 6.48e-06,
|
2298 |
+
"loss": 0.0512,
|
2299 |
+
"step": 14700
|
2300 |
+
},
|
2301 |
+
{
|
2302 |
+
"epoch": 2.36,
|
2303 |
+
"grad_norm": 6183.3798828125,
|
2304 |
+
"learning_rate": 6.4000000000000006e-06,
|
2305 |
+
"loss": 0.0499,
|
2306 |
+
"step": 14750
|
2307 |
+
},
|
2308 |
+
{
|
2309 |
+
"epoch": 2.368,
|
2310 |
+
"grad_norm": 7795.396484375,
|
2311 |
+
"learning_rate": 6.3200000000000005e-06,
|
2312 |
+
"loss": 0.0525,
|
2313 |
+
"step": 14800
|
2314 |
+
},
|
2315 |
+
{
|
2316 |
+
"epoch": 2.376,
|
2317 |
+
"grad_norm": 6911.2099609375,
|
2318 |
+
"learning_rate": 6.2399999999999995e-06,
|
2319 |
+
"loss": 0.0503,
|
2320 |
+
"step": 14850
|
2321 |
+
},
|
2322 |
+
{
|
2323 |
+
"epoch": 2.384,
|
2324 |
+
"grad_norm": 9744.9267578125,
|
2325 |
+
"learning_rate": 6.16e-06,
|
2326 |
+
"loss": 0.0509,
|
2327 |
+
"step": 14900
|
2328 |
+
},
|
2329 |
+
{
|
2330 |
+
"epoch": 2.392,
|
2331 |
+
"grad_norm": 4487.8115234375,
|
2332 |
+
"learning_rate": 6.08e-06,
|
2333 |
+
"loss": 0.0504,
|
2334 |
+
"step": 14950
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 2.4,
|
2338 |
+
"grad_norm": 6276.47607421875,
|
2339 |
+
"learning_rate": 6e-06,
|
2340 |
+
"loss": 0.0505,
|
2341 |
+
"step": 15000
|
2342 |
+
},
|
2343 |
+
{
|
2344 |
+
"epoch": 2.4,
|
2345 |
+
"eval_loss": 0.08178989589214325,
|
2346 |
+
"eval_runtime": 116.6529,
|
2347 |
+
"eval_samples_per_second": 17.145,
|
2348 |
+
"eval_steps_per_second": 2.143,
|
2349 |
+
"step": 15000
|
2350 |
}
|
2351 |
],
|
2352 |
"logging_steps": 50,
|
|
|
2366 |
"attributes": {}
|
2367 |
}
|
2368 |
},
|
2369 |
+
"total_flos": 7.30749468672e+16,
|
2370 |
"train_batch_size": 8,
|
2371 |
"trial_name": null,
|
2372 |
"trial_params": null
|