Training in progress, step 500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 144748392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e1ddf20e43ee498cc289f2bad036aa9eb0970219206ea831c5581b208af2226
|
3 |
size 144748392
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73877972
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:800b02184e3f2fd61d964ff1a7bb0ea20318449052b1c3837ad59923d32c1d68
|
3 |
size 73877972
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f7e4d7d2b0c8aa3af21365f6f0926784e1d68c844ac2fbc1cc56728a1f7c21d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -327,6 +327,84 @@
|
|
327 |
"eval_samples_per_second": 25.263,
|
328 |
"eval_steps_per_second": 6.316,
|
329 |
"step": 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
}
|
331 |
],
|
332 |
"logging_steps": 10,
|
@@ -355,7 +433,7 @@
|
|
355 |
"attributes": {}
|
356 |
}
|
357 |
},
|
358 |
-
"total_flos":
|
359 |
"train_batch_size": 8,
|
360 |
"trial_name": null,
|
361 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5024861097335815,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-500",
|
4 |
+
"epoch": 0.10111734668082309,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
327 |
"eval_samples_per_second": 25.263,
|
328 |
"eval_steps_per_second": 6.316,
|
329 |
"step": 400
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 0.08291622427827494,
|
333 |
+
"grad_norm": 0.16395606100559235,
|
334 |
+
"learning_rate": 4.695790918802576e-05,
|
335 |
+
"loss": 0.6875,
|
336 |
+
"step": 410
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"epoch": 0.0849385712118914,
|
340 |
+
"grad_norm": 0.53562331199646,
|
341 |
+
"learning_rate": 4.252125897855932e-05,
|
342 |
+
"loss": 0.5775,
|
343 |
+
"step": 420
|
344 |
+
},
|
345 |
+
{
|
346 |
+
"epoch": 0.08696091814550787,
|
347 |
+
"grad_norm": 0.15994961559772491,
|
348 |
+
"learning_rate": 3.824753850538082e-05,
|
349 |
+
"loss": 0.5303,
|
350 |
+
"step": 430
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"epoch": 0.08898326507912432,
|
354 |
+
"grad_norm": 0.2935360074043274,
|
355 |
+
"learning_rate": 3.414886209349615e-05,
|
356 |
+
"loss": 0.5177,
|
357 |
+
"step": 440
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"epoch": 0.09100561201274078,
|
361 |
+
"grad_norm": 0.4813726544380188,
|
362 |
+
"learning_rate": 3.0236847886501542e-05,
|
363 |
+
"loss": 0.2539,
|
364 |
+
"step": 450
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"epoch": 0.09302795894635725,
|
368 |
+
"grad_norm": 0.15252335369586945,
|
369 |
+
"learning_rate": 2.6522584913693294e-05,
|
370 |
+
"loss": 0.6666,
|
371 |
+
"step": 460
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 0.0950503058799737,
|
375 |
+
"grad_norm": 0.3821258246898651,
|
376 |
+
"learning_rate": 2.301660165700936e-05,
|
377 |
+
"loss": 0.5569,
|
378 |
+
"step": 470
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 0.09707265281359018,
|
382 |
+
"grad_norm": 0.14530214667320251,
|
383 |
+
"learning_rate": 1.9728836206903656e-05,
|
384 |
+
"loss": 0.5426,
|
385 |
+
"step": 480
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 0.09909499974720663,
|
389 |
+
"grad_norm": 0.28957322239875793,
|
390 |
+
"learning_rate": 1.6668608091748495e-05,
|
391 |
+
"loss": 0.5386,
|
392 |
+
"step": 490
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"epoch": 0.10111734668082309,
|
396 |
+
"grad_norm": 0.19388361275196075,
|
397 |
+
"learning_rate": 1.3844591860619383e-05,
|
398 |
+
"loss": 0.2322,
|
399 |
+
"step": 500
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 0.10111734668082309,
|
403 |
+
"eval_loss": 0.5024861097335815,
|
404 |
+
"eval_runtime": 330.0244,
|
405 |
+
"eval_samples_per_second": 25.234,
|
406 |
+
"eval_steps_per_second": 6.309,
|
407 |
+
"step": 500
|
408 |
}
|
409 |
],
|
410 |
"logging_steps": 10,
|
|
|
433 |
"attributes": {}
|
434 |
}
|
435 |
},
|
436 |
+
"total_flos": 2.2973010092752896e+17,
|
437 |
"train_batch_size": 8,
|
438 |
"trial_name": null,
|
439 |
"trial_params": null
|