Training in progress, step 2346, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 56662456
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c143e49ec80b9efa7c889526d4266a7b197596fb39eb75eb8f0c60a959dfb5b3
|
3 |
size 56662456
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29091284
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:229ef2852a3e0ca7253143b05e4b688888a7bf5fee9127a04663067e45d8cf7d
|
3 |
size 29091284
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07d6aa011454a4748f5199ba02a257f0c3397f7d63ff5d7de731bdce7a2a6006
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82f6fb6c04e83ad8c7e7774f50cfceed3bf90e6e42ded09b4deef26723be76bc
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.007140692323446274,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2300",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -16299,6 +16299,328 @@
|
|
16299 |
"eval_samples_per_second": 10.677,
|
16300 |
"eval_steps_per_second": 2.669,
|
16301 |
"step": 2300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16302 |
}
|
16303 |
],
|
16304 |
"logging_steps": 1,
|
@@ -16322,12 +16644,12 @@
|
|
16322 |
"should_evaluate": false,
|
16323 |
"should_log": false,
|
16324 |
"should_save": true,
|
16325 |
-
"should_training_stop":
|
16326 |
},
|
16327 |
"attributes": {}
|
16328 |
}
|
16329 |
},
|
16330 |
-
"total_flos": 1.
|
16331 |
"train_batch_size": 4,
|
16332 |
"trial_name": null,
|
16333 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.007140692323446274,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2300",
|
4 |
+
"epoch": 0.6973063347575701,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2346,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
16299 |
"eval_samples_per_second": 10.677,
|
16300 |
"eval_steps_per_second": 2.669,
|
16301 |
"step": 2300
|
16302 |
+
},
|
16303 |
+
{
|
16304 |
+
"epoch": 0.6839308935537805,
|
16305 |
+
"grad_norm": 0.11718721687793732,
|
16306 |
+
"learning_rate": 1.8306973193326084e-07,
|
16307 |
+
"loss": 0.0639,
|
16308 |
+
"step": 2301
|
16309 |
+
},
|
16310 |
+
{
|
16311 |
+
"epoch": 0.6842281255805313,
|
16312 |
+
"grad_norm": 0.11004548519849777,
|
16313 |
+
"learning_rate": 1.7502605215715672e-07,
|
16314 |
+
"loss": 0.0404,
|
16315 |
+
"step": 2302
|
16316 |
+
},
|
16317 |
+
{
|
16318 |
+
"epoch": 0.6845253576072822,
|
16319 |
+
"grad_norm": 0.10758353769779205,
|
16320 |
+
"learning_rate": 1.671629205999836e-07,
|
16321 |
+
"loss": 0.0538,
|
16322 |
+
"step": 2303
|
16323 |
+
},
|
16324 |
+
{
|
16325 |
+
"epoch": 0.6848225896340331,
|
16326 |
+
"grad_norm": 0.1499020904302597,
|
16327 |
+
"learning_rate": 1.5948035148338757e-07,
|
16328 |
+
"loss": 0.082,
|
16329 |
+
"step": 2304
|
16330 |
+
},
|
16331 |
+
{
|
16332 |
+
"epoch": 0.685119821660784,
|
16333 |
+
"grad_norm": 0.10471717268228531,
|
16334 |
+
"learning_rate": 1.5197835870242038e-07,
|
16335 |
+
"loss": 0.049,
|
16336 |
+
"step": 2305
|
16337 |
+
},
|
16338 |
+
{
|
16339 |
+
"epoch": 0.6854170536875348,
|
16340 |
+
"grad_norm": 0.09845124185085297,
|
16341 |
+
"learning_rate": 1.446569558255395e-07,
|
16342 |
+
"loss": 0.0395,
|
16343 |
+
"step": 2306
|
16344 |
+
},
|
16345 |
+
{
|
16346 |
+
"epoch": 0.6857142857142857,
|
16347 |
+
"grad_norm": 0.10133890062570572,
|
16348 |
+
"learning_rate": 1.375161560946081e-07,
|
16349 |
+
"loss": 0.044,
|
16350 |
+
"step": 2307
|
16351 |
+
},
|
16352 |
+
{
|
16353 |
+
"epoch": 0.6860115177410366,
|
16354 |
+
"grad_norm": 0.12090011686086655,
|
16355 |
+
"learning_rate": 1.305559724248062e-07,
|
16356 |
+
"loss": 0.0723,
|
16357 |
+
"step": 2308
|
16358 |
+
},
|
16359 |
+
{
|
16360 |
+
"epoch": 0.6863087497677874,
|
16361 |
+
"grad_norm": 0.10536789149045944,
|
16362 |
+
"learning_rate": 1.2377641740464187e-07,
|
16363 |
+
"loss": 0.0536,
|
16364 |
+
"step": 2309
|
16365 |
+
},
|
16366 |
+
{
|
16367 |
+
"epoch": 0.6866059817945384,
|
16368 |
+
"grad_norm": 0.13396191596984863,
|
16369 |
+
"learning_rate": 1.1717750329595101e-07,
|
16370 |
+
"loss": 0.0716,
|
16371 |
+
"step": 2310
|
16372 |
+
},
|
16373 |
+
{
|
16374 |
+
"epoch": 0.6869032138212893,
|
16375 |
+
"grad_norm": 0.11562539637088776,
|
16376 |
+
"learning_rate": 1.1075924203385324e-07,
|
16377 |
+
"loss": 0.061,
|
16378 |
+
"step": 2311
|
16379 |
+
},
|
16380 |
+
{
|
16381 |
+
"epoch": 0.6872004458480401,
|
16382 |
+
"grad_norm": 0.08731340616941452,
|
16383 |
+
"learning_rate": 1.0452164522671837e-07,
|
16384 |
+
"loss": 0.0424,
|
16385 |
+
"step": 2312
|
16386 |
+
},
|
16387 |
+
{
|
16388 |
+
"epoch": 0.687497677874791,
|
16389 |
+
"grad_norm": 0.10618704557418823,
|
16390 |
+
"learning_rate": 9.846472415615537e-08,
|
16391 |
+
"loss": 0.0484,
|
16392 |
+
"step": 2313
|
16393 |
+
},
|
16394 |
+
{
|
16395 |
+
"epoch": 0.6877949099015419,
|
16396 |
+
"grad_norm": 0.10936351865530014,
|
16397 |
+
"learning_rate": 9.258848977700129e-08,
|
16398 |
+
"loss": 0.0648,
|
16399 |
+
"step": 2314
|
16400 |
+
},
|
16401 |
+
{
|
16402 |
+
"epoch": 0.6880921419282928,
|
16403 |
+
"grad_norm": 0.09159952402114868,
|
16404 |
+
"learning_rate": 8.689295271729902e-08,
|
16405 |
+
"loss": 0.0359,
|
16406 |
+
"step": 2315
|
16407 |
+
},
|
16408 |
+
{
|
16409 |
+
"epoch": 0.6883893739550436,
|
16410 |
+
"grad_norm": 0.09538505971431732,
|
16411 |
+
"learning_rate": 8.13781232782751e-08,
|
16412 |
+
"loss": 0.0423,
|
16413 |
+
"step": 2316
|
16414 |
+
},
|
16415 |
+
{
|
16416 |
+
"epoch": 0.6886866059817945,
|
16417 |
+
"grad_norm": 0.1170530915260315,
|
16418 |
+
"learning_rate": 7.604401143430639e-08,
|
16419 |
+
"loss": 0.0589,
|
16420 |
+
"step": 2317
|
16421 |
+
},
|
16422 |
+
{
|
16423 |
+
"epoch": 0.6889838380085455,
|
16424 |
+
"grad_norm": 0.10562342405319214,
|
16425 |
+
"learning_rate": 7.089062683292014e-08,
|
16426 |
+
"loss": 0.0508,
|
16427 |
+
"step": 2318
|
16428 |
+
},
|
16429 |
+
{
|
16430 |
+
"epoch": 0.6892810700352963,
|
16431 |
+
"grad_norm": 0.11546720564365387,
|
16432 |
+
"learning_rate": 6.591797879478279e-08,
|
16433 |
+
"loss": 0.0588,
|
16434 |
+
"step": 2319
|
16435 |
+
},
|
16436 |
+
{
|
16437 |
+
"epoch": 0.6895783020620472,
|
16438 |
+
"grad_norm": 0.14187178015708923,
|
16439 |
+
"learning_rate": 6.112607631364453e-08,
|
16440 |
+
"loss": 0.0705,
|
16441 |
+
"step": 2320
|
16442 |
+
},
|
16443 |
+
{
|
16444 |
+
"epoch": 0.6898755340887981,
|
16445 |
+
"grad_norm": 0.10787046700716019,
|
16446 |
+
"learning_rate": 5.65149280563948e-08,
|
16447 |
+
"loss": 0.0587,
|
16448 |
+
"step": 2321
|
16449 |
+
},
|
16450 |
+
{
|
16451 |
+
"epoch": 0.6901727661155489,
|
16452 |
+
"grad_norm": 0.08616691827774048,
|
16453 |
+
"learning_rate": 5.208454236296234e-08,
|
16454 |
+
"loss": 0.0341,
|
16455 |
+
"step": 2322
|
16456 |
+
},
|
16457 |
+
{
|
16458 |
+
"epoch": 0.6904699981422998,
|
16459 |
+
"grad_norm": 0.13538287580013275,
|
16460 |
+
"learning_rate": 4.783492724635963e-08,
|
16461 |
+
"loss": 0.0772,
|
16462 |
+
"step": 2323
|
16463 |
+
},
|
16464 |
+
{
|
16465 |
+
"epoch": 0.6907672301690507,
|
16466 |
+
"grad_norm": 0.11333715915679932,
|
16467 |
+
"learning_rate": 4.376609039262736e-08,
|
16468 |
+
"loss": 0.0569,
|
16469 |
+
"step": 2324
|
16470 |
+
},
|
16471 |
+
{
|
16472 |
+
"epoch": 0.6910644621958016,
|
16473 |
+
"grad_norm": 0.12235751003026962,
|
16474 |
+
"learning_rate": 3.9878039160878844e-08,
|
16475 |
+
"loss": 0.0556,
|
16476 |
+
"step": 2325
|
16477 |
+
},
|
16478 |
+
{
|
16479 |
+
"epoch": 0.6913616942225524,
|
16480 |
+
"grad_norm": 0.10557149350643158,
|
16481 |
+
"learning_rate": 3.617078058322232e-08,
|
16482 |
+
"loss": 0.046,
|
16483 |
+
"step": 2326
|
16484 |
+
},
|
16485 |
+
{
|
16486 |
+
"epoch": 0.6916589262493034,
|
16487 |
+
"grad_norm": 0.12478914111852646,
|
16488 |
+
"learning_rate": 3.264432136478313e-08,
|
16489 |
+
"loss": 0.0733,
|
16490 |
+
"step": 2327
|
16491 |
+
},
|
16492 |
+
{
|
16493 |
+
"epoch": 0.6919561582760543,
|
16494 |
+
"grad_norm": 0.08897604048252106,
|
16495 |
+
"learning_rate": 2.9298667883692622e-08,
|
16496 |
+
"loss": 0.0388,
|
16497 |
+
"step": 2328
|
16498 |
+
},
|
16499 |
+
{
|
16500 |
+
"epoch": 0.6922533903028051,
|
16501 |
+
"grad_norm": 0.09747358411550522,
|
16502 |
+
"learning_rate": 2.6133826191032663e-08,
|
16503 |
+
"loss": 0.0401,
|
16504 |
+
"step": 2329
|
16505 |
+
},
|
16506 |
+
{
|
16507 |
+
"epoch": 0.692550622329556,
|
16508 |
+
"grad_norm": 0.09661777317523956,
|
16509 |
+
"learning_rate": 2.3149802010913323e-08,
|
16510 |
+
"loss": 0.0403,
|
16511 |
+
"step": 2330
|
16512 |
+
},
|
16513 |
+
{
|
16514 |
+
"epoch": 0.6928478543563069,
|
16515 |
+
"grad_norm": 0.12822963297367096,
|
16516 |
+
"learning_rate": 2.034660074037298e-08,
|
16517 |
+
"loss": 0.0739,
|
16518 |
+
"step": 2331
|
16519 |
+
},
|
16520 |
+
{
|
16521 |
+
"epoch": 0.6931450863830578,
|
16522 |
+
"grad_norm": 0.12262982130050659,
|
16523 |
+
"learning_rate": 1.7724227449422705e-08,
|
16524 |
+
"loss": 0.0491,
|
16525 |
+
"step": 2332
|
16526 |
+
},
|
16527 |
+
{
|
16528 |
+
"epoch": 0.6934423184098086,
|
16529 |
+
"grad_norm": 0.11021570861339569,
|
16530 |
+
"learning_rate": 1.5282686881001875e-08,
|
16531 |
+
"loss": 0.0444,
|
16532 |
+
"step": 2333
|
16533 |
+
},
|
16534 |
+
{
|
16535 |
+
"epoch": 0.6937395504365595,
|
16536 |
+
"grad_norm": 0.10723091661930084,
|
16537 |
+
"learning_rate": 1.3021983451000364e-08,
|
16538 |
+
"loss": 0.0525,
|
16539 |
+
"step": 2334
|
16540 |
+
},
|
16541 |
+
{
|
16542 |
+
"epoch": 0.6940367824633105,
|
16543 |
+
"grad_norm": 0.12233065813779831,
|
16544 |
+
"learning_rate": 1.094212124824745e-08,
|
16545 |
+
"loss": 0.0541,
|
16546 |
+
"step": 2335
|
16547 |
+
},
|
16548 |
+
{
|
16549 |
+
"epoch": 0.6943340144900613,
|
16550 |
+
"grad_norm": 0.12585288286209106,
|
16551 |
+
"learning_rate": 9.043104034456295e-09,
|
16552 |
+
"loss": 0.0507,
|
16553 |
+
"step": 2336
|
16554 |
+
},
|
16555 |
+
{
|
16556 |
+
"epoch": 0.6946312465168122,
|
16557 |
+
"grad_norm": 0.08642668277025223,
|
16558 |
+
"learning_rate": 7.324935244301667e-09,
|
16559 |
+
"loss": 0.0327,
|
16560 |
+
"step": 2337
|
16561 |
+
},
|
16562 |
+
{
|
16563 |
+
"epoch": 0.6949284785435631,
|
16564 |
+
"grad_norm": 0.10163281857967377,
|
16565 |
+
"learning_rate": 5.78761798534222e-09,
|
16566 |
+
"loss": 0.0378,
|
16567 |
+
"step": 2338
|
16568 |
+
},
|
16569 |
+
{
|
16570 |
+
"epoch": 0.6952257105703139,
|
16571 |
+
"grad_norm": 0.1360352784395218,
|
16572 |
+
"learning_rate": 4.431155038031598e-09,
|
16573 |
+
"loss": 0.0524,
|
16574 |
+
"step": 2339
|
16575 |
+
},
|
16576 |
+
{
|
16577 |
+
"epoch": 0.6955229425970648,
|
16578 |
+
"grad_norm": 0.12937673926353455,
|
16579 |
+
"learning_rate": 3.255548855740642e-09,
|
16580 |
+
"loss": 0.0615,
|
16581 |
+
"step": 2340
|
16582 |
+
},
|
16583 |
+
{
|
16584 |
+
"epoch": 0.6958201746238157,
|
16585 |
+
"grad_norm": 0.11729301512241364,
|
16586 |
+
"learning_rate": 2.260801564735182e-09,
|
16587 |
+
"loss": 0.0516,
|
16588 |
+
"step": 2341
|
16589 |
+
},
|
16590 |
+
{
|
16591 |
+
"epoch": 0.6961174066505667,
|
16592 |
+
"grad_norm": 0.1295451819896698,
|
16593 |
+
"learning_rate": 1.4469149641538337e-09,
|
16594 |
+
"loss": 0.0682,
|
16595 |
+
"step": 2342
|
16596 |
+
},
|
16597 |
+
{
|
16598 |
+
"epoch": 0.6964146386773175,
|
16599 |
+
"grad_norm": 0.13985326886177063,
|
16600 |
+
"learning_rate": 8.138905260302032e-10,
|
16601 |
+
"loss": 0.0777,
|
16602 |
+
"step": 2343
|
16603 |
+
},
|
16604 |
+
{
|
16605 |
+
"epoch": 0.6967118707040684,
|
16606 |
+
"grad_norm": 0.10082249343395233,
|
16607 |
+
"learning_rate": 3.617293952817846e-10,
|
16608 |
+
"loss": 0.0439,
|
16609 |
+
"step": 2344
|
16610 |
+
},
|
16611 |
+
{
|
16612 |
+
"epoch": 0.6970091027308193,
|
16613 |
+
"grad_norm": 0.11734326928853989,
|
16614 |
+
"learning_rate": 9.043238970996016e-11,
|
16615 |
+
"loss": 0.0561,
|
16616 |
+
"step": 2345
|
16617 |
+
},
|
16618 |
+
{
|
16619 |
+
"epoch": 0.6973063347575701,
|
16620 |
+
"grad_norm": 0.10924455523490906,
|
16621 |
+
"learning_rate": 0.0,
|
16622 |
+
"loss": 0.0507,
|
16623 |
+
"step": 2346
|
16624 |
}
|
16625 |
],
|
16626 |
"logging_steps": 1,
|
|
|
16644 |
"should_evaluate": false,
|
16645 |
"should_log": false,
|
16646 |
"should_save": true,
|
16647 |
+
"should_training_stop": true
|
16648 |
},
|
16649 |
"attributes": {}
|
16650 |
}
|
16651 |
},
|
16652 |
+
"total_flos": 1.0845724172605194e+18,
|
16653 |
"train_batch_size": 4,
|
16654 |
"trial_name": null,
|
16655 |
"trial_params": null
|