Training in progress, step 24200
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2226478553
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:582d50a5a8f019141e250e840870fee5833dcefaefb338a10e0a481e3b50a51c
|
3 |
size 2226478553
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
|
3 |
size 1113252715
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17563
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88a6c9c9d9a0e90d25aefe3b1eec75c6a8d9602de8536842eb2cffe609f70cab
|
3 |
size 17563
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23bf027f9216fbe8b17c91852878854c7eb1a9b37ceb42d5492b73cb3332b194
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4216a163f98df4253a3d2dda8c01570c7d3175dacb0f7d8217e0ca3a27141a6
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -14406,11 +14406,131 @@
|
|
14406 |
"learning_rate": 1.7895276330400616e-06,
|
14407 |
"loss": 1.08,
|
14408 |
"step": 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14409 |
}
|
14410 |
],
|
14411 |
"max_steps": 24414,
|
14412 |
"num_train_epochs": 2,
|
14413 |
-
"total_flos": 3.
|
14414 |
"trial_name": null,
|
14415 |
"trial_params": null
|
14416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9824590147147672,
|
5 |
+
"global_step": 24200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
14406 |
"learning_rate": 1.7895276330400616e-06,
|
14407 |
"loss": 1.08,
|
14408 |
"step": 24000
|
14409 |
+
},
|
14410 |
+
{
|
14411 |
+
"epoch": 1.97,
|
14412 |
+
"learning_rate": 1.7468181429913726e-06,
|
14413 |
+
"loss": 1.0849,
|
14414 |
+
"step": 24010
|
14415 |
+
},
|
14416 |
+
{
|
14417 |
+
"epoch": 1.97,
|
14418 |
+
"learning_rate": 1.704108652942684e-06,
|
14419 |
+
"loss": 1.0995,
|
14420 |
+
"step": 24020
|
14421 |
+
},
|
14422 |
+
{
|
14423 |
+
"epoch": 1.97,
|
14424 |
+
"learning_rate": 1.661399162893995e-06,
|
14425 |
+
"loss": 1.0837,
|
14426 |
+
"step": 24030
|
14427 |
+
},
|
14428 |
+
{
|
14429 |
+
"epoch": 1.97,
|
14430 |
+
"learning_rate": 1.6186896728453061e-06,
|
14431 |
+
"loss": 1.0905,
|
14432 |
+
"step": 24040
|
14433 |
+
},
|
14434 |
+
{
|
14435 |
+
"epoch": 1.97,
|
14436 |
+
"learning_rate": 1.5759801827966177e-06,
|
14437 |
+
"loss": 1.0942,
|
14438 |
+
"step": 24050
|
14439 |
+
},
|
14440 |
+
{
|
14441 |
+
"epoch": 1.97,
|
14442 |
+
"learning_rate": 1.5332706927479286e-06,
|
14443 |
+
"loss": 1.0924,
|
14444 |
+
"step": 24060
|
14445 |
+
},
|
14446 |
+
{
|
14447 |
+
"epoch": 1.97,
|
14448 |
+
"learning_rate": 1.49056120269924e-06,
|
14449 |
+
"loss": 1.0925,
|
14450 |
+
"step": 24070
|
14451 |
+
},
|
14452 |
+
{
|
14453 |
+
"epoch": 1.97,
|
14454 |
+
"learning_rate": 1.447851712650551e-06,
|
14455 |
+
"loss": 1.0886,
|
14456 |
+
"step": 24080
|
14457 |
+
},
|
14458 |
+
{
|
14459 |
+
"epoch": 1.97,
|
14460 |
+
"learning_rate": 1.4051422226018622e-06,
|
14461 |
+
"loss": 1.1016,
|
14462 |
+
"step": 24090
|
14463 |
+
},
|
14464 |
+
{
|
14465 |
+
"epoch": 1.97,
|
14466 |
+
"learning_rate": 1.3624327325531735e-06,
|
14467 |
+
"loss": 1.0909,
|
14468 |
+
"step": 24100
|
14469 |
+
},
|
14470 |
+
{
|
14471 |
+
"epoch": 1.98,
|
14472 |
+
"learning_rate": 1.3197232425044844e-06,
|
14473 |
+
"loss": 1.1014,
|
14474 |
+
"step": 24110
|
14475 |
+
},
|
14476 |
+
{
|
14477 |
+
"epoch": 1.98,
|
14478 |
+
"learning_rate": 1.2770137524557958e-06,
|
14479 |
+
"loss": 1.0913,
|
14480 |
+
"step": 24120
|
14481 |
+
},
|
14482 |
+
{
|
14483 |
+
"epoch": 1.98,
|
14484 |
+
"learning_rate": 1.234304262407107e-06,
|
14485 |
+
"loss": 1.0932,
|
14486 |
+
"step": 24130
|
14487 |
+
},
|
14488 |
+
{
|
14489 |
+
"epoch": 1.98,
|
14490 |
+
"learning_rate": 1.191594772358418e-06,
|
14491 |
+
"loss": 1.1051,
|
14492 |
+
"step": 24140
|
14493 |
+
},
|
14494 |
+
{
|
14495 |
+
"epoch": 1.98,
|
14496 |
+
"learning_rate": 1.1488852823097293e-06,
|
14497 |
+
"loss": 1.0882,
|
14498 |
+
"step": 24150
|
14499 |
+
},
|
14500 |
+
{
|
14501 |
+
"epoch": 1.98,
|
14502 |
+
"learning_rate": 1.1061757922610405e-06,
|
14503 |
+
"loss": 1.0917,
|
14504 |
+
"step": 24160
|
14505 |
+
},
|
14506 |
+
{
|
14507 |
+
"epoch": 1.98,
|
14508 |
+
"learning_rate": 1.0634663022123516e-06,
|
14509 |
+
"loss": 1.0956,
|
14510 |
+
"step": 24170
|
14511 |
+
},
|
14512 |
+
{
|
14513 |
+
"epoch": 1.98,
|
14514 |
+
"learning_rate": 1.020756812163663e-06,
|
14515 |
+
"loss": 1.0957,
|
14516 |
+
"step": 24180
|
14517 |
+
},
|
14518 |
+
{
|
14519 |
+
"epoch": 1.98,
|
14520 |
+
"learning_rate": 9.780473221149738e-07,
|
14521 |
+
"loss": 1.0886,
|
14522 |
+
"step": 24190
|
14523 |
+
},
|
14524 |
+
{
|
14525 |
+
"epoch": 1.98,
|
14526 |
+
"learning_rate": 9.353378320662852e-07,
|
14527 |
+
"loss": 1.0992,
|
14528 |
+
"step": 24200
|
14529 |
}
|
14530 |
],
|
14531 |
"max_steps": 24414,
|
14532 |
"num_train_epochs": 2,
|
14533 |
+
"total_flos": 3.269571533476485e+18,
|
14534 |
"trial_name": null,
|
14535 |
"trial_params": null
|
14536 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:505bf07c8a223940e75e097e032e480f06d37db6918c15d3e73cac6183432a98
|
3 |
size 1113252715
|