Training in progress, step 24400
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2226478553
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:210a128933aae1983894b72b165841d8274c0d0117d155aea16bf708d2227228
|
3 |
size 2226478553
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
|
3 |
size 1113252715
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17563
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60e36185845bc18a656a31db5076cb45fb810dd43869bde58c051cae5a0e36a7
|
3 |
size 17563
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c8213aaa5f90a2b250062d1c60bbdb54ae5dfd572df54e04a4bade191c0a7c5
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a748bb88559c27203523064ee7efe8a457e4bd76047f9c37d15baf1de97eaec9
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -14526,11 +14526,131 @@
|
|
14526 |
"learning_rate": 9.353378320662852e-07,
|
14527 |
"loss": 1.0992,
|
14528 |
"step": 24200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14529 |
}
|
14530 |
],
|
14531 |
"max_steps": 24414,
|
14532 |
"num_train_epochs": 2,
|
14533 |
-
"total_flos": 3.
|
14534 |
"trial_name": null,
|
14535 |
"trial_params": null
|
14536 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9988428888866134,
|
5 |
+
"global_step": 24400,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
14526 |
"learning_rate": 9.353378320662852e-07,
|
14527 |
"loss": 1.0992,
|
14528 |
"step": 24200
|
14529 |
+
},
|
14530 |
+
{
|
14531 |
+
"epoch": 1.98,
|
14532 |
+
"learning_rate": 8.968992910224651e-07,
|
14533 |
+
"loss": 1.0964,
|
14534 |
+
"step": 24210
|
14535 |
+
},
|
14536 |
+
{
|
14537 |
+
"epoch": 1.98,
|
14538 |
+
"learning_rate": 8.541898009737764e-07,
|
14539 |
+
"loss": 1.0921,
|
14540 |
+
"step": 24220
|
14541 |
+
},
|
14542 |
+
{
|
14543 |
+
"epoch": 1.98,
|
14544 |
+
"learning_rate": 8.114803109250876e-07,
|
14545 |
+
"loss": 1.0858,
|
14546 |
+
"step": 24230
|
14547 |
+
},
|
14548 |
+
{
|
14549 |
+
"epoch": 1.99,
|
14550 |
+
"learning_rate": 7.687708208763988e-07,
|
14551 |
+
"loss": 1.0917,
|
14552 |
+
"step": 24240
|
14553 |
+
},
|
14554 |
+
{
|
14555 |
+
"epoch": 1.99,
|
14556 |
+
"learning_rate": 7.260613308277099e-07,
|
14557 |
+
"loss": 1.0885,
|
14558 |
+
"step": 24250
|
14559 |
+
},
|
14560 |
+
{
|
14561 |
+
"epoch": 1.99,
|
14562 |
+
"learning_rate": 6.833518407790211e-07,
|
14563 |
+
"loss": 1.0898,
|
14564 |
+
"step": 24260
|
14565 |
+
},
|
14566 |
+
{
|
14567 |
+
"epoch": 1.99,
|
14568 |
+
"learning_rate": 6.406423507303323e-07,
|
14569 |
+
"loss": 1.0852,
|
14570 |
+
"step": 24270
|
14571 |
+
},
|
14572 |
+
{
|
14573 |
+
"epoch": 1.99,
|
14574 |
+
"learning_rate": 5.979328606816435e-07,
|
14575 |
+
"loss": 1.0954,
|
14576 |
+
"step": 24280
|
14577 |
+
},
|
14578 |
+
{
|
14579 |
+
"epoch": 1.99,
|
14580 |
+
"learning_rate": 5.552233706329546e-07,
|
14581 |
+
"loss": 1.1051,
|
14582 |
+
"step": 24290
|
14583 |
+
},
|
14584 |
+
{
|
14585 |
+
"epoch": 1.99,
|
14586 |
+
"learning_rate": 5.125138805842659e-07,
|
14587 |
+
"loss": 1.0875,
|
14588 |
+
"step": 24300
|
14589 |
+
},
|
14590 |
+
{
|
14591 |
+
"epoch": 1.99,
|
14592 |
+
"learning_rate": 4.6980439053557705e-07,
|
14593 |
+
"loss": 1.0967,
|
14594 |
+
"step": 24310
|
14595 |
+
},
|
14596 |
+
{
|
14597 |
+
"epoch": 1.99,
|
14598 |
+
"learning_rate": 4.270949004868882e-07,
|
14599 |
+
"loss": 1.099,
|
14600 |
+
"step": 24320
|
14601 |
+
},
|
14602 |
+
{
|
14603 |
+
"epoch": 1.99,
|
14604 |
+
"learning_rate": 3.843854104381994e-07,
|
14605 |
+
"loss": 1.0952,
|
14606 |
+
"step": 24330
|
14607 |
+
},
|
14608 |
+
{
|
14609 |
+
"epoch": 1.99,
|
14610 |
+
"learning_rate": 3.4167592038951053e-07,
|
14611 |
+
"loss": 1.0911,
|
14612 |
+
"step": 24340
|
14613 |
+
},
|
14614 |
+
{
|
14615 |
+
"epoch": 1.99,
|
14616 |
+
"learning_rate": 2.9896643034082176e-07,
|
14617 |
+
"loss": 1.0909,
|
14618 |
+
"step": 24350
|
14619 |
+
},
|
14620 |
+
{
|
14621 |
+
"epoch": 2.0,
|
14622 |
+
"learning_rate": 2.5625694029213294e-07,
|
14623 |
+
"loss": 1.0973,
|
14624 |
+
"step": 24360
|
14625 |
+
},
|
14626 |
+
{
|
14627 |
+
"epoch": 2.0,
|
14628 |
+
"learning_rate": 2.135474502434441e-07,
|
14629 |
+
"loss": 1.0926,
|
14630 |
+
"step": 24370
|
14631 |
+
},
|
14632 |
+
{
|
14633 |
+
"epoch": 2.0,
|
14634 |
+
"learning_rate": 1.7083796019475527e-07,
|
14635 |
+
"loss": 1.0924,
|
14636 |
+
"step": 24380
|
14637 |
+
},
|
14638 |
+
{
|
14639 |
+
"epoch": 2.0,
|
14640 |
+
"learning_rate": 1.2812847014606647e-07,
|
14641 |
+
"loss": 1.0848,
|
14642 |
+
"step": 24390
|
14643 |
+
},
|
14644 |
+
{
|
14645 |
+
"epoch": 2.0,
|
14646 |
+
"learning_rate": 8.541898009737763e-08,
|
14647 |
+
"loss": 1.1018,
|
14648 |
+
"step": 24400
|
14649 |
}
|
14650 |
],
|
14651 |
"max_steps": 24414,
|
14652 |
"num_train_epochs": 2,
|
14653 |
+
"total_flos": 3.296592748745349e+18,
|
14654 |
"trial_name": null,
|
14655 |
"trial_params": null
|
14656 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4015cb96f359b6403e2646b4472afdbae87d2268f8843bd956ec7adf9182921
|
3 |
size 1113252715
|