Training in progress, step 21000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2226478553
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39a351e6af26803f9eeacbdc3599fe7ba53df6f5641098b8daa5f7b9e0aa7b85
|
3 |
size 2226478553
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59dcfd93fbced501dd096334a41c07a4b2e21743fa787c29c4db14612e8f084f
|
3 |
size 1113252715
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17563
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2427d3e67c681daaaba70098244ec32f3b70c7781145897a05d923a006eff520
|
3 |
size 17563
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88c633fcf373eeb32eac8eef8541ef4cf7ce7b0edbad3ac306cf7779b78afda6
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b337a80290075bf247d1150cdeaf1a5b708e8ccc1775639aea6fdc42b499e1c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -12486,11 +12486,131 @@
|
|
12486 |
"learning_rate": 1.5456564448620482e-05,
|
12487 |
"loss": 1.1033,
|
12488 |
"step": 20800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12489 |
}
|
12490 |
],
|
12491 |
"max_steps": 24414,
|
12492 |
"num_train_epochs": 2,
|
12493 |
-
"total_flos": 2.
|
12494 |
"trial_name": null,
|
12495 |
"trial_params": null
|
12496 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7203170279652253,
|
5 |
+
"global_step": 21000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
12486 |
"learning_rate": 1.5456564448620482e-05,
|
12487 |
"loss": 1.1033,
|
12488 |
"step": 20800
|
12489 |
+
},
|
12490 |
+
{
|
12491 |
+
"epoch": 1.7,
|
12492 |
+
"learning_rate": 1.5413854958571797e-05,
|
12493 |
+
"loss": 1.1021,
|
12494 |
+
"step": 20810
|
12495 |
+
},
|
12496 |
+
{
|
12497 |
+
"epoch": 1.71,
|
12498 |
+
"learning_rate": 1.5371145468523108e-05,
|
12499 |
+
"loss": 1.0975,
|
12500 |
+
"step": 20820
|
12501 |
+
},
|
12502 |
+
{
|
12503 |
+
"epoch": 1.71,
|
12504 |
+
"learning_rate": 1.532843597847442e-05,
|
12505 |
+
"loss": 1.0981,
|
12506 |
+
"step": 20830
|
12507 |
+
},
|
12508 |
+
{
|
12509 |
+
"epoch": 1.71,
|
12510 |
+
"learning_rate": 1.528572648842573e-05,
|
12511 |
+
"loss": 1.1019,
|
12512 |
+
"step": 20840
|
12513 |
+
},
|
12514 |
+
{
|
12515 |
+
"epoch": 1.71,
|
12516 |
+
"learning_rate": 1.524301699837704e-05,
|
12517 |
+
"loss": 1.1089,
|
12518 |
+
"step": 20850
|
12519 |
+
},
|
12520 |
+
{
|
12521 |
+
"epoch": 1.71,
|
12522 |
+
"learning_rate": 1.5200307508328351e-05,
|
12523 |
+
"loss": 1.1111,
|
12524 |
+
"step": 20860
|
12525 |
+
},
|
12526 |
+
{
|
12527 |
+
"epoch": 1.71,
|
12528 |
+
"learning_rate": 1.5157598018279662e-05,
|
12529 |
+
"loss": 1.1091,
|
12530 |
+
"step": 20870
|
12531 |
+
},
|
12532 |
+
{
|
12533 |
+
"epoch": 1.71,
|
12534 |
+
"learning_rate": 1.5114888528230975e-05,
|
12535 |
+
"loss": 1.1001,
|
12536 |
+
"step": 20880
|
12537 |
+
},
|
12538 |
+
{
|
12539 |
+
"epoch": 1.71,
|
12540 |
+
"learning_rate": 1.5072179038182286e-05,
|
12541 |
+
"loss": 1.0987,
|
12542 |
+
"step": 20890
|
12543 |
+
},
|
12544 |
+
{
|
12545 |
+
"epoch": 1.71,
|
12546 |
+
"learning_rate": 1.5029469548133595e-05,
|
12547 |
+
"loss": 1.107,
|
12548 |
+
"step": 20900
|
12549 |
+
},
|
12550 |
+
{
|
12551 |
+
"epoch": 1.71,
|
12552 |
+
"learning_rate": 1.4986760058084906e-05,
|
12553 |
+
"loss": 1.0973,
|
12554 |
+
"step": 20910
|
12555 |
+
},
|
12556 |
+
{
|
12557 |
+
"epoch": 1.71,
|
12558 |
+
"learning_rate": 1.494405056803622e-05,
|
12559 |
+
"loss": 1.0978,
|
12560 |
+
"step": 20920
|
12561 |
+
},
|
12562 |
+
{
|
12563 |
+
"epoch": 1.71,
|
12564 |
+
"learning_rate": 1.490134107798753e-05,
|
12565 |
+
"loss": 1.0993,
|
12566 |
+
"step": 20930
|
12567 |
+
},
|
12568 |
+
{
|
12569 |
+
"epoch": 1.72,
|
12570 |
+
"learning_rate": 1.485863158793884e-05,
|
12571 |
+
"loss": 1.093,
|
12572 |
+
"step": 20940
|
12573 |
+
},
|
12574 |
+
{
|
12575 |
+
"epoch": 1.72,
|
12576 |
+
"learning_rate": 1.4815922097890153e-05,
|
12577 |
+
"loss": 1.1091,
|
12578 |
+
"step": 20950
|
12579 |
+
},
|
12580 |
+
{
|
12581 |
+
"epoch": 1.72,
|
12582 |
+
"learning_rate": 1.4773212607841464e-05,
|
12583 |
+
"loss": 1.1066,
|
12584 |
+
"step": 20960
|
12585 |
+
},
|
12586 |
+
{
|
12587 |
+
"epoch": 1.72,
|
12588 |
+
"learning_rate": 1.4730503117792775e-05,
|
12589 |
+
"loss": 1.1056,
|
12590 |
+
"step": 20970
|
12591 |
+
},
|
12592 |
+
{
|
12593 |
+
"epoch": 1.72,
|
12594 |
+
"learning_rate": 1.4687793627744084e-05,
|
12595 |
+
"loss": 1.101,
|
12596 |
+
"step": 20980
|
12597 |
+
},
|
12598 |
+
{
|
12599 |
+
"epoch": 1.72,
|
12600 |
+
"learning_rate": 1.4645084137695398e-05,
|
12601 |
+
"loss": 1.1034,
|
12602 |
+
"step": 20990
|
12603 |
+
},
|
12604 |
+
{
|
12605 |
+
"epoch": 1.72,
|
12606 |
+
"learning_rate": 1.4602374647646707e-05,
|
12607 |
+
"loss": 1.0953,
|
12608 |
+
"step": 21000
|
12609 |
}
|
12610 |
],
|
12611 |
"max_steps": 24414,
|
12612 |
"num_train_epochs": 2,
|
12613 |
+
"total_flos": 2.837232089174661e+18,
|
12614 |
"trial_name": null,
|
12615 |
"trial_params": null
|
12616 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6610c57ac1474bf5097bd000e69f982c66ca3a4b3ce31c1509196f6a47bc4144
|
3 |
size 3439
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1113252715
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59dcfd93fbced501dd096334a41c07a4b2e21743fa787c29c4db14612e8f084f
|
3 |
size 1113252715
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3439
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6610c57ac1474bf5097bd000e69f982c66ca3a4b3ce31c1509196f6a47bc4144
|
3 |
size 3439
|