Upload checkpoint 3300
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +122 -3
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447ea1baf5ce4d3011df6f9cd40e05e076cbb62e1b5b4bd59b2b44e1b3600425
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:649859d9b29ef3ee4a62106e3696336aee4f9cf11919324ac3cb0cd58eca6bd1
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893874312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34eabe2409758e1c785b61e49de22c99478d71b4e287056e9ac7b8e66f22d2e7
|
3 |
size 17893874312
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e3e5d81ec2d6c897aa97cd1ed656c7729f595b7fd89a2af10c54571be04f6c2
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7588,6 +7588,125 @@
|
|
7588 |
"learning_rate": 6.597910240324967e-07,
|
7589 |
"loss": 0.6038,
|
7590 |
"step": 3249
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7591 |
}
|
7592 |
],
|
7593 |
"logging_steps": 3,
|
@@ -7607,7 +7726,7 @@
|
|
7607 |
"attributes": {}
|
7608 |
}
|
7609 |
},
|
7610 |
-
"total_flos": 2.
|
7611 |
"train_batch_size": 8,
|
7612 |
"trial_name": null,
|
7613 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9786476868327402,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7588 |
"learning_rate": 6.597910240324967e-07,
|
7589 |
"loss": 0.6038,
|
7590 |
"step": 3249
|
7591 |
+
},
|
7592 |
+
{
|
7593 |
+
"epoch": 0.9644128113879004,
|
7594 |
+
"grad_norm": 0.26171875,
|
7595 |
+
"learning_rate": 6.280319158544989e-07,
|
7596 |
+
"loss": 0.6301,
|
7597 |
+
"step": 3252
|
7598 |
+
},
|
7599 |
+
{
|
7600 |
+
"epoch": 0.9653024911032029,
|
7601 |
+
"grad_norm": 0.265625,
|
7602 |
+
"learning_rate": 5.970537346853156e-07,
|
7603 |
+
"loss": 0.6007,
|
7604 |
+
"step": 3255
|
7605 |
+
},
|
7606 |
+
{
|
7607 |
+
"epoch": 0.9661921708185054,
|
7608 |
+
"grad_norm": 0.2470703125,
|
7609 |
+
"learning_rate": 5.668567239708323e-07,
|
7610 |
+
"loss": 0.5789,
|
7611 |
+
"step": 3258
|
7612 |
+
},
|
7613 |
+
{
|
7614 |
+
"epoch": 0.9670818505338078,
|
7615 |
+
"grad_norm": 0.265625,
|
7616 |
+
"learning_rate": 5.374411210180341e-07,
|
7617 |
+
"loss": 0.5964,
|
7618 |
+
"step": 3261
|
7619 |
+
},
|
7620 |
+
{
|
7621 |
+
"epoch": 0.9679715302491103,
|
7622 |
+
"grad_norm": 0.25390625,
|
7623 |
+
"learning_rate": 5.088071569931185e-07,
|
7624 |
+
"loss": 0.5953,
|
7625 |
+
"step": 3264
|
7626 |
+
},
|
7627 |
+
{
|
7628 |
+
"epoch": 0.9688612099644128,
|
7629 |
+
"grad_norm": 0.2431640625,
|
7630 |
+
"learning_rate": 4.809550569196519e-07,
|
7631 |
+
"loss": 0.5877,
|
7632 |
+
"step": 3267
|
7633 |
+
},
|
7634 |
+
{
|
7635 |
+
"epoch": 0.9697508896797153,
|
7636 |
+
"grad_norm": 0.263671875,
|
7637 |
+
"learning_rate": 4.5388503967683793e-07,
|
7638 |
+
"loss": 0.5923,
|
7639 |
+
"step": 3270
|
7640 |
+
},
|
7641 |
+
{
|
7642 |
+
"epoch": 0.9706405693950177,
|
7643 |
+
"grad_norm": 0.255859375,
|
7644 |
+
"learning_rate": 4.275973179977855e-07,
|
7645 |
+
"loss": 0.5958,
|
7646 |
+
"step": 3273
|
7647 |
+
},
|
7648 |
+
{
|
7649 |
+
"epoch": 0.9715302491103203,
|
7650 |
+
"grad_norm": 0.271484375,
|
7651 |
+
"learning_rate": 4.0209209846783224e-07,
|
7652 |
+
"loss": 0.5977,
|
7653 |
+
"step": 3276
|
7654 |
+
},
|
7655 |
+
{
|
7656 |
+
"epoch": 0.9724199288256228,
|
7657 |
+
"grad_norm": 0.265625,
|
7658 |
+
"learning_rate": 3.773695815229239e-07,
|
7659 |
+
"loss": 0.592,
|
7660 |
+
"step": 3279
|
7661 |
+
},
|
7662 |
+
{
|
7663 |
+
"epoch": 0.9733096085409253,
|
7664 |
+
"grad_norm": 0.248046875,
|
7665 |
+
"learning_rate": 3.534299614480596e-07,
|
7666 |
+
"loss": 0.5702,
|
7667 |
+
"step": 3282
|
7668 |
+
},
|
7669 |
+
{
|
7670 |
+
"epoch": 0.9741992882562278,
|
7671 |
+
"grad_norm": 0.255859375,
|
7672 |
+
"learning_rate": 3.3027342637572676e-07,
|
7673 |
+
"loss": 0.5893,
|
7674 |
+
"step": 3285
|
7675 |
+
},
|
7676 |
+
{
|
7677 |
+
"epoch": 0.9750889679715302,
|
7678 |
+
"grad_norm": 0.251953125,
|
7679 |
+
"learning_rate": 3.079001582844354e-07,
|
7680 |
+
"loss": 0.6177,
|
7681 |
+
"step": 3288
|
7682 |
+
},
|
7683 |
+
{
|
7684 |
+
"epoch": 0.9759786476868327,
|
7685 |
+
"grad_norm": 0.341796875,
|
7686 |
+
"learning_rate": 2.8631033299730825e-07,
|
7687 |
+
"loss": 0.6178,
|
7688 |
+
"step": 3291
|
7689 |
+
},
|
7690 |
+
{
|
7691 |
+
"epoch": 0.9768683274021353,
|
7692 |
+
"grad_norm": 0.255859375,
|
7693 |
+
"learning_rate": 2.655041201806707e-07,
|
7694 |
+
"loss": 0.5924,
|
7695 |
+
"step": 3294
|
7696 |
+
},
|
7697 |
+
{
|
7698 |
+
"epoch": 0.9777580071174378,
|
7699 |
+
"grad_norm": 0.259765625,
|
7700 |
+
"learning_rate": 2.454816833427631e-07,
|
7701 |
+
"loss": 0.6021,
|
7702 |
+
"step": 3297
|
7703 |
+
},
|
7704 |
+
{
|
7705 |
+
"epoch": 0.9786476868327402,
|
7706 |
+
"grad_norm": 0.2578125,
|
7707 |
+
"learning_rate": 2.2624317983239718e-07,
|
7708 |
+
"loss": 0.6131,
|
7709 |
+
"step": 3300
|
7710 |
}
|
7711 |
],
|
7712 |
"logging_steps": 3,
|
|
|
7726 |
"attributes": {}
|
7727 |
}
|
7728 |
},
|
7729 |
+
"total_flos": 2.1603485806523056e+19,
|
7730 |
"train_batch_size": 8,
|
7731 |
"trial_name": null,
|
7732 |
"trial_params": null
|