Training in progress, step 290000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f9d4a02e518c6040f400dc71e199bb375bfe7d386fc2532b4f6b29080e6ba85
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce2bfde6677c6364de7dd881d110ad8c08a2d67e11a6db2ce02cc6aaf9d80410
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab923fbe4c25572b44a728c580cd0d9f3b121bade2ff37e411ad54b9de809e39
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39bc196d8aea9810b9698ff8cd04e2aeef8774f706fbd61ae0f0055bbacd0eaf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5606,11 +5606,211 @@
|
|
5606 |
"eval_samples_per_second": 1557.23,
|
5607 |
"eval_steps_per_second": 24.797,
|
5608 |
"step": 280000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5609 |
}
|
5610 |
],
|
5611 |
"max_steps": 500000,
|
5612 |
"num_train_epochs": 12,
|
5613 |
-
"total_flos":
|
5614 |
"trial_name": null,
|
5615 |
"trial_params": null
|
5616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.756914187189823,
|
5 |
+
"global_step": 290000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5606 |
"eval_samples_per_second": 1557.23,
|
5607 |
"eval_steps_per_second": 24.797,
|
5608 |
"step": 280000
|
5609 |
+
},
|
5610 |
+
{
|
5611 |
+
"epoch": 6.54,
|
5612 |
+
"learning_rate": 0.00013777852207079235,
|
5613 |
+
"loss": 0.2691,
|
5614 |
+
"step": 280500
|
5615 |
+
},
|
5616 |
+
{
|
5617 |
+
"epoch": 6.55,
|
5618 |
+
"learning_rate": 0.00013730250483349825,
|
5619 |
+
"loss": 0.2686,
|
5620 |
+
"step": 281000
|
5621 |
+
},
|
5622 |
+
{
|
5623 |
+
"epoch": 6.55,
|
5624 |
+
"eval_loss": 0.2525520324707031,
|
5625 |
+
"eval_runtime": 1.4509,
|
5626 |
+
"eval_samples_per_second": 1514.902,
|
5627 |
+
"eval_steps_per_second": 24.123,
|
5628 |
+
"step": 281000
|
5629 |
+
},
|
5630 |
+
{
|
5631 |
+
"epoch": 6.56,
|
5632 |
+
"learning_rate": 0.00013682668113317584,
|
5633 |
+
"loss": 0.2688,
|
5634 |
+
"step": 281500
|
5635 |
+
},
|
5636 |
+
{
|
5637 |
+
"epoch": 6.57,
|
5638 |
+
"learning_rate": 0.00013635105617335703,
|
5639 |
+
"loss": 0.2686,
|
5640 |
+
"step": 282000
|
5641 |
+
},
|
5642 |
+
{
|
5643 |
+
"epoch": 6.57,
|
5644 |
+
"eval_loss": 0.2530405819416046,
|
5645 |
+
"eval_runtime": 1.446,
|
5646 |
+
"eval_samples_per_second": 1520.041,
|
5647 |
+
"eval_steps_per_second": 24.204,
|
5648 |
+
"step": 282000
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 6.58,
|
5652 |
+
"learning_rate": 0.00013587563515539996,
|
5653 |
+
"loss": 0.2686,
|
5654 |
+
"step": 282500
|
5655 |
+
},
|
5656 |
+
{
|
5657 |
+
"epoch": 6.59,
|
5658 |
+
"learning_rate": 0.00013540042327843296,
|
5659 |
+
"loss": 0.2681,
|
5660 |
+
"step": 283000
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 6.59,
|
5664 |
+
"eval_loss": 0.252304345369339,
|
5665 |
+
"eval_runtime": 1.4523,
|
5666 |
+
"eval_samples_per_second": 1513.433,
|
5667 |
+
"eval_steps_per_second": 24.099,
|
5668 |
+
"step": 283000
|
5669 |
+
},
|
5670 |
+
{
|
5671 |
+
"epoch": 6.61,
|
5672 |
+
"learning_rate": 0.00013492542573929678,
|
5673 |
+
"loss": 0.2686,
|
5674 |
+
"step": 283500
|
5675 |
+
},
|
5676 |
+
{
|
5677 |
+
"epoch": 6.62,
|
5678 |
+
"learning_rate": 0.00013445064773248846,
|
5679 |
+
"loss": 0.2683,
|
5680 |
+
"step": 284000
|
5681 |
+
},
|
5682 |
+
{
|
5683 |
+
"epoch": 6.62,
|
5684 |
+
"eval_loss": 0.2517680823802948,
|
5685 |
+
"eval_runtime": 1.4564,
|
5686 |
+
"eval_samples_per_second": 1509.219,
|
5687 |
+
"eval_steps_per_second": 24.032,
|
5688 |
+
"step": 284000
|
5689 |
+
},
|
5690 |
+
{
|
5691 |
+
"epoch": 6.63,
|
5692 |
+
"learning_rate": 0.00013397609445010432,
|
5693 |
+
"loss": 0.2684,
|
5694 |
+
"step": 284500
|
5695 |
+
},
|
5696 |
+
{
|
5697 |
+
"epoch": 6.64,
|
5698 |
+
"learning_rate": 0.00013350177108178288,
|
5699 |
+
"loss": 0.268,
|
5700 |
+
"step": 285000
|
5701 |
+
},
|
5702 |
+
{
|
5703 |
+
"epoch": 6.64,
|
5704 |
+
"eval_loss": 0.25080546736717224,
|
5705 |
+
"eval_runtime": 1.4547,
|
5706 |
+
"eval_samples_per_second": 1510.937,
|
5707 |
+
"eval_steps_per_second": 24.06,
|
5708 |
+
"step": 285000
|
5709 |
+
},
|
5710 |
+
{
|
5711 |
+
"epoch": 6.65,
|
5712 |
+
"learning_rate": 0.00013302768281464863,
|
5713 |
+
"loss": 0.2682,
|
5714 |
+
"step": 285500
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 6.66,
|
5718 |
+
"learning_rate": 0.0001325538348332548,
|
5719 |
+
"loss": 0.2683,
|
5720 |
+
"step": 286000
|
5721 |
+
},
|
5722 |
+
{
|
5723 |
+
"epoch": 6.66,
|
5724 |
+
"eval_loss": 0.25022614002227783,
|
5725 |
+
"eval_runtime": 1.4198,
|
5726 |
+
"eval_samples_per_second": 1548.074,
|
5727 |
+
"eval_steps_per_second": 24.651,
|
5728 |
+
"step": 286000
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 6.68,
|
5732 |
+
"learning_rate": 0.00013208023231952706,
|
5733 |
+
"loss": 0.2684,
|
5734 |
+
"step": 286500
|
5735 |
+
},
|
5736 |
+
{
|
5737 |
+
"epoch": 6.69,
|
5738 |
+
"learning_rate": 0.0001316068804527066,
|
5739 |
+
"loss": 0.2681,
|
5740 |
+
"step": 287000
|
5741 |
+
},
|
5742 |
+
{
|
5743 |
+
"epoch": 6.69,
|
5744 |
+
"eval_loss": 0.2507327198982239,
|
5745 |
+
"eval_runtime": 1.423,
|
5746 |
+
"eval_samples_per_second": 1544.594,
|
5747 |
+
"eval_steps_per_second": 24.595,
|
5748 |
+
"step": 287000
|
5749 |
+
},
|
5750 |
+
{
|
5751 |
+
"epoch": 6.7,
|
5752 |
+
"learning_rate": 0.00013113378440929353,
|
5753 |
+
"loss": 0.2682,
|
5754 |
+
"step": 287500
|
5755 |
+
},
|
5756 |
+
{
|
5757 |
+
"epoch": 6.71,
|
5758 |
+
"learning_rate": 0.00013066094936299056,
|
5759 |
+
"loss": 0.2681,
|
5760 |
+
"step": 288000
|
5761 |
+
},
|
5762 |
+
{
|
5763 |
+
"epoch": 6.71,
|
5764 |
+
"eval_loss": 0.25175246596336365,
|
5765 |
+
"eval_runtime": 1.4465,
|
5766 |
+
"eval_samples_per_second": 1519.496,
|
5767 |
+
"eval_steps_per_second": 24.196,
|
5768 |
+
"step": 288000
|
5769 |
+
},
|
5770 |
+
{
|
5771 |
+
"epoch": 6.72,
|
5772 |
+
"learning_rate": 0.00013018838048464582,
|
5773 |
+
"loss": 0.2677,
|
5774 |
+
"step": 288500
|
5775 |
+
},
|
5776 |
+
{
|
5777 |
+
"epoch": 6.73,
|
5778 |
+
"learning_rate": 0.00012971608294219702,
|
5779 |
+
"loss": 0.2679,
|
5780 |
+
"step": 289000
|
5781 |
+
},
|
5782 |
+
{
|
5783 |
+
"epoch": 6.73,
|
5784 |
+
"eval_loss": 0.2503984272480011,
|
5785 |
+
"eval_runtime": 1.4389,
|
5786 |
+
"eval_samples_per_second": 1527.568,
|
5787 |
+
"eval_steps_per_second": 24.324,
|
5788 |
+
"step": 289000
|
5789 |
+
},
|
5790 |
+
{
|
5791 |
+
"epoch": 6.75,
|
5792 |
+
"learning_rate": 0.00012924406190061423,
|
5793 |
+
"loss": 0.2677,
|
5794 |
+
"step": 289500
|
5795 |
+
},
|
5796 |
+
{
|
5797 |
+
"epoch": 6.76,
|
5798 |
+
"learning_rate": 0.0001287723225218441,
|
5799 |
+
"loss": 0.2675,
|
5800 |
+
"step": 290000
|
5801 |
+
},
|
5802 |
+
{
|
5803 |
+
"epoch": 6.76,
|
5804 |
+
"eval_loss": 0.250543475151062,
|
5805 |
+
"eval_runtime": 1.4231,
|
5806 |
+
"eval_samples_per_second": 1544.544,
|
5807 |
+
"eval_steps_per_second": 24.595,
|
5808 |
+
"step": 290000
|
5809 |
}
|
5810 |
],
|
5811 |
"max_steps": 500000,
|
5812 |
"num_train_epochs": 12,
|
5813 |
+
"total_flos": 9.265112042515059e+21,
|
5814 |
"trial_name": null,
|
5815 |
"trial_params": null
|
5816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce2bfde6677c6364de7dd881d110ad8c08a2d67e11a6db2ce02cc6aaf9d80410
|
3 |
size 102501541
|