Training in progress, epoch 150, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c68708ddbc2ce9bc9e9ccad6d7415318162ee67548e6384ae6b28daeaab67d4a
 size 166496880
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cb53150c0e656b9af7412cfa4aa611989973d87a578aaf3904c2959edee764c6
 size 330495866
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ceefbb484abd4ed8dc8842b1277aaf2814ba0b1602ab48247135b7cfe2173c79
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:531fa53bf2bb93608e8c8228dd565bd5fe95292cd882c1935da101ac5d413c48
 size 1064
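All four files above are Git LFS pointers, so each diff only replaces the sha256 object id (oid) while the recorded byte size is unchanged. Under the LFS spec, the oid is the SHA-256 of the actual file contents, so a downloaded file can be checked against its pointer. A minimal sketch (assuming the checkpoint has been pulled into a local last-checkpoint/ directory; the path and helper below are illustrative, not part of this commit):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoint shards need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid copied from the updated model.safetensors pointer above.
expected = "c68708ddbc2ce9bc9e9ccad6d7415318162ee67548e6384ae6b28daeaab67d4a"
assert sha256_of("last-checkpoint/model.safetensors") == expected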
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 0.24468238651752472,
 "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-wo-transform-metrics-test/checkpoint-95000",
-"epoch":
+"epoch": 150.0,
 "eval_steps": 500,
-"global_step":
+"global_step": 150000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -38643,6 +38643,270 @@
 "eval_samples_per_second": 14.74,
 "eval_steps_per_second": 1.843,
 "step": 149000
+},
+{
+"epoch": 149.01,
+"grad_norm": 43.52349090576172,
+"learning_rate": 1.074761413334957e-09,
+"loss": 0.1746,
+"step": 149010
+},
+{
+"epoch": 149.04,
+"grad_norm": 46.12135696411133,
+"learning_rate": 1.0106134441850712e-09,
+"loss": 0.178,
+"step": 149040
+},
+{
+"epoch": 149.07,
+"grad_norm": 46.01811981201172,
+"learning_rate": 9.484389968766882e-10,
+"loss": 0.1987,
+"step": 149070
+},
+{
+"epoch": 149.1,
+"grad_norm": 93.64151763916016,
+"learning_rate": 8.88238095955174e-10,
+"loss": 0.1794,
+"step": 149100
+},
+{
+"epoch": 149.13,
+"grad_norm": 70.12222290039062,
+"learning_rate": 8.300107651859623e-10,
+"loss": 0.1959,
+"step": 149130
+},
+{
+"epoch": 149.16,
+"grad_norm": 52.35565948486328,
+"learning_rate": 7.737570275573314e-10,
+"loss": 0.1725,
+"step": 149160
+},
+{
+"epoch": 149.19,
+"grad_norm": 94.19457244873047,
+"learning_rate": 7.194769052765171e-10,
+"loss": 0.1915,
+"step": 149190
+},
+{
+"epoch": 149.22,
+"grad_norm": 60.45103454589844,
+"learning_rate": 6.671704197735995e-10,
+"loss": 0.1758,
+"step": 149220
+},
+{
+"epoch": 149.25,
+"grad_norm": 82.6115951538086,
+"learning_rate": 6.168375916970615e-10,
+"loss": 0.185,
+"step": 149250
+},
+{
+"epoch": 149.28,
+"grad_norm": 54.11577224731445,
+"learning_rate": 5.684784409182298e-10,
+"loss": 0.1871,
+"step": 149280
+},
+{
+"epoch": 149.31,
+"grad_norm": 39.203857421875,
+"learning_rate": 5.220929865284996e-10,
+"loss": 0.178,
+"step": 149310
+},
+{
+"epoch": 149.34,
+"grad_norm": 53.700557708740234,
+"learning_rate": 4.776812468398895e-10,
+"loss": 0.1823,
+"step": 149340
+},
+{
+"epoch": 149.37,
+"grad_norm": 119.4373550415039,
+"learning_rate": 4.3524323938559655e-10,
+"loss": 0.1805,
+"step": 149370
+},
+{
+"epoch": 149.4,
+"grad_norm": 34.55750274658203,
+"learning_rate": 3.9477898091944135e-10,
+"loss": 0.1574,
+"step": 149400
+},
+{
+"epoch": 149.43,
+"grad_norm": 66.53358459472656,
+"learning_rate": 3.562884874158679e-10,
+"loss": 0.1903,
+"step": 149430
+},
+{
+"epoch": 149.46,
+"grad_norm": 45.662715911865234,
+"learning_rate": 3.1977177407105376e-10,
+"loss": 0.1777,
+"step": 149460
+},
+{
+"epoch": 149.49,
+"grad_norm": 487.13580322265625,
+"learning_rate": 2.8522885530013475e-10,
+"loss": 0.1914,
+"step": 149490
+},
+{
+"epoch": 149.52,
+"grad_norm": 39.44456481933594,
+"learning_rate": 2.5265974474109054e-10,
+"loss": 0.1685,
+"step": 149520
+},
+{
+"epoch": 149.55,
+"grad_norm": 154.51235961914062,
+"learning_rate": 2.2206445525085886e-10,
+"loss": 0.173,
+"step": 149550
+},
+{
+"epoch": 149.58,
+"grad_norm": 43.05788040161133,
+"learning_rate": 1.9344299890866614e-10,
+"loss": 0.1993,
+"step": 149580
+},
+{
+"epoch": 149.61,
+"grad_norm": 55.20720291137695,
+"learning_rate": 1.6679538701325215e-10,
+"loss": 0.19,
+"step": 149610
+},
+{
+"epoch": 149.64,
+"grad_norm": 58.366329193115234,
+"learning_rate": 1.4212163008509028e-10,
+"loss": 0.1794,
+"step": 149640
+},
+{
+"epoch": 149.67,
+"grad_norm": 79.63877868652344,
+"learning_rate": 1.1942173786527732e-10,
+"loss": 0.1677,
+"step": 149670
+},
+{
+"epoch": 149.7,
+"grad_norm": 37.779273986816406,
+"learning_rate": 9.869571931442334e-11,
+"loss": 0.1708,
+"step": 149700
+},
+{
+"epoch": 149.73,
+"grad_norm": 55.10169219970703,
+"learning_rate": 7.994358261542712e-11,
+"loss": 0.1637,
+"step": 149730
+},
+{
+"epoch": 149.76,
+"grad_norm": 43.75123596191406,
+"learning_rate": 6.316533517125578e-11,
+"loss": 0.173,
+"step": 149760
+},
+{
+"epoch": 149.79,
+"grad_norm": 54.85829162597656,
+"learning_rate": 4.83609836054999e-11,
+"loss": 0.1813,
+"step": 149790
+},
+{
+"epoch": 149.82,
+"grad_norm": 114.27557373046875,
+"learning_rate": 3.55305337634837e-11,
+"loss": 0.1563,
+"step": 149820
+},
+{
+"epoch": 149.85,
+"grad_norm": 42.012298583984375,
+"learning_rate": 2.467399070893439e-11,
+"loss": 0.1784,
+"step": 149850
+},
+{
+"epoch": 149.88,
+"grad_norm": 34.330322265625,
+"learning_rate": 1.57913587295333e-11,
+"loss": 0.1947,
+"step": 149880
+},
+{
+"epoch": 149.91,
+"grad_norm": 118.9654769897461,
+"learning_rate": 8.882641330809627e-12,
+"loss": 0.2011,
+"step": 149910
+},
+{
+"epoch": 149.94,
+"grad_norm": 51.00263595581055,
+"learning_rate": 3.947841241136452e-12,
+"loss": 0.1609,
+"step": 149940
+},
+{
+"epoch": 149.97,
+"grad_norm": 93.073486328125,
+"learning_rate": 9.869604078449612e-13,
+"loss": 0.1643,
+"step": 149970
+},
+{
+"epoch": 150.0,
+"grad_norm": 69.35248565673828,
+"learning_rate": 0.0,
+"loss": 0.1556,
+"step": 150000
+},
+{
+"epoch": 150.0,
+"eval_loss": 0.25540733337402344,
+"eval_map": 0.8454,
+"eval_map_50": 0.9638,
+"eval_map_75": 0.9414,
+"eval_map_chicken": 0.8417,
+"eval_map_duck": 0.7981,
+"eval_map_large": 0.8191,
+"eval_map_medium": 0.8525,
+"eval_map_plant": 0.8964,
+"eval_map_small": 0.3586,
+"eval_mar_1": 0.3399,
+"eval_mar_10": 0.8795,
+"eval_mar_100": 0.8819,
+"eval_mar_100_chicken": 0.8813,
+"eval_mar_100_duck": 0.8407,
+"eval_mar_100_plant": 0.9237,
+"eval_mar_large": 0.8593,
+"eval_mar_medium": 0.8889,
+"eval_mar_small": 0.5029,
+"eval_runtime": 13.4354,
+"eval_samples_per_second": 14.886,
+"eval_steps_per_second": 1.861,
+"step": 150000
 }
 ],
 "logging_steps": 30,
@@ -38657,12 +38921,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 5.
+"total_flos": 5.160346693632e+19,
 "train_batch_size": 2,
 "trial_name": null,
 "trial_params": null
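The trainer_state.json update closes out the run: it sets "epoch" to 150.0 and "global_step" to 150000, appends the final logging window (steps 149010-150000) and the end-of-training evaluation to log_history, flips "should_training_stop" to true, and records the final total_flos. A minimal sketch of inspecting the updated state and resuming from this folder (the local path and the pre-configured trainer object are assumptions, not part of this commit):

import json

# Read the checkpoint's trainer state and pull out the last train/eval records.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
print(state["epoch"], state["global_step"])               # 150.0 150000
print(train_logs[-1]["loss"], eval_logs[-1]["eval_map"])  # 0.1556 0.8454

# Resuming would reuse the whole folder (model, optimizer, scheduler, RNG state),
# assuming `trainer` is a transformers.Trainer configured for the same run:
# trainer.train(resume_from_checkpoint="last-checkpoint")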