{ "best_metric": 2.005824327468872, "best_model_checkpoint": "saves/ChineseLLaMA2-7B-Chat/lora/SFT/checkpoint-1500", "epoch": 2.0547945205479454, "eval_steps": 100, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.000999995369868095, "loss": 4.0099, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.0009999670749281082, "loss": 2.887, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.000999913058797528, "loss": 2.6742, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0009998333242552556, "loss": 2.7939, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.0009997278754032958, "loss": 2.6062, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.0009995967176665461, "loss": 2.7508, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.0009994398577925167, "loss": 2.547, "step": 35 }, { "epoch": 0.05, "learning_rate": 0.0009992573038509849, "loss": 2.5281, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.0009990490652335784, "loss": 2.6397, "step": 45 }, { "epoch": 0.07, "learning_rate": 0.000998815152653293, "loss": 2.608, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.000998555578143941, "loss": 2.4076, "step": 55 }, { "epoch": 0.08, "learning_rate": 0.0009982703550595329, "loss": 2.5914, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.0009979594980735896, "loss": 2.3517, "step": 65 }, { "epoch": 0.1, "learning_rate": 0.0009976230231783876, "loss": 2.4019, "step": 70 }, { "epoch": 0.1, "learning_rate": 0.0009972609476841367, "loss": 2.4481, "step": 75 }, { "epoch": 0.11, "learning_rate": 0.000996873290218089, "loss": 2.3244, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.000996460070723581, "loss": 2.4821, "step": 85 }, { "epoch": 0.12, "learning_rate": 0.0009960213104590074, "loss": 2.438, "step": 90 }, { "epoch": 0.13, "learning_rate": 0.0009955570319967273, "loss": 2.2925, "step": 95 }, { "epoch": 0.14, "learning_rate": 0.0009950672592219031, "loss": 2.3052, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.3112449645996094, "eval_runtime": 8.9668, "eval_samples_per_second": 13.16, "eval_steps_per_second": 1.673, "step": 100 }, { "epoch": 0.14, "learning_rate": 0.000994552017331272, "loss": 2.5839, "step": 105 }, { "epoch": 0.15, "learning_rate": 0.0009940113328318488, "loss": 2.4983, "step": 110 }, { "epoch": 0.16, "learning_rate": 0.0009934452335395637, "loss": 2.4598, "step": 115 }, { "epoch": 0.16, "learning_rate": 0.0009928537485778299, "loss": 2.4336, "step": 120 }, { "epoch": 0.17, "learning_rate": 0.0009922369083760461, "loss": 2.3799, "step": 125 }, { "epoch": 0.18, "learning_rate": 0.0009915947446680307, "loss": 2.5443, "step": 130 }, { "epoch": 0.18, "learning_rate": 0.0009909272904903897, "loss": 2.3701, "step": 135 }, { "epoch": 0.19, "learning_rate": 0.0009902345801808161, "loss": 2.2718, "step": 140 }, { "epoch": 0.2, "learning_rate": 0.0009895166493763246, "loss": 2.3402, "step": 145 }, { "epoch": 0.21, "learning_rate": 0.0009887735350114174, "loss": 2.4072, "step": 150 }, { "epoch": 0.21, "learning_rate": 0.000988005275316184, "loss": 2.3807, "step": 155 }, { "epoch": 0.22, "learning_rate": 0.000987211909814336, "loss": 2.2149, "step": 160 }, { "epoch": 0.23, "learning_rate": 0.000986393479321171, "loss": 2.2713, "step": 165 }, { "epoch": 0.23, "learning_rate": 0.0009855500259414753, "loss": 2.08, "step": 170 }, { "epoch": 0.24, "learning_rate": 0.0009846815930673563, "loss": 2.4682, "step": 175 }, { "epoch": 0.25, "learning_rate": 0.000983788225376011, "loss": 2.3328, "step": 180 }, { "epoch": 0.25, "learning_rate": 0.0009828699688274275, "loss": 2.3671, "step": 185 }, { "epoch": 0.26, "learning_rate": 0.0009819268706620196, "loss": 2.3251, "step": 190 }, { "epoch": 0.27, "learning_rate": 0.0009809589793981978, "loss": 2.4429, "step": 195 }, { "epoch": 0.27, "learning_rate": 0.0009799663448298724, "loss": 2.2499, "step": 200 }, { "epoch": 0.27, "eval_loss": 2.234222650527954, "eval_runtime": 8.8394, "eval_samples_per_second": 13.349, "eval_steps_per_second": 1.697, "step": 200 }, { "epoch": 0.28, "learning_rate": 0.0009789490180238916, "loss": 2.2279, "step": 205 }, { "epoch": 0.29, "learning_rate": 0.0009779070513174157, "loss": 2.4875, "step": 210 }, { "epoch": 0.29, "learning_rate": 0.0009768404983152227, "loss": 2.3507, "step": 215 }, { "epoch": 0.3, "learning_rate": 0.0009757494138869523, "loss": 2.2189, "step": 220 }, { "epoch": 0.31, "learning_rate": 0.0009746338541642812, "loss": 2.2597, "step": 225 }, { "epoch": 0.32, "learning_rate": 0.0009734938765380377, "loss": 2.334, "step": 230 }, { "epoch": 0.32, "learning_rate": 0.000972329539655247, "loss": 2.3133, "step": 235 }, { "epoch": 0.33, "learning_rate": 0.0009711409034161151, "loss": 2.2286, "step": 240 }, { "epoch": 0.34, "learning_rate": 0.0009699280289709478, "loss": 2.224, "step": 245 }, { "epoch": 0.34, "learning_rate": 0.0009686909787170031, "loss": 2.3772, "step": 250 }, { "epoch": 0.35, "learning_rate": 0.0009674298162952826, "loss": 2.3606, "step": 255 }, { "epoch": 0.36, "learning_rate": 0.0009661446065872568, "loss": 2.3207, "step": 260 }, { "epoch": 0.36, "learning_rate": 0.0009648354157115271, "loss": 2.2505, "step": 265 }, { "epoch": 0.37, "learning_rate": 0.0009635023110204253, "loss": 2.2192, "step": 270 }, { "epoch": 0.38, "learning_rate": 0.0009621453610965467, "loss": 2.3082, "step": 275 }, { "epoch": 0.38, "learning_rate": 0.0009607646357492237, "loss": 2.2913, "step": 280 }, { "epoch": 0.39, "learning_rate": 0.0009593602060109334, "loss": 2.1133, "step": 285 }, { "epoch": 0.4, "learning_rate": 0.0009579321441336436, "loss": 2.1855, "step": 290 }, { "epoch": 0.4, "learning_rate": 0.0009564805235850955, "loss": 2.3691, "step": 295 }, { "epoch": 0.41, "learning_rate": 0.0009550054190450246, "loss": 2.1942, "step": 300 }, { "epoch": 0.41, "eval_loss": 2.184567451477051, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 300 }, { "epoch": 0.42, "learning_rate": 0.000953506906401318, "loss": 2.2661, "step": 305 }, { "epoch": 0.42, "learning_rate": 0.0009519850627461109, "loss": 2.1222, "step": 310 }, { "epoch": 0.43, "learning_rate": 0.0009504399663718202, "loss": 2.2852, "step": 315 }, { "epoch": 0.44, "learning_rate": 0.0009488716967671169, "loss": 2.2543, "step": 320 }, { "epoch": 0.45, "learning_rate": 0.0009472803346128368, "loss": 2.4232, "step": 325 }, { "epoch": 0.45, "learning_rate": 0.0009456659617778294, "loss": 2.3455, "step": 330 }, { "epoch": 0.46, "learning_rate": 0.0009440286613147466, "loss": 2.2071, "step": 335 }, { "epoch": 0.47, "learning_rate": 0.0009423685174557695, "loss": 2.2561, "step": 340 }, { "epoch": 0.47, "learning_rate": 0.0009406856156082755, "loss": 2.2257, "step": 345 }, { "epoch": 0.48, "learning_rate": 0.0009389800423504441, "loss": 2.0418, "step": 350 }, { "epoch": 0.49, "learning_rate": 0.000937251885426803, "loss": 2.1139, "step": 355 }, { "epoch": 0.49, "learning_rate": 0.0009355012337437138, "loss": 2.0349, "step": 360 }, { "epoch": 0.5, "learning_rate": 0.0009337281773647985, "loss": 2.1056, "step": 365 }, { "epoch": 0.51, "learning_rate": 0.0009319328075063059, "loss": 2.0944, "step": 370 }, { "epoch": 0.51, "learning_rate": 0.0009301152165324185, "loss": 2.0468, "step": 375 }, { "epoch": 0.52, "learning_rate": 0.0009282754979505018, "loss": 2.4125, "step": 380 }, { "epoch": 0.53, "learning_rate": 0.0009264137464062927, "loss": 2.14, "step": 385 }, { "epoch": 0.53, "learning_rate": 0.0009245300576790309, "loss": 2.0077, "step": 390 }, { "epoch": 0.54, "learning_rate": 0.0009226245286765316, "loss": 2.0926, "step": 395 }, { "epoch": 0.55, "learning_rate": 0.0009206972574301991, "loss": 2.2612, "step": 400 }, { "epoch": 0.55, "eval_loss": 2.1513657569885254, "eval_runtime": 8.8395, "eval_samples_per_second": 13.349, "eval_steps_per_second": 1.697, "step": 400 }, { "epoch": 0.55, "learning_rate": 0.0009187483430899845, "loss": 2.1961, "step": 405 }, { "epoch": 0.56, "learning_rate": 0.000916777885919285, "loss": 2.21, "step": 410 }, { "epoch": 0.57, "learning_rate": 0.0009147859872897843, "loss": 2.1734, "step": 415 }, { "epoch": 0.58, "learning_rate": 0.0009127727496762394, "loss": 2.2751, "step": 420 }, { "epoch": 0.58, "learning_rate": 0.0009107382766512072, "loss": 2.165, "step": 425 }, { "epoch": 0.59, "learning_rate": 0.0009086826728797165, "loss": 1.9987, "step": 430 }, { "epoch": 0.6, "learning_rate": 0.0009066060441138841, "loss": 2.2766, "step": 435 }, { "epoch": 0.6, "learning_rate": 0.0009045084971874737, "loss": 2.2556, "step": 440 }, { "epoch": 0.61, "learning_rate": 0.0009023901400103995, "loss": 2.2067, "step": 445 }, { "epoch": 0.62, "learning_rate": 0.0009002510815631754, "loss": 2.1062, "step": 450 }, { "epoch": 0.62, "learning_rate": 0.0008980914318913078, "loss": 2.2781, "step": 455 }, { "epoch": 0.63, "learning_rate": 0.0008959113020996348, "loss": 2.2977, "step": 460 }, { "epoch": 0.64, "learning_rate": 0.0008937108043466098, "loss": 2.1739, "step": 465 }, { "epoch": 0.64, "learning_rate": 0.0008914900518385314, "loss": 2.2108, "step": 470 }, { "epoch": 0.65, "learning_rate": 0.0008892491588237203, "loss": 2.204, "step": 475 }, { "epoch": 0.66, "learning_rate": 0.0008869882405866404, "loss": 2.0958, "step": 480 }, { "epoch": 0.66, "learning_rate": 0.000884707413441969, "loss": 2.1794, "step": 485 }, { "epoch": 0.67, "learning_rate": 0.0008824067947286121, "loss": 2.2172, "step": 490 }, { "epoch": 0.68, "learning_rate": 0.0008800865028036685, "loss": 2.1321, "step": 495 }, { "epoch": 0.68, "learning_rate": 0.0008777466570363402, "loss": 2.1153, "step": 500 }, { "epoch": 0.68, "eval_loss": 2.1377201080322266, "eval_runtime": 8.8389, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 500 }, { "epoch": 0.69, "learning_rate": 0.0008753873778017918, "loss": 2.0652, "step": 505 }, { "epoch": 0.7, "learning_rate": 0.0008730087864749578, "loss": 2.1858, "step": 510 }, { "epoch": 0.71, "learning_rate": 0.0008706110054242979, "loss": 2.3533, "step": 515 }, { "epoch": 0.71, "learning_rate": 0.0008681941580055016, "loss": 2.0069, "step": 520 }, { "epoch": 0.72, "learning_rate": 0.0008657583685551429, "loss": 2.2022, "step": 525 }, { "epoch": 0.73, "learning_rate": 0.0008633037623842828, "loss": 2.2126, "step": 530 }, { "epoch": 0.73, "learning_rate": 0.0008608304657720232, "loss": 2.1354, "step": 535 }, { "epoch": 0.74, "learning_rate": 0.00085833860595901, "loss": 2.3017, "step": 540 }, { "epoch": 0.75, "learning_rate": 0.0008558283111408874, "loss": 2.1492, "step": 545 }, { "epoch": 0.75, "learning_rate": 0.0008532997104617022, "loss": 2.3461, "step": 550 }, { "epoch": 0.76, "learning_rate": 0.0008507529340072608, "loss": 2.2892, "step": 555 }, { "epoch": 0.77, "learning_rate": 0.0008481881127984361, "loss": 2.1384, "step": 560 }, { "epoch": 0.77, "learning_rate": 0.0008456053787844274, "loss": 2.2022, "step": 565 }, { "epoch": 0.78, "learning_rate": 0.0008430048648359713, "loss": 2.1867, "step": 570 }, { "epoch": 0.79, "learning_rate": 0.000840386704738508, "loss": 2.0888, "step": 575 }, { "epoch": 0.79, "learning_rate": 0.0008377510331852969, "loss": 2.1194, "step": 580 }, { "epoch": 0.8, "learning_rate": 0.0008350979857704872, "loss": 2.1946, "step": 585 }, { "epoch": 0.81, "learning_rate": 0.0008324276989821433, "loss": 2.2366, "step": 590 }, { "epoch": 0.82, "learning_rate": 0.0008297403101952221, "loss": 2.2807, "step": 595 }, { "epoch": 0.82, "learning_rate": 0.0008270359576645061, "loss": 2.2787, "step": 600 }, { "epoch": 0.82, "eval_loss": 2.1165168285369873, "eval_runtime": 8.8529, "eval_samples_per_second": 13.329, "eval_steps_per_second": 1.694, "step": 600 }, { "epoch": 0.83, "learning_rate": 0.0008243147805174907, "loss": 2.1761, "step": 605 }, { "epoch": 0.84, "learning_rate": 0.0008215769187472266, "loss": 2.036, "step": 610 }, { "epoch": 0.84, "learning_rate": 0.0008188225132051175, "loss": 2.3055, "step": 615 }, { "epoch": 0.85, "learning_rate": 0.0008160517055936743, "loss": 2.1386, "step": 620 }, { "epoch": 0.86, "learning_rate": 0.0008132646384592254, "loss": 2.2383, "step": 625 }, { "epoch": 0.86, "learning_rate": 0.0008104614551845823, "loss": 2.0414, "step": 630 }, { "epoch": 0.87, "learning_rate": 0.000807642299981664, "loss": 2.1539, "step": 635 }, { "epoch": 0.88, "learning_rate": 0.0008048073178840773, "loss": 2.2804, "step": 640 }, { "epoch": 0.88, "learning_rate": 0.0008019566547396563, "loss": 2.0363, "step": 645 }, { "epoch": 0.89, "learning_rate": 0.0007990904572029582, "loss": 2.152, "step": 650 }, { "epoch": 0.9, "learning_rate": 0.0007962088727277193, "loss": 2.1571, "step": 655 }, { "epoch": 0.9, "learning_rate": 0.0007933120495592682, "loss": 2.1813, "step": 660 }, { "epoch": 0.91, "learning_rate": 0.0007904001367269004, "loss": 2.2906, "step": 665 }, { "epoch": 0.92, "learning_rate": 0.0007874732840362107, "loss": 2.2132, "step": 670 }, { "epoch": 0.92, "learning_rate": 0.0007845316420613859, "loss": 2.0774, "step": 675 }, { "epoch": 0.93, "learning_rate": 0.0007815753621374593, "loss": 2.1264, "step": 680 }, { "epoch": 0.94, "learning_rate": 0.0007786045963525249, "loss": 2.1156, "step": 685 }, { "epoch": 0.95, "learning_rate": 0.0007756194975399123, "loss": 2.0804, "step": 690 }, { "epoch": 0.95, "learning_rate": 0.0007726202192703255, "loss": 2.2225, "step": 695 }, { "epoch": 0.96, "learning_rate": 0.0007696069158439412, "loss": 2.0872, "step": 700 }, { "epoch": 0.96, "eval_loss": 2.0954582691192627, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 700 }, { "epoch": 0.97, "learning_rate": 0.0007665797422824708, "loss": 2.0316, "step": 705 }, { "epoch": 0.97, "learning_rate": 0.0007635388543211861, "loss": 2.1545, "step": 710 }, { "epoch": 0.98, "learning_rate": 0.0007604844084009063, "loss": 2.082, "step": 715 }, { "epoch": 0.99, "learning_rate": 0.0007574165616599501, "loss": 2.1494, "step": 720 }, { "epoch": 0.99, "learning_rate": 0.0007543354719260522, "loss": 2.2864, "step": 725 }, { "epoch": 1.0, "learning_rate": 0.000751241297708243, "loss": 2.122, "step": 730 }, { "epoch": 1.01, "learning_rate": 0.0007481341981886942, "loss": 1.9816, "step": 735 }, { "epoch": 1.01, "learning_rate": 0.0007450143332145297, "loss": 1.8529, "step": 740 }, { "epoch": 1.02, "learning_rate": 0.0007418818632896017, "loss": 1.936, "step": 745 }, { "epoch": 1.03, "learning_rate": 0.0007387369495662343, "loss": 1.9031, "step": 750 }, { "epoch": 1.03, "learning_rate": 0.000735579753836932, "loss": 1.8006, "step": 755 }, { "epoch": 1.04, "learning_rate": 0.0007324104385260566, "loss": 1.9221, "step": 760 }, { "epoch": 1.05, "learning_rate": 0.0007292291666814713, "loss": 1.9734, "step": 765 }, { "epoch": 1.05, "learning_rate": 0.0007260361019661522, "loss": 2.0162, "step": 770 }, { "epoch": 1.06, "learning_rate": 0.0007228314086497686, "loss": 1.755, "step": 775 }, { "epoch": 1.07, "learning_rate": 0.0007196152516002323, "loss": 2.0059, "step": 780 }, { "epoch": 1.08, "learning_rate": 0.0007163877962752157, "loss": 2.0842, "step": 785 }, { "epoch": 1.08, "learning_rate": 0.0007131492087136393, "loss": 2.0298, "step": 790 }, { "epoch": 1.09, "learning_rate": 0.0007098996555271309, "loss": 2.0099, "step": 795 }, { "epoch": 1.1, "learning_rate": 0.0007066393038914522, "loss": 2.0318, "step": 800 }, { "epoch": 1.1, "eval_loss": 2.081965923309326, "eval_runtime": 8.8393, "eval_samples_per_second": 13.349, "eval_steps_per_second": 1.697, "step": 800 }, { "epoch": 1.1, "learning_rate": 0.0007033683215379002, "loss": 2.0159, "step": 805 }, { "epoch": 1.11, "learning_rate": 0.0007000868767446771, "loss": 2.0416, "step": 810 }, { "epoch": 1.12, "learning_rate": 0.0006967951383282334, "loss": 1.9853, "step": 815 }, { "epoch": 1.12, "learning_rate": 0.000693493275634583, "loss": 2.0131, "step": 820 }, { "epoch": 1.13, "learning_rate": 0.0006901814585305909, "loss": 1.9054, "step": 825 }, { "epoch": 1.14, "learning_rate": 0.0006868598573952345, "loss": 1.9213, "step": 830 }, { "epoch": 1.14, "learning_rate": 0.0006835286431108383, "loss": 1.8538, "step": 835 }, { "epoch": 1.15, "learning_rate": 0.0006801879870542821, "loss": 2.0632, "step": 840 }, { "epoch": 1.16, "learning_rate": 0.0006768380610881859, "loss": 2.0617, "step": 845 }, { "epoch": 1.16, "learning_rate": 0.0006734790375520663, "loss": 1.9131, "step": 850 }, { "epoch": 1.17, "learning_rate": 0.0006701110892534723, "loss": 1.969, "step": 855 }, { "epoch": 1.18, "learning_rate": 0.0006667343894590934, "loss": 2.1041, "step": 860 }, { "epoch": 1.18, "learning_rate": 0.0006633491118858471, "loss": 1.9544, "step": 865 }, { "epoch": 1.19, "learning_rate": 0.0006599554306919408, "loss": 1.9392, "step": 870 }, { "epoch": 1.2, "learning_rate": 0.0006565535204679134, "loss": 1.9857, "step": 875 }, { "epoch": 1.21, "learning_rate": 0.0006531435562276514, "loss": 2.0771, "step": 880 }, { "epoch": 1.21, "learning_rate": 0.0006497257133993877, "loss": 1.9266, "step": 885 }, { "epoch": 1.22, "learning_rate": 0.0006463001678166743, "loss": 1.9898, "step": 890 }, { "epoch": 1.23, "learning_rate": 0.0006428670957093375, "loss": 1.9723, "step": 895 }, { "epoch": 1.23, "learning_rate": 0.0006394266736944118, "loss": 1.9746, "step": 900 }, { "epoch": 1.23, "eval_loss": 2.0750818252563477, "eval_runtime": 8.84, "eval_samples_per_second": 13.348, "eval_steps_per_second": 1.697, "step": 900 }, { "epoch": 1.24, "learning_rate": 0.0006359790787670527, "loss": 1.8354, "step": 905 }, { "epoch": 1.25, "learning_rate": 0.0006325244882914327, "loss": 1.8571, "step": 910 }, { "epoch": 1.25, "learning_rate": 0.0006290630799916144, "loss": 2.0376, "step": 915 }, { "epoch": 1.26, "learning_rate": 0.0006255950319424097, "loss": 2.0141, "step": 920 }, { "epoch": 1.27, "learning_rate": 0.0006221205225602169, "loss": 1.982, "step": 925 }, { "epoch": 1.27, "learning_rate": 0.0006186397305938427, "loss": 1.9456, "step": 930 }, { "epoch": 1.28, "learning_rate": 0.0006151528351153061, "loss": 1.8855, "step": 935 }, { "epoch": 1.29, "learning_rate": 0.0006116600155106263, "loss": 1.9335, "step": 940 }, { "epoch": 1.29, "learning_rate": 0.0006081614514705933, "loss": 1.9459, "step": 945 }, { "epoch": 1.3, "learning_rate": 0.0006046573229815243, "loss": 1.9753, "step": 950 }, { "epoch": 1.31, "learning_rate": 0.0006011478103160037, "loss": 1.969, "step": 955 }, { "epoch": 1.32, "learning_rate": 0.0005976330940236089, "loss": 1.9184, "step": 960 }, { "epoch": 1.32, "learning_rate": 0.0005941133549216221, "loss": 1.9287, "step": 965 }, { "epoch": 1.33, "learning_rate": 0.0005905887740857279, "loss": 1.9373, "step": 970 }, { "epoch": 1.34, "learning_rate": 0.0005870595328406971, "loss": 1.9323, "step": 975 }, { "epoch": 1.34, "learning_rate": 0.0005835258127510597, "loss": 1.9249, "step": 980 }, { "epoch": 1.35, "learning_rate": 0.0005799877956117621, "loss": 2.0135, "step": 985 }, { "epoch": 1.36, "learning_rate": 0.0005764456634388171, "loss": 1.9741, "step": 990 }, { "epoch": 1.36, "learning_rate": 0.0005728995984599373, "loss": 1.9028, "step": 995 }, { "epoch": 1.37, "learning_rate": 0.0005693497831051624, "loss": 1.9647, "step": 1000 }, { "epoch": 1.37, "eval_loss": 2.0705747604370117, "eval_runtime": 8.8388, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 1000 }, { "epoch": 1.38, "learning_rate": 0.0005657963999974728, "loss": 1.9312, "step": 1005 }, { "epoch": 1.38, "learning_rate": 0.0005622396319433947, "loss": 1.9319, "step": 1010 }, { "epoch": 1.39, "learning_rate": 0.0005586796619235951, "loss": 2.0215, "step": 1015 }, { "epoch": 1.4, "learning_rate": 0.0005551166730834692, "loss": 1.9109, "step": 1020 }, { "epoch": 1.4, "learning_rate": 0.0005515508487237174, "loss": 1.9534, "step": 1025 }, { "epoch": 1.41, "learning_rate": 0.0005479823722909158, "loss": 1.9559, "step": 1030 }, { "epoch": 1.42, "learning_rate": 0.0005444114273680778, "loss": 1.9402, "step": 1035 }, { "epoch": 1.42, "learning_rate": 0.0005408381976652113, "loss": 1.8844, "step": 1040 }, { "epoch": 1.43, "learning_rate": 0.0005372628670098654, "loss": 1.8787, "step": 1045 }, { "epoch": 1.44, "learning_rate": 0.0005336856193376748, "loss": 2.0642, "step": 1050 }, { "epoch": 1.45, "learning_rate": 0.0005301066386828965, "loss": 2.0661, "step": 1055 }, { "epoch": 1.45, "learning_rate": 0.0005265261091689423, "loss": 1.8911, "step": 1060 }, { "epoch": 1.46, "learning_rate": 0.0005229442149989058, "loss": 1.9742, "step": 1065 }, { "epoch": 1.47, "learning_rate": 0.0005193611404460872, "loss": 1.8662, "step": 1070 }, { "epoch": 1.47, "learning_rate": 0.0005157770698445116, "loss": 1.9766, "step": 1075 }, { "epoch": 1.48, "learning_rate": 0.0005121921875794468, "loss": 1.8823, "step": 1080 }, { "epoch": 1.49, "learning_rate": 0.0005086066780779174, "loss": 2.0215, "step": 1085 }, { "epoch": 1.49, "learning_rate": 0.0005050207257992166, "loss": 2.002, "step": 1090 }, { "epoch": 1.5, "learning_rate": 0.0005014345152254166, "loss": 2.0568, "step": 1095 }, { "epoch": 1.51, "learning_rate": 0.0004978482308518779, "loss": 2.0477, "step": 1100 }, { "epoch": 1.51, "eval_loss": 2.0488994121551514, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 1100 }, { "epoch": 1.51, "learning_rate": 0.0004942620571777576, "loss": 1.9615, "step": 1105 }, { "epoch": 1.52, "learning_rate": 0.0004906761786965175, "loss": 1.9747, "step": 1110 }, { "epoch": 1.53, "learning_rate": 0.00048709077988643367, "loss": 2.0174, "step": 1115 }, { "epoch": 1.53, "learning_rate": 0.0004835060452011041, "loss": 1.927, "step": 1120 }, { "epoch": 1.54, "learning_rate": 0.00047992215905996163, "loss": 1.8728, "step": 1125 }, { "epoch": 1.55, "learning_rate": 0.0004763393058387841, "loss": 2.0364, "step": 1130 }, { "epoch": 1.55, "learning_rate": 0.00047275766986021046, "loss": 1.9834, "step": 1135 }, { "epoch": 1.56, "learning_rate": 0.0004691774353842571, "loss": 1.838, "step": 1140 }, { "epoch": 1.57, "learning_rate": 0.0004655987865988401, "loss": 1.8644, "step": 1145 }, { "epoch": 1.58, "learning_rate": 0.0004620219076102975, "loss": 1.9723, "step": 1150 }, { "epoch": 1.58, "learning_rate": 0.0004584469824339192, "loss": 1.7848, "step": 1155 }, { "epoch": 1.59, "learning_rate": 0.0004548741949844795, "loss": 1.9848, "step": 1160 }, { "epoch": 1.6, "learning_rate": 0.0004513037290667761, "loss": 1.9545, "step": 1165 }, { "epoch": 1.6, "learning_rate": 0.00044773576836617336, "loss": 2.0322, "step": 1170 }, { "epoch": 1.61, "learning_rate": 0.0004441704964391529, "loss": 1.933, "step": 1175 }, { "epoch": 1.62, "learning_rate": 0.0004406080967038701, "loss": 1.8019, "step": 1180 }, { "epoch": 1.62, "learning_rate": 0.0004370487524307189, "loss": 1.9489, "step": 1185 }, { "epoch": 1.63, "learning_rate": 0.00043349264673290204, "loss": 1.9982, "step": 1190 }, { "epoch": 1.64, "learning_rate": 0.0004299399625570114, "loss": 1.9536, "step": 1195 }, { "epoch": 1.64, "learning_rate": 0.00042639088267361596, "loss": 1.849, "step": 1200 }, { "epoch": 1.64, "eval_loss": 2.030912160873413, "eval_runtime": 8.838, "eval_samples_per_second": 13.351, "eval_steps_per_second": 1.697, "step": 1200 }, { "epoch": 1.65, "learning_rate": 0.00042284558966785944, "loss": 1.9273, "step": 1205 }, { "epoch": 1.66, "learning_rate": 0.00041930426593006633, "loss": 1.9215, "step": 1210 }, { "epoch": 1.66, "learning_rate": 0.0004157670936463592, "loss": 2.0457, "step": 1215 }, { "epoch": 1.67, "learning_rate": 0.00041223425478928595, "loss": 1.9452, "step": 1220 }, { "epoch": 1.68, "learning_rate": 0.0004087059311084581, "loss": 2.033, "step": 1225 }, { "epoch": 1.68, "learning_rate": 0.0004051823041212002, "loss": 1.9818, "step": 1230 }, { "epoch": 1.69, "learning_rate": 0.00040166355510321195, "loss": 2.0649, "step": 1235 }, { "epoch": 1.7, "learning_rate": 0.00039814986507924195, "loss": 1.8362, "step": 1240 }, { "epoch": 1.71, "learning_rate": 0.0003946414148137756, "loss": 2.017, "step": 1245 }, { "epoch": 1.71, "learning_rate": 0.0003911383848017341, "loss": 1.871, "step": 1250 }, { "epoch": 1.72, "learning_rate": 0.0003876409552591901, "loss": 1.9843, "step": 1255 }, { "epoch": 1.73, "learning_rate": 0.00038414930611409525, "loss": 1.7489, "step": 1260 }, { "epoch": 1.73, "learning_rate": 0.000380663616997025, "loss": 2.071, "step": 1265 }, { "epoch": 1.74, "learning_rate": 0.00037718406723193576, "loss": 2.0351, "step": 1270 }, { "epoch": 1.75, "learning_rate": 0.0003737108358269408, "loss": 1.9891, "step": 1275 }, { "epoch": 1.75, "learning_rate": 0.00037024410146510014, "loss": 2.0055, "step": 1280 }, { "epoch": 1.76, "learning_rate": 0.0003667840424952288, "loss": 1.9104, "step": 1285 }, { "epoch": 1.77, "learning_rate": 0.00036333083692272083, "loss": 1.9239, "step": 1290 }, { "epoch": 1.77, "learning_rate": 0.00035988466240039206, "loss": 1.8592, "step": 1295 }, { "epoch": 1.78, "learning_rate": 0.0003564456962193403, "loss": 2.0883, "step": 1300 }, { "epoch": 1.78, "eval_loss": 2.012606382369995, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 1300 }, { "epoch": 1.79, "learning_rate": 0.0003530141152998255, "loss": 1.8061, "step": 1305 }, { "epoch": 1.79, "learning_rate": 0.0003495900961821662, "loss": 1.8564, "step": 1310 }, { "epoch": 1.8, "learning_rate": 0.0003461738150176588, "loss": 2.0208, "step": 1315 }, { "epoch": 1.81, "learning_rate": 0.00034276544755951444, "loss": 2.0325, "step": 1320 }, { "epoch": 1.82, "learning_rate": 0.00033936516915381774, "loss": 1.9853, "step": 1325 }, { "epoch": 1.82, "learning_rate": 0.00033597315473050596, "loss": 1.8627, "step": 1330 }, { "epoch": 1.83, "learning_rate": 0.00033258957879436893, "loss": 1.7516, "step": 1335 }, { "epoch": 1.84, "learning_rate": 0.00032921461541607225, "loss": 1.8258, "step": 1340 }, { "epoch": 1.84, "learning_rate": 0.0003258484382232023, "loss": 1.9302, "step": 1345 }, { "epoch": 1.85, "learning_rate": 0.00032249122039133273, "loss": 1.8517, "step": 1350 }, { "epoch": 1.86, "learning_rate": 0.00031914313463511635, "loss": 1.9234, "step": 1355 }, { "epoch": 1.86, "learning_rate": 0.000315804353199399, "loss": 1.7208, "step": 1360 }, { "epoch": 1.87, "learning_rate": 0.0003124750478503593, "loss": 1.9718, "step": 1365 }, { "epoch": 1.88, "learning_rate": 0.0003091553898666705, "loss": 1.9739, "step": 1370 }, { "epoch": 1.88, "learning_rate": 0.00030584555003069017, "loss": 1.9678, "step": 1375 }, { "epoch": 1.89, "learning_rate": 0.0003025456986196734, "loss": 1.859, "step": 1380 }, { "epoch": 1.9, "learning_rate": 0.0002992560053970135, "loss": 1.866, "step": 1385 }, { "epoch": 1.9, "learning_rate": 0.0002959766396035077, "loss": 1.8789, "step": 1390 }, { "epoch": 1.91, "learning_rate": 0.0002927077699486507, "loss": 1.9049, "step": 1395 }, { "epoch": 1.92, "learning_rate": 0.00028944956460195514, "loss": 1.9501, "step": 1400 }, { "epoch": 1.92, "eval_loss": 2.006899833679199, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 1400 }, { "epoch": 1.92, "learning_rate": 0.0002862021911843008, "loss": 1.9477, "step": 1405 }, { "epoch": 1.93, "learning_rate": 0.00028296581675930964, "loss": 1.8011, "step": 1410 }, { "epoch": 1.94, "learning_rate": 0.00027974060782475255, "loss": 1.9646, "step": 1415 }, { "epoch": 1.95, "learning_rate": 0.000276526730303983, "loss": 1.7943, "step": 1420 }, { "epoch": 1.95, "learning_rate": 0.0002733243495374013, "loss": 1.8871, "step": 1425 }, { "epoch": 1.96, "learning_rate": 0.000270133630273948, "loss": 1.9161, "step": 1430 }, { "epoch": 1.97, "learning_rate": 0.00026695473666262925, "loss": 1.7969, "step": 1435 }, { "epoch": 1.97, "learning_rate": 0.0002637878322440708, "loss": 1.9717, "step": 1440 }, { "epoch": 1.98, "learning_rate": 0.00026063307994210586, "loss": 1.8904, "step": 1445 }, { "epoch": 1.99, "learning_rate": 0.00025749064205539206, "loss": 1.8843, "step": 1450 }, { "epoch": 1.99, "learning_rate": 0.0002543606802490628, "loss": 1.9602, "step": 1455 }, { "epoch": 2.0, "learning_rate": 0.00025124335554640965, "loss": 1.8936, "step": 1460 }, { "epoch": 2.01, "learning_rate": 0.00024813882832059914, "loss": 1.6483, "step": 1465 }, { "epoch": 2.01, "learning_rate": 0.00024504725828642125, "loss": 1.7576, "step": 1470 }, { "epoch": 2.02, "learning_rate": 0.00024196880449207364, "loss": 1.7134, "step": 1475 }, { "epoch": 2.03, "learning_rate": 0.0002389036253109787, "loss": 1.7071, "step": 1480 }, { "epoch": 2.03, "learning_rate": 0.00023585187843363614, "loss": 1.7951, "step": 1485 }, { "epoch": 2.04, "learning_rate": 0.00023281372085951068, "loss": 1.6741, "step": 1490 }, { "epoch": 2.05, "learning_rate": 0.00022978930888895466, "loss": 1.6294, "step": 1495 }, { "epoch": 2.05, "learning_rate": 0.00022677879811516715, "loss": 1.7268, "step": 1500 }, { "epoch": 2.05, "eval_loss": 2.005824327468872, "eval_runtime": 8.8391, "eval_samples_per_second": 13.35, "eval_steps_per_second": 1.697, "step": 1500 } ], "logging_steps": 5, "max_steps": 2190, "num_train_epochs": 3, "save_steps": 100, "total_flos": 4.432472626272338e+17, "trial_name": null, "trial_params": null }