{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "eval_steps": 500, "global_step": 197880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.0535678188801295e-08, "loss": 12.7149, "step": 1 }, { "epoch": 0.3, "learning_rate": 5.0030321406913285e-05, "loss": 3.3584, "step": 990 }, { "epoch": 0.6, "learning_rate": 0.00010006064281382657, "loss": 0.3384, "step": 1980 }, { "epoch": 0.9, "learning_rate": 0.00015009096422073984, "loss": 0.2236, "step": 2970 }, { "epoch": 1.0, "eval_loss": 0.11203870922327042, "eval_max_distance": 133, "eval_mean_distance": 5, "eval_runtime": 0.5965, "eval_samples_per_second": 83.828, "eval_steps_per_second": 3.353, "step": 3298 }, { "epoch": 1.2, "learning_rate": 0.00020012128562765314, "loss": 0.1679, "step": 3960 }, { "epoch": 1.5, "learning_rate": 0.0002501516070345664, "loss": 0.1395, "step": 4950 }, { "epoch": 1.8, "learning_rate": 0.0003001819284414797, "loss": 0.1179, "step": 5940 }, { "epoch": 2.0, "eval_loss": 0.05475025996565819, "eval_max_distance": 82, "eval_mean_distance": 3, "eval_runtime": 0.5422, "eval_samples_per_second": 92.223, "eval_steps_per_second": 3.689, "step": 6596 }, { "epoch": 2.1, "learning_rate": 0.0003502122498483929, "loss": 0.1022, "step": 6930 }, { "epoch": 2.4, "learning_rate": 0.0004002425712553063, "loss": 0.0917, "step": 7920 }, { "epoch": 2.7, "learning_rate": 0.0004502728926622195, "loss": 0.0829, "step": 8910 }, { "epoch": 3.0, "eval_loss": 0.042510777711868286, "eval_max_distance": 46, "eval_mean_distance": 1, "eval_runtime": 0.5158, "eval_samples_per_second": 96.928, "eval_steps_per_second": 3.877, "step": 9894 }, { "epoch": 3.0, "learning_rate": 0.0005003032140691328, "loss": 0.0769, "step": 9900 }, { "epoch": 3.3, "learning_rate": 0.0005503335354760462, "loss": 0.0667, "step": 10890 }, { "epoch": 3.6, "learning_rate": 0.0006003638568829594, "loss": 0.0653, "step": 11880 }, { "epoch": 3.9, "learning_rate": 0.0006503941782898727, "loss": 0.0643, "step": 12870 }, { "epoch": 4.0, "eval_loss": 0.03110930137336254, "eval_max_distance": 64, "eval_mean_distance": 1, "eval_runtime": 0.4848, "eval_samples_per_second": 103.129, "eval_steps_per_second": 4.125, "step": 13192 }, { "epoch": 4.2, "learning_rate": 0.0007004244996967858, "loss": 0.0589, "step": 13860 }, { "epoch": 4.5, "learning_rate": 0.0007504548211036993, "loss": 0.0549, "step": 14850 }, { "epoch": 4.8, "learning_rate": 0.0008004851425106126, "loss": 0.0538, "step": 15840 }, { "epoch": 5.0, "eval_loss": 0.026651622727513313, "eval_max_distance": 48, "eval_mean_distance": 1, "eval_runtime": 0.5057, "eval_samples_per_second": 98.878, "eval_steps_per_second": 3.955, "step": 16490 }, { "epoch": 5.1, "learning_rate": 0.0008505154639175257, "loss": 0.048, "step": 16830 }, { "epoch": 5.4, "learning_rate": 0.000900545785324439, "loss": 0.0461, "step": 17820 }, { "epoch": 5.7, "learning_rate": 0.0009505761067313523, "loss": 0.0469, "step": 18810 }, { "epoch": 6.0, "eval_loss": 0.039574604481458664, "eval_max_distance": 80, "eval_mean_distance": 2, "eval_runtime": 0.5179, "eval_samples_per_second": 96.548, "eval_steps_per_second": 3.862, "step": 19788 }, { "epoch": 6.0, "learning_rate": 0.0009999326190957482, "loss": 0.0464, "step": 19800 }, { "epoch": 6.3, "learning_rate": 0.0009943736944949802, "loss": 0.0393, "step": 20790 }, { "epoch": 6.6, "learning_rate": 0.000988814769894212, "loss": 0.0426, "step": 21780 }, { "epoch": 6.9, "learning_rate": 0.000983255845293444, "loss": 0.0385, "step": 22770 }, { "epoch": 7.0, "eval_loss": 0.026188833639025688, "eval_max_distance": 73, "eval_mean_distance": 2, "eval_runtime": 0.4896, "eval_samples_per_second": 102.115, "eval_steps_per_second": 4.085, "step": 23086 }, { "epoch": 7.2, "learning_rate": 0.0009776969206926756, "loss": 0.034, "step": 23760 }, { "epoch": 7.5, "learning_rate": 0.0009721379960919076, "loss": 0.0315, "step": 24750 }, { "epoch": 7.8, "learning_rate": 0.0009665790714911395, "loss": 0.0316, "step": 25740 }, { "epoch": 8.0, "eval_loss": 0.02234221063554287, "eval_max_distance": 40, "eval_mean_distance": 1, "eval_runtime": 0.4837, "eval_samples_per_second": 103.365, "eval_steps_per_second": 4.135, "step": 26384 }, { "epoch": 8.1, "learning_rate": 0.0009610201468903713, "loss": 0.0305, "step": 26730 }, { "epoch": 8.41, "learning_rate": 0.0009554612222896032, "loss": 0.0271, "step": 27720 }, { "epoch": 8.71, "learning_rate": 0.0009499022976888349, "loss": 0.0263, "step": 28710 }, { "epoch": 9.0, "eval_loss": 0.023996921256184578, "eval_max_distance": 69, "eval_mean_distance": 1, "eval_runtime": 0.4894, "eval_samples_per_second": 102.167, "eval_steps_per_second": 4.087, "step": 29682 }, { "epoch": 9.01, "learning_rate": 0.0009443433730880669, "loss": 0.0282, "step": 29700 }, { "epoch": 9.31, "learning_rate": 0.0009387844484872987, "loss": 0.0229, "step": 30690 }, { "epoch": 9.61, "learning_rate": 0.0009332255238865306, "loss": 0.0226, "step": 31680 }, { "epoch": 9.91, "learning_rate": 0.0009276665992857625, "loss": 0.0226, "step": 32670 }, { "epoch": 10.0, "eval_loss": 0.02030733972787857, "eval_max_distance": 60, "eval_mean_distance": 1, "eval_runtime": 0.4797, "eval_samples_per_second": 104.236, "eval_steps_per_second": 4.169, "step": 32980 }, { "epoch": 10.21, "learning_rate": 0.0009221076746849943, "loss": 0.0209, "step": 33660 }, { "epoch": 10.51, "learning_rate": 0.0009165487500842261, "loss": 0.02, "step": 34650 }, { "epoch": 10.81, "learning_rate": 0.000910989825483458, "loss": 0.0203, "step": 35640 }, { "epoch": 11.0, "eval_loss": 0.017732510343194008, "eval_max_distance": 54, "eval_mean_distance": 1, "eval_runtime": 0.4814, "eval_samples_per_second": 103.858, "eval_steps_per_second": 4.154, "step": 36278 }, { "epoch": 11.11, "learning_rate": 0.0009054309008826899, "loss": 0.0183, "step": 36630 }, { "epoch": 11.41, "learning_rate": 0.0008998719762819217, "loss": 0.0174, "step": 37620 }, { "epoch": 11.71, "learning_rate": 0.0008943130516811536, "loss": 0.0178, "step": 38610 }, { "epoch": 12.0, "eval_loss": 0.018777821213006973, "eval_max_distance": 61, "eval_mean_distance": 1, "eval_runtime": 0.4893, "eval_samples_per_second": 102.185, "eval_steps_per_second": 4.087, "step": 39576 }, { "epoch": 12.01, "learning_rate": 0.0008887541270803853, "loss": 0.0174, "step": 39600 }, { "epoch": 12.31, "learning_rate": 0.0008831952024796173, "loss": 0.0153, "step": 40590 }, { "epoch": 12.61, "learning_rate": 0.0008776362778788492, "loss": 0.015, "step": 41580 }, { "epoch": 12.91, "learning_rate": 0.000872077353278081, "loss": 0.0154, "step": 42570 }, { "epoch": 13.0, "eval_loss": 0.029613599181175232, "eval_max_distance": 65, "eval_mean_distance": 1, "eval_runtime": 0.4669, "eval_samples_per_second": 107.079, "eval_steps_per_second": 4.283, "step": 42874 }, { "epoch": 13.21, "learning_rate": 0.0008665184286773129, "loss": 0.014, "step": 43560 }, { "epoch": 13.51, "learning_rate": 0.0008609595040765447, "loss": 0.0135, "step": 44550 }, { "epoch": 13.81, "learning_rate": 0.0008554005794757766, "loss": 0.0138, "step": 45540 }, { "epoch": 14.0, "eval_loss": 0.02011469565331936, "eval_max_distance": 55, "eval_mean_distance": 1, "eval_runtime": 0.5034, "eval_samples_per_second": 99.332, "eval_steps_per_second": 3.973, "step": 46172 }, { "epoch": 14.11, "learning_rate": 0.0008498416548750084, "loss": 0.0128, "step": 46530 }, { "epoch": 14.41, "learning_rate": 0.0008442827302742403, "loss": 0.0121, "step": 47520 }, { "epoch": 14.71, "learning_rate": 0.0008387238056734722, "loss": 0.012, "step": 48510 }, { "epoch": 15.0, "eval_loss": 0.026753582060337067, "eval_max_distance": 67, "eval_mean_distance": 1, "eval_runtime": 0.4716, "eval_samples_per_second": 106.031, "eval_steps_per_second": 4.241, "step": 49470 }, { "epoch": 15.01, "learning_rate": 0.000833164881072704, "loss": 0.0123, "step": 49500 }, { "epoch": 15.31, "learning_rate": 0.0008276059564719359, "loss": 0.0104, "step": 50490 }, { "epoch": 15.61, "learning_rate": 0.0008220470318711677, "loss": 0.0109, "step": 51480 }, { "epoch": 15.91, "learning_rate": 0.0008164881072703996, "loss": 0.0109, "step": 52470 }, { "epoch": 16.0, "eval_loss": 0.01633359119296074, "eval_max_distance": 35, "eval_mean_distance": 1, "eval_runtime": 0.4971, "eval_samples_per_second": 100.579, "eval_steps_per_second": 4.023, "step": 52768 }, { "epoch": 16.21, "learning_rate": 0.0008109291826696314, "loss": 0.0098, "step": 53460 }, { "epoch": 16.51, "learning_rate": 0.0008053702580688633, "loss": 0.0094, "step": 54450 }, { "epoch": 16.81, "learning_rate": 0.0007998113334680952, "loss": 0.0105, "step": 55440 }, { "epoch": 17.0, "eval_loss": 0.013592842034995556, "eval_max_distance": 26, "eval_mean_distance": 1, "eval_runtime": 0.48, "eval_samples_per_second": 104.157, "eval_steps_per_second": 4.166, "step": 56066 }, { "epoch": 17.11, "learning_rate": 0.000794252408867327, "loss": 0.0097, "step": 56430 }, { "epoch": 17.41, "learning_rate": 0.0007886934842665589, "loss": 0.0083, "step": 57420 }, { "epoch": 17.71, "learning_rate": 0.0007831345596657907, "loss": 0.0092, "step": 58410 }, { "epoch": 18.0, "eval_loss": 0.020196767523884773, "eval_max_distance": 65, "eval_mean_distance": 1, "eval_runtime": 0.4567, "eval_samples_per_second": 109.487, "eval_steps_per_second": 4.379, "step": 59364 }, { "epoch": 18.01, "learning_rate": 0.0007775756350650226, "loss": 0.009, "step": 59400 }, { "epoch": 18.31, "learning_rate": 0.0007720167104642545, "loss": 0.0075, "step": 60390 }, { "epoch": 18.61, "learning_rate": 0.0007664577858634864, "loss": 0.0078, "step": 61380 }, { "epoch": 18.91, "learning_rate": 0.0007608988612627181, "loss": 0.0087, "step": 62370 }, { "epoch": 19.0, "eval_loss": 0.02213277295231819, "eval_max_distance": 65, "eval_mean_distance": 1, "eval_runtime": 0.4707, "eval_samples_per_second": 106.233, "eval_steps_per_second": 4.249, "step": 62662 }, { "epoch": 19.21, "learning_rate": 0.00075533993666195, "loss": 0.0077, "step": 63360 }, { "epoch": 19.51, "learning_rate": 0.0007497810120611818, "loss": 0.0071, "step": 64350 }, { "epoch": 19.81, "learning_rate": 0.0007442220874604138, "loss": 0.0075, "step": 65340 }, { "epoch": 20.0, "eval_loss": 0.020336275920271873, "eval_max_distance": 33, "eval_mean_distance": 1, "eval_runtime": 0.4773, "eval_samples_per_second": 104.749, "eval_steps_per_second": 4.19, "step": 65960 }, { "epoch": 20.11, "learning_rate": 0.0007386631628596457, "loss": 0.0073, "step": 66330 }, { "epoch": 20.41, "learning_rate": 0.0007331042382588774, "loss": 0.0063, "step": 67320 }, { "epoch": 20.71, "learning_rate": 0.0007275453136581093, "loss": 0.0067, "step": 68310 }, { "epoch": 21.0, "eval_loss": 0.022562623023986816, "eval_max_distance": 26, "eval_mean_distance": 1, "eval_runtime": 0.5033, "eval_samples_per_second": 99.35, "eval_steps_per_second": 3.974, "step": 69258 }, { "epoch": 21.01, "learning_rate": 0.0007219863890573411, "loss": 0.007, "step": 69300 }, { "epoch": 21.31, "learning_rate": 0.000716427464456573, "loss": 0.0061, "step": 70290 }, { "epoch": 21.61, "learning_rate": 0.0007108685398558049, "loss": 0.006, "step": 71280 }, { "epoch": 21.91, "learning_rate": 0.0007053096152550368, "loss": 0.0062, "step": 72270 }, { "epoch": 22.0, "eval_loss": 0.01839238964021206, "eval_max_distance": 24, "eval_mean_distance": 1, "eval_runtime": 0.4856, "eval_samples_per_second": 102.959, "eval_steps_per_second": 4.118, "step": 72556 }, { "epoch": 22.21, "learning_rate": 0.0006997506906542685, "loss": 0.0057, "step": 73260 }, { "epoch": 22.51, "learning_rate": 0.0006941917660535004, "loss": 0.0058, "step": 74250 }, { "epoch": 22.81, "learning_rate": 0.0006886328414527323, "loss": 0.0059, "step": 75240 }, { "epoch": 23.0, "eval_loss": 0.013111269101500511, "eval_max_distance": 18, "eval_mean_distance": 0, "eval_runtime": 0.5001, "eval_samples_per_second": 99.983, "eval_steps_per_second": 3.999, "step": 75854 }, { "epoch": 23.11, "learning_rate": 0.0006830739168519642, "loss": 0.0055, "step": 76230 }, { "epoch": 23.41, "learning_rate": 0.0006775149922511961, "loss": 0.0051, "step": 77220 }, { "epoch": 23.71, "learning_rate": 0.0006719560676504279, "loss": 0.0054, "step": 78210 }, { "epoch": 24.0, "eval_loss": 0.026959825307130814, "eval_max_distance": 58, "eval_mean_distance": 1, "eval_runtime": 0.4725, "eval_samples_per_second": 105.825, "eval_steps_per_second": 4.233, "step": 79152 }, { "epoch": 24.01, "learning_rate": 0.0006663971430496597, "loss": 0.0055, "step": 79200 }, { "epoch": 24.31, "learning_rate": 0.0006608382184488915, "loss": 0.0046, "step": 80190 }, { "epoch": 24.61, "learning_rate": 0.0006552792938481235, "loss": 0.005, "step": 81180 }, { "epoch": 24.92, "learning_rate": 0.0006497203692473554, "loss": 0.0052, "step": 82170 }, { "epoch": 25.0, "eval_loss": 0.024379713460803032, "eval_max_distance": 45, "eval_mean_distance": 1, "eval_runtime": 0.47, "eval_samples_per_second": 106.387, "eval_steps_per_second": 4.255, "step": 82450 }, { "epoch": 25.22, "learning_rate": 0.0006441614446465872, "loss": 0.0048, "step": 83160 }, { "epoch": 25.52, "learning_rate": 0.000638602520045819, "loss": 0.0045, "step": 84150 }, { "epoch": 25.82, "learning_rate": 0.0006330435954450508, "loss": 0.0044, "step": 85140 }, { "epoch": 26.0, "eval_loss": 0.014908027835190296, "eval_max_distance": 23, "eval_mean_distance": 1, "eval_runtime": 0.4819, "eval_samples_per_second": 103.748, "eval_steps_per_second": 4.15, "step": 85748 }, { "epoch": 26.12, "learning_rate": 0.0006274846708442828, "loss": 0.0044, "step": 86130 }, { "epoch": 26.42, "learning_rate": 0.0006219257462435146, "loss": 0.0042, "step": 87120 }, { "epoch": 26.72, "learning_rate": 0.0006163668216427465, "loss": 0.0043, "step": 88110 }, { "epoch": 27.0, "eval_loss": 0.0256387647241354, "eval_max_distance": 63, "eval_mean_distance": 1, "eval_runtime": 0.5104, "eval_samples_per_second": 97.954, "eval_steps_per_second": 3.918, "step": 89046 }, { "epoch": 27.02, "learning_rate": 0.0006108078970419783, "loss": 0.0043, "step": 89100 }, { "epoch": 27.32, "learning_rate": 0.0006052489724412101, "loss": 0.004, "step": 90090 }, { "epoch": 27.62, "learning_rate": 0.0005996900478404421, "loss": 0.0037, "step": 91080 }, { "epoch": 27.92, "learning_rate": 0.0005941311232396739, "loss": 0.0038, "step": 92070 }, { "epoch": 28.0, "eval_loss": 0.017227506265044212, "eval_max_distance": 30, "eval_mean_distance": 1, "eval_runtime": 0.4632, "eval_samples_per_second": 107.934, "eval_steps_per_second": 4.317, "step": 92344 }, { "epoch": 28.22, "learning_rate": 0.0005885721986389058, "loss": 0.0037, "step": 93060 }, { "epoch": 28.52, "learning_rate": 0.0005830132740381376, "loss": 0.0038, "step": 94050 }, { "epoch": 28.82, "learning_rate": 0.0005774543494373694, "loss": 0.0036, "step": 95040 }, { "epoch": 29.0, "eval_loss": 0.022354494780302048, "eval_max_distance": 37, "eval_mean_distance": 1, "eval_runtime": 0.4846, "eval_samples_per_second": 103.187, "eval_steps_per_second": 4.127, "step": 95642 }, { "epoch": 29.12, "learning_rate": 0.0005718954248366013, "loss": 0.0037, "step": 96030 }, { "epoch": 29.42, "learning_rate": 0.0005663365002358332, "loss": 0.0033, "step": 97020 }, { "epoch": 29.72, "learning_rate": 0.000560777575635065, "loss": 0.0033, "step": 98010 }, { "epoch": 30.0, "eval_loss": 0.01936698891222477, "eval_max_distance": 30, "eval_mean_distance": 1, "eval_runtime": 0.4829, "eval_samples_per_second": 103.544, "eval_steps_per_second": 4.142, "step": 98940 }, { "epoch": 30.02, "learning_rate": 0.0005552186510342969, "loss": 0.0035, "step": 99000 }, { "epoch": 30.32, "learning_rate": 0.0005496597264335288, "loss": 0.003, "step": 99990 }, { "epoch": 30.62, "learning_rate": 0.0005441008018327606, "loss": 0.0033, "step": 100980 }, { "epoch": 30.92, "learning_rate": 0.0005385418772319925, "loss": 0.0031, "step": 101970 }, { "epoch": 31.0, "eval_loss": 0.023793019354343414, "eval_max_distance": 59, "eval_mean_distance": 1, "eval_runtime": 0.5012, "eval_samples_per_second": 99.754, "eval_steps_per_second": 3.99, "step": 102238 }, { "epoch": 31.22, "learning_rate": 0.0005329829526312243, "loss": 0.0029, "step": 102960 }, { "epoch": 31.52, "learning_rate": 0.0005274240280304562, "loss": 0.003, "step": 103950 }, { "epoch": 31.82, "learning_rate": 0.000521865103429688, "loss": 0.003, "step": 104940 }, { "epoch": 32.0, "eval_loss": 0.02003033086657524, "eval_max_distance": 28, "eval_mean_distance": 1, "eval_runtime": 0.475, "eval_samples_per_second": 105.268, "eval_steps_per_second": 4.211, "step": 105536 }, { "epoch": 32.12, "learning_rate": 0.00051630617882892, "loss": 0.0028, "step": 105930 }, { "epoch": 32.42, "learning_rate": 0.0005107472542281517, "loss": 0.0027, "step": 106920 }, { "epoch": 32.72, "learning_rate": 0.0005051883296273836, "loss": 0.0028, "step": 107910 }, { "epoch": 33.0, "eval_loss": 0.01606147363781929, "eval_max_distance": 18, "eval_mean_distance": 0, "eval_runtime": 0.4673, "eval_samples_per_second": 107.008, "eval_steps_per_second": 4.28, "step": 108834 }, { "epoch": 33.02, "learning_rate": 0.0004996294050266155, "loss": 0.0028, "step": 108900 }, { "epoch": 33.32, "learning_rate": 0.0004940704804258473, "loss": 0.0026, "step": 109890 }, { "epoch": 33.62, "learning_rate": 0.0004885115558250792, "loss": 0.0026, "step": 110880 }, { "epoch": 33.92, "learning_rate": 0.00048295263122431103, "loss": 0.0027, "step": 111870 }, { "epoch": 34.0, "eval_loss": 0.021506933495402336, "eval_max_distance": 26, "eval_mean_distance": 1, "eval_runtime": 0.4763, "eval_samples_per_second": 104.968, "eval_steps_per_second": 4.199, "step": 112132 }, { "epoch": 34.22, "learning_rate": 0.00047739370662354294, "loss": 0.0024, "step": 112860 }, { "epoch": 34.52, "learning_rate": 0.00047183478202277474, "loss": 0.0023, "step": 113850 }, { "epoch": 34.82, "learning_rate": 0.0004662758574220066, "loss": 0.0025, "step": 114840 }, { "epoch": 35.0, "eval_loss": 0.019841769710183144, "eval_max_distance": 19, "eval_mean_distance": 0, "eval_runtime": 0.4767, "eval_samples_per_second": 104.884, "eval_steps_per_second": 4.195, "step": 115430 }, { "epoch": 35.12, "learning_rate": 0.00046071693282123845, "loss": 0.0023, "step": 115830 }, { "epoch": 35.42, "learning_rate": 0.0004551580082204703, "loss": 0.0021, "step": 116820 }, { "epoch": 35.72, "learning_rate": 0.0004495990836197022, "loss": 0.0023, "step": 117810 }, { "epoch": 36.0, "eval_loss": 0.01675160974264145, "eval_max_distance": 24, "eval_mean_distance": 0, "eval_runtime": 0.4591, "eval_samples_per_second": 108.901, "eval_steps_per_second": 4.356, "step": 118728 }, { "epoch": 36.02, "learning_rate": 0.000444040159018934, "loss": 0.0023, "step": 118800 }, { "epoch": 36.32, "learning_rate": 0.0004384812344181659, "loss": 0.0021, "step": 119790 }, { "epoch": 36.62, "learning_rate": 0.0004329223098173978, "loss": 0.0021, "step": 120780 }, { "epoch": 36.92, "learning_rate": 0.0004273633852166296, "loss": 0.002, "step": 121770 }, { "epoch": 37.0, "eval_loss": 0.022139811888337135, "eval_max_distance": 32, "eval_mean_distance": 1, "eval_runtime": 0.4713, "eval_samples_per_second": 106.08, "eval_steps_per_second": 4.243, "step": 122026 }, { "epoch": 37.22, "learning_rate": 0.0004218044606158615, "loss": 0.002, "step": 122760 }, { "epoch": 37.52, "learning_rate": 0.00041624553601509335, "loss": 0.0019, "step": 123750 }, { "epoch": 37.82, "learning_rate": 0.00041068661141432515, "loss": 0.0019, "step": 124740 }, { "epoch": 38.0, "eval_loss": 0.02140805311501026, "eval_max_distance": 32, "eval_mean_distance": 1, "eval_runtime": 0.4808, "eval_samples_per_second": 104.001, "eval_steps_per_second": 4.16, "step": 125324 }, { "epoch": 38.12, "learning_rate": 0.00040512768681355706, "loss": 0.0019, "step": 125730 }, { "epoch": 38.42, "learning_rate": 0.0003995687622127889, "loss": 0.0018, "step": 126720 }, { "epoch": 38.72, "learning_rate": 0.0003940098376120208, "loss": 0.0017, "step": 127710 }, { "epoch": 39.0, "eval_loss": 0.018618840724229813, "eval_max_distance": 19, "eval_mean_distance": 0, "eval_runtime": 0.4752, "eval_samples_per_second": 105.222, "eval_steps_per_second": 4.209, "step": 128622 }, { "epoch": 39.02, "learning_rate": 0.00038845091301125263, "loss": 0.002, "step": 128700 }, { "epoch": 39.32, "learning_rate": 0.0003828919884104845, "loss": 0.0016, "step": 129690 }, { "epoch": 39.62, "learning_rate": 0.00037733306380971634, "loss": 0.0017, "step": 130680 }, { "epoch": 39.92, "learning_rate": 0.0003717741392089482, "loss": 0.0017, "step": 131670 }, { "epoch": 40.0, "eval_loss": 0.017086679115891457, "eval_max_distance": 23, "eval_mean_distance": 0, "eval_runtime": 0.458, "eval_samples_per_second": 109.178, "eval_steps_per_second": 4.367, "step": 131920 }, { "epoch": 40.22, "learning_rate": 0.00036621521460818, "loss": 0.0015, "step": 132660 }, { "epoch": 40.52, "learning_rate": 0.0003606562900074119, "loss": 0.0016, "step": 133650 }, { "epoch": 40.82, "learning_rate": 0.00035509736540664376, "loss": 0.0016, "step": 134640 }, { "epoch": 41.0, "eval_loss": 0.01638130471110344, "eval_max_distance": 17, "eval_mean_distance": 0, "eval_runtime": 0.4581, "eval_samples_per_second": 109.147, "eval_steps_per_second": 4.366, "step": 135218 }, { "epoch": 41.12, "learning_rate": 0.0003495384408058756, "loss": 0.0015, "step": 135630 }, { "epoch": 41.43, "learning_rate": 0.0003439795162051075, "loss": 0.0014, "step": 136620 }, { "epoch": 41.73, "learning_rate": 0.00033842059160433933, "loss": 0.0015, "step": 137610 }, { "epoch": 42.0, "eval_loss": 0.016585057601332664, "eval_max_distance": 21, "eval_mean_distance": 1, "eval_runtime": 0.479, "eval_samples_per_second": 104.393, "eval_steps_per_second": 4.176, "step": 138516 }, { "epoch": 42.03, "learning_rate": 0.0003328616670035712, "loss": 0.0014, "step": 138600 }, { "epoch": 42.33, "learning_rate": 0.00032730274240280304, "loss": 0.0015, "step": 139590 }, { "epoch": 42.63, "learning_rate": 0.00032174381780203495, "loss": 0.0015, "step": 140580 }, { "epoch": 42.93, "learning_rate": 0.00031618489320126675, "loss": 0.0014, "step": 141570 }, { "epoch": 43.0, "eval_loss": 0.016704820096492767, "eval_max_distance": 21, "eval_mean_distance": 0, "eval_runtime": 0.4809, "eval_samples_per_second": 103.976, "eval_steps_per_second": 4.159, "step": 141814 }, { "epoch": 43.23, "learning_rate": 0.0003106259686004986, "loss": 0.0011, "step": 142560 }, { "epoch": 43.53, "learning_rate": 0.0003050670439997305, "loss": 0.0013, "step": 143550 }, { "epoch": 43.83, "learning_rate": 0.0002995081193989623, "loss": 0.0019, "step": 144540 }, { "epoch": 44.0, "eval_loss": 0.019240867346525192, "eval_max_distance": 32, "eval_mean_distance": 1, "eval_runtime": 0.6494, "eval_samples_per_second": 76.999, "eval_steps_per_second": 3.08, "step": 145112 }, { "epoch": 44.13, "learning_rate": 0.00029394919479819423, "loss": 0.0012, "step": 145530 }, { "epoch": 44.43, "learning_rate": 0.00028839027019742603, "loss": 0.0011, "step": 146520 }, { "epoch": 44.73, "learning_rate": 0.0002828313455966579, "loss": 0.0011, "step": 147510 }, { "epoch": 45.0, "eval_loss": 0.02091757208108902, "eval_max_distance": 27, "eval_mean_distance": 1, "eval_runtime": 0.4646, "eval_samples_per_second": 107.608, "eval_steps_per_second": 4.304, "step": 148410 }, { "epoch": 45.03, "learning_rate": 0.0002772724209958898, "loss": 0.0011, "step": 148500 }, { "epoch": 45.33, "learning_rate": 0.0002717134963951216, "loss": 0.0011, "step": 149490 }, { "epoch": 45.63, "learning_rate": 0.0002661545717943535, "loss": 0.001, "step": 150480 }, { "epoch": 45.93, "learning_rate": 0.00026059564719358537, "loss": 0.0011, "step": 151470 }, { "epoch": 46.0, "eval_loss": 0.02175173908472061, "eval_max_distance": 23, "eval_mean_distance": 0, "eval_runtime": 0.4863, "eval_samples_per_second": 102.827, "eval_steps_per_second": 4.113, "step": 151708 }, { "epoch": 46.23, "learning_rate": 0.00025503672259281717, "loss": 0.001, "step": 152460 }, { "epoch": 46.53, "learning_rate": 0.0002494777979920491, "loss": 0.001, "step": 153450 }, { "epoch": 46.83, "learning_rate": 0.0002439188733912809, "loss": 0.001, "step": 154440 }, { "epoch": 47.0, "eval_loss": 0.01951581984758377, "eval_max_distance": 25, "eval_mean_distance": 0, "eval_runtime": 0.4608, "eval_samples_per_second": 108.512, "eval_steps_per_second": 4.34, "step": 155006 }, { "epoch": 47.13, "learning_rate": 0.0002383599487905128, "loss": 0.001, "step": 155430 }, { "epoch": 47.43, "learning_rate": 0.00023280102418974464, "loss": 0.0009, "step": 156420 }, { "epoch": 47.73, "learning_rate": 0.00022724209958897647, "loss": 0.0009, "step": 157410 }, { "epoch": 48.0, "eval_loss": 0.01657327450811863, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4688, "eval_samples_per_second": 106.651, "eval_steps_per_second": 4.266, "step": 158304 }, { "epoch": 48.03, "learning_rate": 0.00022168317498820833, "loss": 0.0009, "step": 158400 }, { "epoch": 48.33, "learning_rate": 0.0002161242503874402, "loss": 0.0008, "step": 159390 }, { "epoch": 48.63, "learning_rate": 0.00021056532578667207, "loss": 0.0008, "step": 160380 }, { "epoch": 48.93, "learning_rate": 0.00020500640118590392, "loss": 0.0008, "step": 161370 }, { "epoch": 49.0, "eval_loss": 0.020961837843060493, "eval_max_distance": 31, "eval_mean_distance": 1, "eval_runtime": 0.4893, "eval_samples_per_second": 102.188, "eval_steps_per_second": 4.088, "step": 161602 }, { "epoch": 49.23, "learning_rate": 0.00019944747658513578, "loss": 0.0008, "step": 162360 }, { "epoch": 49.53, "learning_rate": 0.00019388855198436764, "loss": 0.0008, "step": 163350 }, { "epoch": 49.83, "learning_rate": 0.0001883296273835995, "loss": 0.0008, "step": 164340 }, { "epoch": 50.0, "eval_loss": 0.022983456030488014, "eval_max_distance": 22, "eval_mean_distance": 0, "eval_runtime": 0.479, "eval_samples_per_second": 104.39, "eval_steps_per_second": 4.176, "step": 164900 }, { "epoch": 50.13, "learning_rate": 0.00018277070278283135, "loss": 0.0008, "step": 165330 }, { "epoch": 50.43, "learning_rate": 0.0001772117781820632, "loss": 0.0007, "step": 166320 }, { "epoch": 50.73, "learning_rate": 0.00017165285358129506, "loss": 0.0008, "step": 167310 }, { "epoch": 51.0, "eval_loss": 0.018444916233420372, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4866, "eval_samples_per_second": 102.75, "eval_steps_per_second": 4.11, "step": 168198 }, { "epoch": 51.03, "learning_rate": 0.00016609392898052691, "loss": 0.0007, "step": 168300 }, { "epoch": 51.33, "learning_rate": 0.0001605350043797588, "loss": 0.0007, "step": 169290 }, { "epoch": 51.63, "learning_rate": 0.00015497607977899065, "loss": 0.0007, "step": 170280 }, { "epoch": 51.93, "learning_rate": 0.00014941715517822248, "loss": 0.0007, "step": 171270 }, { "epoch": 52.0, "eval_loss": 0.01832015998661518, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4672, "eval_samples_per_second": 107.025, "eval_steps_per_second": 4.281, "step": 171496 }, { "epoch": 52.23, "learning_rate": 0.00014385823057745434, "loss": 0.0006, "step": 172260 }, { "epoch": 52.53, "learning_rate": 0.00013829930597668622, "loss": 0.0006, "step": 173250 }, { "epoch": 52.83, "learning_rate": 0.00013274038137591808, "loss": 0.0006, "step": 174240 }, { "epoch": 53.0, "eval_loss": 0.023398304358124733, "eval_max_distance": 32, "eval_mean_distance": 1, "eval_runtime": 0.4822, "eval_samples_per_second": 103.698, "eval_steps_per_second": 4.148, "step": 174794 }, { "epoch": 53.13, "learning_rate": 0.0001271814567751499, "loss": 0.0006, "step": 175230 }, { "epoch": 53.43, "learning_rate": 0.00012162253217438179, "loss": 0.0006, "step": 176220 }, { "epoch": 53.73, "learning_rate": 0.00011606360757361364, "loss": 0.0005, "step": 177210 }, { "epoch": 54.0, "eval_loss": 0.022733934223651886, "eval_max_distance": 24, "eval_mean_distance": 0, "eval_runtime": 0.4789, "eval_samples_per_second": 104.41, "eval_steps_per_second": 4.176, "step": 178092 }, { "epoch": 54.03, "learning_rate": 0.0001105046829728455, "loss": 0.0005, "step": 178200 }, { "epoch": 54.33, "learning_rate": 0.00010494575837207735, "loss": 0.0005, "step": 179190 }, { "epoch": 54.63, "learning_rate": 9.938683377130921e-05, "loss": 0.0005, "step": 180180 }, { "epoch": 54.93, "learning_rate": 9.382790917054107e-05, "loss": 0.0004, "step": 181170 }, { "epoch": 55.0, "eval_loss": 0.018815917894244194, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4798, "eval_samples_per_second": 104.21, "eval_steps_per_second": 4.168, "step": 181390 }, { "epoch": 55.23, "learning_rate": 8.826898456977294e-05, "loss": 0.0005, "step": 182160 }, { "epoch": 55.53, "learning_rate": 8.271005996900478e-05, "loss": 0.0004, "step": 183150 }, { "epoch": 55.83, "learning_rate": 7.715113536823665e-05, "loss": 0.0005, "step": 184140 }, { "epoch": 56.0, "eval_loss": 0.01906018890440464, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.48, "eval_samples_per_second": 104.168, "eval_steps_per_second": 4.167, "step": 184688 }, { "epoch": 56.13, "learning_rate": 7.15922107674685e-05, "loss": 0.0004, "step": 185130 }, { "epoch": 56.43, "learning_rate": 6.603328616670036e-05, "loss": 0.0004, "step": 186120 }, { "epoch": 56.73, "learning_rate": 6.0474361565932214e-05, "loss": 0.0004, "step": 187110 }, { "epoch": 57.0, "eval_loss": 0.018282707780599594, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4797, "eval_samples_per_second": 104.233, "eval_steps_per_second": 4.169, "step": 187986 }, { "epoch": 57.03, "learning_rate": 5.491543696516407e-05, "loss": 0.0004, "step": 188100 }, { "epoch": 57.33, "learning_rate": 4.935651236439593e-05, "loss": 0.0004, "step": 189090 }, { "epoch": 57.63, "learning_rate": 4.379758776362779e-05, "loss": 0.0004, "step": 190080 }, { "epoch": 57.94, "learning_rate": 3.823866316285965e-05, "loss": 0.0003, "step": 191070 }, { "epoch": 58.0, "eval_loss": 0.018019111827015877, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4619, "eval_samples_per_second": 108.242, "eval_steps_per_second": 4.33, "step": 191284 }, { "epoch": 58.24, "learning_rate": 3.2679738562091506e-05, "loss": 0.0004, "step": 192060 }, { "epoch": 58.54, "learning_rate": 2.7120813961323362e-05, "loss": 0.0004, "step": 193050 }, { "epoch": 58.84, "learning_rate": 2.1561889360555218e-05, "loss": 0.0003, "step": 194040 }, { "epoch": 59.0, "eval_loss": 0.01795811764895916, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.475, "eval_samples_per_second": 105.265, "eval_steps_per_second": 4.211, "step": 194582 }, { "epoch": 59.14, "learning_rate": 1.6002964759787074e-05, "loss": 0.0004, "step": 195030 }, { "epoch": 59.44, "learning_rate": 1.0444040159018933e-05, "loss": 0.0004, "step": 196020 }, { "epoch": 59.74, "learning_rate": 4.885115558250792e-06, "loss": 0.0004, "step": 197010 }, { "epoch": 60.0, "eval_loss": 0.017678335309028625, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 0.4798, "eval_samples_per_second": 104.214, "eval_steps_per_second": 4.169, "step": 197880 }, { "epoch": 60.0, "step": 197880, "total_flos": 1.1400109636858675e+17, "train_loss": 0.031872519274052644, "train_runtime": 16366.2485, "train_samples_per_second": 362.656, "train_steps_per_second": 12.091 } ], "logging_steps": 990, "max_steps": 197880, "num_train_epochs": 60, "save_steps": 1979, "total_flos": 1.1400109636858675e+17, "trial_name": null, "trial_params": null }