|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"eval_steps": 500, |
|
"global_step": 197880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.0535678188801295e-08, |
|
"loss": 12.7149, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.0030321406913285e-05, |
|
"loss": 3.3584, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010006064281382657, |
|
"loss": 0.3384, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015009096422073984, |
|
"loss": 0.2236, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.11203870922327042, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 0.5965, |
|
"eval_samples_per_second": 83.828, |
|
"eval_steps_per_second": 3.353, |
|
"step": 3298 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00020012128562765314, |
|
"loss": 0.1679, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002501516070345664, |
|
"loss": 0.1395, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0003001819284414797, |
|
"loss": 0.1179, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.05475025996565819, |
|
"eval_max_distance": 82, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 0.5422, |
|
"eval_samples_per_second": 92.223, |
|
"eval_steps_per_second": 3.689, |
|
"step": 6596 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0003502122498483929, |
|
"loss": 0.1022, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0004002425712553063, |
|
"loss": 0.0917, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0004502728926622195, |
|
"loss": 0.0829, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.042510777711868286, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5158, |
|
"eval_samples_per_second": 96.928, |
|
"eval_steps_per_second": 3.877, |
|
"step": 9894 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0005003032140691328, |
|
"loss": 0.0769, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0005503335354760462, |
|
"loss": 0.0667, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0006003638568829594, |
|
"loss": 0.0653, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0006503941782898727, |
|
"loss": 0.0643, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.03110930137336254, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4848, |
|
"eval_samples_per_second": 103.129, |
|
"eval_steps_per_second": 4.125, |
|
"step": 13192 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0007004244996967858, |
|
"loss": 0.0589, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0007504548211036993, |
|
"loss": 0.0549, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0008004851425106126, |
|
"loss": 0.0538, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.026651622727513313, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5057, |
|
"eval_samples_per_second": 98.878, |
|
"eval_steps_per_second": 3.955, |
|
"step": 16490 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0008505154639175257, |
|
"loss": 0.048, |
|
"step": 16830 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.000900545785324439, |
|
"loss": 0.0461, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0009505761067313523, |
|
"loss": 0.0469, |
|
"step": 18810 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.039574604481458664, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 0.5179, |
|
"eval_samples_per_second": 96.548, |
|
"eval_steps_per_second": 3.862, |
|
"step": 19788 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0009999326190957482, |
|
"loss": 0.0464, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0009943736944949802, |
|
"loss": 0.0393, |
|
"step": 20790 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.000988814769894212, |
|
"loss": 0.0426, |
|
"step": 21780 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.000983255845293444, |
|
"loss": 0.0385, |
|
"step": 22770 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.026188833639025688, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 0.4896, |
|
"eval_samples_per_second": 102.115, |
|
"eval_steps_per_second": 4.085, |
|
"step": 23086 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0009776969206926756, |
|
"loss": 0.034, |
|
"step": 23760 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0009721379960919076, |
|
"loss": 0.0315, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0009665790714911395, |
|
"loss": 0.0316, |
|
"step": 25740 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.02234221063554287, |
|
"eval_max_distance": 40, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4837, |
|
"eval_samples_per_second": 103.365, |
|
"eval_steps_per_second": 4.135, |
|
"step": 26384 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0009610201468903713, |
|
"loss": 0.0305, |
|
"step": 26730 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0009554612222896032, |
|
"loss": 0.0271, |
|
"step": 27720 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.0009499022976888349, |
|
"loss": 0.0263, |
|
"step": 28710 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.023996921256184578, |
|
"eval_max_distance": 69, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4894, |
|
"eval_samples_per_second": 102.167, |
|
"eval_steps_per_second": 4.087, |
|
"step": 29682 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0009443433730880669, |
|
"loss": 0.0282, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.0009387844484872987, |
|
"loss": 0.0229, |
|
"step": 30690 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.0009332255238865306, |
|
"loss": 0.0226, |
|
"step": 31680 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.0009276665992857625, |
|
"loss": 0.0226, |
|
"step": 32670 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.02030733972787857, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4797, |
|
"eval_samples_per_second": 104.236, |
|
"eval_steps_per_second": 4.169, |
|
"step": 32980 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0009221076746849943, |
|
"loss": 0.0209, |
|
"step": 33660 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 0.0009165487500842261, |
|
"loss": 0.02, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.000910989825483458, |
|
"loss": 0.0203, |
|
"step": 35640 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.017732510343194008, |
|
"eval_max_distance": 54, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4814, |
|
"eval_samples_per_second": 103.858, |
|
"eval_steps_per_second": 4.154, |
|
"step": 36278 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0009054309008826899, |
|
"loss": 0.0183, |
|
"step": 36630 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 0.0008998719762819217, |
|
"loss": 0.0174, |
|
"step": 37620 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0008943130516811536, |
|
"loss": 0.0178, |
|
"step": 38610 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.018777821213006973, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4893, |
|
"eval_samples_per_second": 102.185, |
|
"eval_steps_per_second": 4.087, |
|
"step": 39576 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.0008887541270803853, |
|
"loss": 0.0174, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 0.0008831952024796173, |
|
"loss": 0.0153, |
|
"step": 40590 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 0.0008776362778788492, |
|
"loss": 0.015, |
|
"step": 41580 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.000872077353278081, |
|
"loss": 0.0154, |
|
"step": 42570 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.029613599181175232, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4669, |
|
"eval_samples_per_second": 107.079, |
|
"eval_steps_per_second": 4.283, |
|
"step": 42874 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 0.0008665184286773129, |
|
"loss": 0.014, |
|
"step": 43560 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.0008609595040765447, |
|
"loss": 0.0135, |
|
"step": 44550 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 0.0008554005794757766, |
|
"loss": 0.0138, |
|
"step": 45540 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.02011469565331936, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5034, |
|
"eval_samples_per_second": 99.332, |
|
"eval_steps_per_second": 3.973, |
|
"step": 46172 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.0008498416548750084, |
|
"loss": 0.0128, |
|
"step": 46530 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.0008442827302742403, |
|
"loss": 0.0121, |
|
"step": 47520 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.0008387238056734722, |
|
"loss": 0.012, |
|
"step": 48510 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.026753582060337067, |
|
"eval_max_distance": 67, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4716, |
|
"eval_samples_per_second": 106.031, |
|
"eval_steps_per_second": 4.241, |
|
"step": 49470 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 0.000833164881072704, |
|
"loss": 0.0123, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 0.0008276059564719359, |
|
"loss": 0.0104, |
|
"step": 50490 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 0.0008220470318711677, |
|
"loss": 0.0109, |
|
"step": 51480 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.0008164881072703996, |
|
"loss": 0.0109, |
|
"step": 52470 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.01633359119296074, |
|
"eval_max_distance": 35, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4971, |
|
"eval_samples_per_second": 100.579, |
|
"eval_steps_per_second": 4.023, |
|
"step": 52768 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.0008109291826696314, |
|
"loss": 0.0098, |
|
"step": 53460 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.0008053702580688633, |
|
"loss": 0.0094, |
|
"step": 54450 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.0007998113334680952, |
|
"loss": 0.0105, |
|
"step": 55440 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.013592842034995556, |
|
"eval_max_distance": 26, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.48, |
|
"eval_samples_per_second": 104.157, |
|
"eval_steps_per_second": 4.166, |
|
"step": 56066 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 0.000794252408867327, |
|
"loss": 0.0097, |
|
"step": 56430 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 0.0007886934842665589, |
|
"loss": 0.0083, |
|
"step": 57420 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 0.0007831345596657907, |
|
"loss": 0.0092, |
|
"step": 58410 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.020196767523884773, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4567, |
|
"eval_samples_per_second": 109.487, |
|
"eval_steps_per_second": 4.379, |
|
"step": 59364 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 0.0007775756350650226, |
|
"loss": 0.009, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 0.0007720167104642545, |
|
"loss": 0.0075, |
|
"step": 60390 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 0.0007664577858634864, |
|
"loss": 0.0078, |
|
"step": 61380 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 0.0007608988612627181, |
|
"loss": 0.0087, |
|
"step": 62370 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.02213277295231819, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4707, |
|
"eval_samples_per_second": 106.233, |
|
"eval_steps_per_second": 4.249, |
|
"step": 62662 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 0.00075533993666195, |
|
"loss": 0.0077, |
|
"step": 63360 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 0.0007497810120611818, |
|
"loss": 0.0071, |
|
"step": 64350 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 0.0007442220874604138, |
|
"loss": 0.0075, |
|
"step": 65340 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.020336275920271873, |
|
"eval_max_distance": 33, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4773, |
|
"eval_samples_per_second": 104.749, |
|
"eval_steps_per_second": 4.19, |
|
"step": 65960 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 0.0007386631628596457, |
|
"loss": 0.0073, |
|
"step": 66330 |
|
}, |
|
{ |
|
"epoch": 20.41, |
|
"learning_rate": 0.0007331042382588774, |
|
"loss": 0.0063, |
|
"step": 67320 |
|
}, |
|
{ |
|
"epoch": 20.71, |
|
"learning_rate": 0.0007275453136581093, |
|
"loss": 0.0067, |
|
"step": 68310 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.022562623023986816, |
|
"eval_max_distance": 26, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5033, |
|
"eval_samples_per_second": 99.35, |
|
"eval_steps_per_second": 3.974, |
|
"step": 69258 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 0.0007219863890573411, |
|
"loss": 0.007, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 0.000716427464456573, |
|
"loss": 0.0061, |
|
"step": 70290 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"learning_rate": 0.0007108685398558049, |
|
"loss": 0.006, |
|
"step": 71280 |
|
}, |
|
{ |
|
"epoch": 21.91, |
|
"learning_rate": 0.0007053096152550368, |
|
"loss": 0.0062, |
|
"step": 72270 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.01839238964021206, |
|
"eval_max_distance": 24, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4856, |
|
"eval_samples_per_second": 102.959, |
|
"eval_steps_per_second": 4.118, |
|
"step": 72556 |
|
}, |
|
{ |
|
"epoch": 22.21, |
|
"learning_rate": 0.0006997506906542685, |
|
"loss": 0.0057, |
|
"step": 73260 |
|
}, |
|
{ |
|
"epoch": 22.51, |
|
"learning_rate": 0.0006941917660535004, |
|
"loss": 0.0058, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 22.81, |
|
"learning_rate": 0.0006886328414527323, |
|
"loss": 0.0059, |
|
"step": 75240 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.013111269101500511, |
|
"eval_max_distance": 18, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.5001, |
|
"eval_samples_per_second": 99.983, |
|
"eval_steps_per_second": 3.999, |
|
"step": 75854 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"learning_rate": 0.0006830739168519642, |
|
"loss": 0.0055, |
|
"step": 76230 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 0.0006775149922511961, |
|
"loss": 0.0051, |
|
"step": 77220 |
|
}, |
|
{ |
|
"epoch": 23.71, |
|
"learning_rate": 0.0006719560676504279, |
|
"loss": 0.0054, |
|
"step": 78210 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.026959825307130814, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4725, |
|
"eval_samples_per_second": 105.825, |
|
"eval_steps_per_second": 4.233, |
|
"step": 79152 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 0.0006663971430496597, |
|
"loss": 0.0055, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 0.0006608382184488915, |
|
"loss": 0.0046, |
|
"step": 80190 |
|
}, |
|
{ |
|
"epoch": 24.61, |
|
"learning_rate": 0.0006552792938481235, |
|
"loss": 0.005, |
|
"step": 81180 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"learning_rate": 0.0006497203692473554, |
|
"loss": 0.0052, |
|
"step": 82170 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.024379713460803032, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.47, |
|
"eval_samples_per_second": 106.387, |
|
"eval_steps_per_second": 4.255, |
|
"step": 82450 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"learning_rate": 0.0006441614446465872, |
|
"loss": 0.0048, |
|
"step": 83160 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 0.000638602520045819, |
|
"loss": 0.0045, |
|
"step": 84150 |
|
}, |
|
{ |
|
"epoch": 25.82, |
|
"learning_rate": 0.0006330435954450508, |
|
"loss": 0.0044, |
|
"step": 85140 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.014908027835190296, |
|
"eval_max_distance": 23, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4819, |
|
"eval_samples_per_second": 103.748, |
|
"eval_steps_per_second": 4.15, |
|
"step": 85748 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"learning_rate": 0.0006274846708442828, |
|
"loss": 0.0044, |
|
"step": 86130 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 0.0006219257462435146, |
|
"loss": 0.0042, |
|
"step": 87120 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 0.0006163668216427465, |
|
"loss": 0.0043, |
|
"step": 88110 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.0256387647241354, |
|
"eval_max_distance": 63, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5104, |
|
"eval_samples_per_second": 97.954, |
|
"eval_steps_per_second": 3.918, |
|
"step": 89046 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 0.0006108078970419783, |
|
"loss": 0.0043, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"learning_rate": 0.0006052489724412101, |
|
"loss": 0.004, |
|
"step": 90090 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"learning_rate": 0.0005996900478404421, |
|
"loss": 0.0037, |
|
"step": 91080 |
|
}, |
|
{ |
|
"epoch": 27.92, |
|
"learning_rate": 0.0005941311232396739, |
|
"loss": 0.0038, |
|
"step": 92070 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.017227506265044212, |
|
"eval_max_distance": 30, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4632, |
|
"eval_samples_per_second": 107.934, |
|
"eval_steps_per_second": 4.317, |
|
"step": 92344 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"learning_rate": 0.0005885721986389058, |
|
"loss": 0.0037, |
|
"step": 93060 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 0.0005830132740381376, |
|
"loss": 0.0038, |
|
"step": 94050 |
|
}, |
|
{ |
|
"epoch": 28.82, |
|
"learning_rate": 0.0005774543494373694, |
|
"loss": 0.0036, |
|
"step": 95040 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.022354494780302048, |
|
"eval_max_distance": 37, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4846, |
|
"eval_samples_per_second": 103.187, |
|
"eval_steps_per_second": 4.127, |
|
"step": 95642 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.0005718954248366013, |
|
"loss": 0.0037, |
|
"step": 96030 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 0.0005663365002358332, |
|
"loss": 0.0033, |
|
"step": 97020 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"learning_rate": 0.000560777575635065, |
|
"loss": 0.0033, |
|
"step": 98010 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.01936698891222477, |
|
"eval_max_distance": 30, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4829, |
|
"eval_samples_per_second": 103.544, |
|
"eval_steps_per_second": 4.142, |
|
"step": 98940 |
|
}, |
|
{ |
|
"epoch": 30.02, |
|
"learning_rate": 0.0005552186510342969, |
|
"loss": 0.0035, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 0.0005496597264335288, |
|
"loss": 0.003, |
|
"step": 99990 |
|
}, |
|
{ |
|
"epoch": 30.62, |
|
"learning_rate": 0.0005441008018327606, |
|
"loss": 0.0033, |
|
"step": 100980 |
|
}, |
|
{ |
|
"epoch": 30.92, |
|
"learning_rate": 0.0005385418772319925, |
|
"loss": 0.0031, |
|
"step": 101970 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.023793019354343414, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.5012, |
|
"eval_samples_per_second": 99.754, |
|
"eval_steps_per_second": 3.99, |
|
"step": 102238 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"learning_rate": 0.0005329829526312243, |
|
"loss": 0.0029, |
|
"step": 102960 |
|
}, |
|
{ |
|
"epoch": 31.52, |
|
"learning_rate": 0.0005274240280304562, |
|
"loss": 0.003, |
|
"step": 103950 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 0.000521865103429688, |
|
"loss": 0.003, |
|
"step": 104940 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.02003033086657524, |
|
"eval_max_distance": 28, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.475, |
|
"eval_samples_per_second": 105.268, |
|
"eval_steps_per_second": 4.211, |
|
"step": 105536 |
|
}, |
|
{ |
|
"epoch": 32.12, |
|
"learning_rate": 0.00051630617882892, |
|
"loss": 0.0028, |
|
"step": 105930 |
|
}, |
|
{ |
|
"epoch": 32.42, |
|
"learning_rate": 0.0005107472542281517, |
|
"loss": 0.0027, |
|
"step": 106920 |
|
}, |
|
{ |
|
"epoch": 32.72, |
|
"learning_rate": 0.0005051883296273836, |
|
"loss": 0.0028, |
|
"step": 107910 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.01606147363781929, |
|
"eval_max_distance": 18, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4673, |
|
"eval_samples_per_second": 107.008, |
|
"eval_steps_per_second": 4.28, |
|
"step": 108834 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 0.0004996294050266155, |
|
"loss": 0.0028, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 33.32, |
|
"learning_rate": 0.0004940704804258473, |
|
"loss": 0.0026, |
|
"step": 109890 |
|
}, |
|
{ |
|
"epoch": 33.62, |
|
"learning_rate": 0.0004885115558250792, |
|
"loss": 0.0026, |
|
"step": 110880 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 0.00048295263122431103, |
|
"loss": 0.0027, |
|
"step": 111870 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.021506933495402336, |
|
"eval_max_distance": 26, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4763, |
|
"eval_samples_per_second": 104.968, |
|
"eval_steps_per_second": 4.199, |
|
"step": 112132 |
|
}, |
|
{ |
|
"epoch": 34.22, |
|
"learning_rate": 0.00047739370662354294, |
|
"loss": 0.0024, |
|
"step": 112860 |
|
}, |
|
{ |
|
"epoch": 34.52, |
|
"learning_rate": 0.00047183478202277474, |
|
"loss": 0.0023, |
|
"step": 113850 |
|
}, |
|
{ |
|
"epoch": 34.82, |
|
"learning_rate": 0.0004662758574220066, |
|
"loss": 0.0025, |
|
"step": 114840 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.019841769710183144, |
|
"eval_max_distance": 19, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4767, |
|
"eval_samples_per_second": 104.884, |
|
"eval_steps_per_second": 4.195, |
|
"step": 115430 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"learning_rate": 0.00046071693282123845, |
|
"loss": 0.0023, |
|
"step": 115830 |
|
}, |
|
{ |
|
"epoch": 35.42, |
|
"learning_rate": 0.0004551580082204703, |
|
"loss": 0.0021, |
|
"step": 116820 |
|
}, |
|
{ |
|
"epoch": 35.72, |
|
"learning_rate": 0.0004495990836197022, |
|
"loss": 0.0023, |
|
"step": 117810 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.01675160974264145, |
|
"eval_max_distance": 24, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4591, |
|
"eval_samples_per_second": 108.901, |
|
"eval_steps_per_second": 4.356, |
|
"step": 118728 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 0.000444040159018934, |
|
"loss": 0.0023, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 0.0004384812344181659, |
|
"loss": 0.0021, |
|
"step": 119790 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"learning_rate": 0.0004329223098173978, |
|
"loss": 0.0021, |
|
"step": 120780 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"learning_rate": 0.0004273633852166296, |
|
"loss": 0.002, |
|
"step": 121770 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.022139811888337135, |
|
"eval_max_distance": 32, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4713, |
|
"eval_samples_per_second": 106.08, |
|
"eval_steps_per_second": 4.243, |
|
"step": 122026 |
|
}, |
|
{ |
|
"epoch": 37.22, |
|
"learning_rate": 0.0004218044606158615, |
|
"loss": 0.002, |
|
"step": 122760 |
|
}, |
|
{ |
|
"epoch": 37.52, |
|
"learning_rate": 0.00041624553601509335, |
|
"loss": 0.0019, |
|
"step": 123750 |
|
}, |
|
{ |
|
"epoch": 37.82, |
|
"learning_rate": 0.00041068661141432515, |
|
"loss": 0.0019, |
|
"step": 124740 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.02140805311501026, |
|
"eval_max_distance": 32, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4808, |
|
"eval_samples_per_second": 104.001, |
|
"eval_steps_per_second": 4.16, |
|
"step": 125324 |
|
}, |
|
{ |
|
"epoch": 38.12, |
|
"learning_rate": 0.00040512768681355706, |
|
"loss": 0.0019, |
|
"step": 125730 |
|
}, |
|
{ |
|
"epoch": 38.42, |
|
"learning_rate": 0.0003995687622127889, |
|
"loss": 0.0018, |
|
"step": 126720 |
|
}, |
|
{ |
|
"epoch": 38.72, |
|
"learning_rate": 0.0003940098376120208, |
|
"loss": 0.0017, |
|
"step": 127710 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.018618840724229813, |
|
"eval_max_distance": 19, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4752, |
|
"eval_samples_per_second": 105.222, |
|
"eval_steps_per_second": 4.209, |
|
"step": 128622 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 0.00038845091301125263, |
|
"loss": 0.002, |
|
"step": 128700 |
|
}, |
|
{ |
|
"epoch": 39.32, |
|
"learning_rate": 0.0003828919884104845, |
|
"loss": 0.0016, |
|
"step": 129690 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 0.00037733306380971634, |
|
"loss": 0.0017, |
|
"step": 130680 |
|
}, |
|
{ |
|
"epoch": 39.92, |
|
"learning_rate": 0.0003717741392089482, |
|
"loss": 0.0017, |
|
"step": 131670 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.017086679115891457, |
|
"eval_max_distance": 23, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.458, |
|
"eval_samples_per_second": 109.178, |
|
"eval_steps_per_second": 4.367, |
|
"step": 131920 |
|
}, |
|
{ |
|
"epoch": 40.22, |
|
"learning_rate": 0.00036621521460818, |
|
"loss": 0.0015, |
|
"step": 132660 |
|
}, |
|
{ |
|
"epoch": 40.52, |
|
"learning_rate": 0.0003606562900074119, |
|
"loss": 0.0016, |
|
"step": 133650 |
|
}, |
|
{ |
|
"epoch": 40.82, |
|
"learning_rate": 0.00035509736540664376, |
|
"loss": 0.0016, |
|
"step": 134640 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 0.01638130471110344, |
|
"eval_max_distance": 17, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4581, |
|
"eval_samples_per_second": 109.147, |
|
"eval_steps_per_second": 4.366, |
|
"step": 135218 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 0.0003495384408058756, |
|
"loss": 0.0015, |
|
"step": 135630 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"learning_rate": 0.0003439795162051075, |
|
"loss": 0.0014, |
|
"step": 136620 |
|
}, |
|
{ |
|
"epoch": 41.73, |
|
"learning_rate": 0.00033842059160433933, |
|
"loss": 0.0015, |
|
"step": 137610 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.016585057601332664, |
|
"eval_max_distance": 21, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.479, |
|
"eval_samples_per_second": 104.393, |
|
"eval_steps_per_second": 4.176, |
|
"step": 138516 |
|
}, |
|
{ |
|
"epoch": 42.03, |
|
"learning_rate": 0.0003328616670035712, |
|
"loss": 0.0014, |
|
"step": 138600 |
|
}, |
|
{ |
|
"epoch": 42.33, |
|
"learning_rate": 0.00032730274240280304, |
|
"loss": 0.0015, |
|
"step": 139590 |
|
}, |
|
{ |
|
"epoch": 42.63, |
|
"learning_rate": 0.00032174381780203495, |
|
"loss": 0.0015, |
|
"step": 140580 |
|
}, |
|
{ |
|
"epoch": 42.93, |
|
"learning_rate": 0.00031618489320126675, |
|
"loss": 0.0014, |
|
"step": 141570 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 0.016704820096492767, |
|
"eval_max_distance": 21, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4809, |
|
"eval_samples_per_second": 103.976, |
|
"eval_steps_per_second": 4.159, |
|
"step": 141814 |
|
}, |
|
{ |
|
"epoch": 43.23, |
|
"learning_rate": 0.0003106259686004986, |
|
"loss": 0.0011, |
|
"step": 142560 |
|
}, |
|
{ |
|
"epoch": 43.53, |
|
"learning_rate": 0.0003050670439997305, |
|
"loss": 0.0013, |
|
"step": 143550 |
|
}, |
|
{ |
|
"epoch": 43.83, |
|
"learning_rate": 0.0002995081193989623, |
|
"loss": 0.0019, |
|
"step": 144540 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.019240867346525192, |
|
"eval_max_distance": 32, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.6494, |
|
"eval_samples_per_second": 76.999, |
|
"eval_steps_per_second": 3.08, |
|
"step": 145112 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"learning_rate": 0.00029394919479819423, |
|
"loss": 0.0012, |
|
"step": 145530 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 0.00028839027019742603, |
|
"loss": 0.0011, |
|
"step": 146520 |
|
}, |
|
{ |
|
"epoch": 44.73, |
|
"learning_rate": 0.0002828313455966579, |
|
"loss": 0.0011, |
|
"step": 147510 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 0.02091757208108902, |
|
"eval_max_distance": 27, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4646, |
|
"eval_samples_per_second": 107.608, |
|
"eval_steps_per_second": 4.304, |
|
"step": 148410 |
|
}, |
|
{ |
|
"epoch": 45.03, |
|
"learning_rate": 0.0002772724209958898, |
|
"loss": 0.0011, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 0.0002717134963951216, |
|
"loss": 0.0011, |
|
"step": 149490 |
|
}, |
|
{ |
|
"epoch": 45.63, |
|
"learning_rate": 0.0002661545717943535, |
|
"loss": 0.001, |
|
"step": 150480 |
|
}, |
|
{ |
|
"epoch": 45.93, |
|
"learning_rate": 0.00026059564719358537, |
|
"loss": 0.0011, |
|
"step": 151470 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.02175173908472061, |
|
"eval_max_distance": 23, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4863, |
|
"eval_samples_per_second": 102.827, |
|
"eval_steps_per_second": 4.113, |
|
"step": 151708 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 0.00025503672259281717, |
|
"loss": 0.001, |
|
"step": 152460 |
|
}, |
|
{ |
|
"epoch": 46.53, |
|
"learning_rate": 0.0002494777979920491, |
|
"loss": 0.001, |
|
"step": 153450 |
|
}, |
|
{ |
|
"epoch": 46.83, |
|
"learning_rate": 0.0002439188733912809, |
|
"loss": 0.001, |
|
"step": 154440 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 0.01951581984758377, |
|
"eval_max_distance": 25, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4608, |
|
"eval_samples_per_second": 108.512, |
|
"eval_steps_per_second": 4.34, |
|
"step": 155006 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"learning_rate": 0.0002383599487905128, |
|
"loss": 0.001, |
|
"step": 155430 |
|
}, |
|
{ |
|
"epoch": 47.43, |
|
"learning_rate": 0.00023280102418974464, |
|
"loss": 0.0009, |
|
"step": 156420 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 0.00022724209958897647, |
|
"loss": 0.0009, |
|
"step": 157410 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.01657327450811863, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4688, |
|
"eval_samples_per_second": 106.651, |
|
"eval_steps_per_second": 4.266, |
|
"step": 158304 |
|
}, |
|
{ |
|
"epoch": 48.03, |
|
"learning_rate": 0.00022168317498820833, |
|
"loss": 0.0009, |
|
"step": 158400 |
|
}, |
|
{ |
|
"epoch": 48.33, |
|
"learning_rate": 0.0002161242503874402, |
|
"loss": 0.0008, |
|
"step": 159390 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 0.00021056532578667207, |
|
"loss": 0.0008, |
|
"step": 160380 |
|
}, |
|
{ |
|
"epoch": 48.93, |
|
"learning_rate": 0.00020500640118590392, |
|
"loss": 0.0008, |
|
"step": 161370 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 0.020961837843060493, |
|
"eval_max_distance": 31, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4893, |
|
"eval_samples_per_second": 102.188, |
|
"eval_steps_per_second": 4.088, |
|
"step": 161602 |
|
}, |
|
{ |
|
"epoch": 49.23, |
|
"learning_rate": 0.00019944747658513578, |
|
"loss": 0.0008, |
|
"step": 162360 |
|
}, |
|
{ |
|
"epoch": 49.53, |
|
"learning_rate": 0.00019388855198436764, |
|
"loss": 0.0008, |
|
"step": 163350 |
|
}, |
|
{ |
|
"epoch": 49.83, |
|
"learning_rate": 0.0001883296273835995, |
|
"loss": 0.0008, |
|
"step": 164340 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.022983456030488014, |
|
"eval_max_distance": 22, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.479, |
|
"eval_samples_per_second": 104.39, |
|
"eval_steps_per_second": 4.176, |
|
"step": 164900 |
|
}, |
|
{ |
|
"epoch": 50.13, |
|
"learning_rate": 0.00018277070278283135, |
|
"loss": 0.0008, |
|
"step": 165330 |
|
}, |
|
{ |
|
"epoch": 50.43, |
|
"learning_rate": 0.0001772117781820632, |
|
"loss": 0.0007, |
|
"step": 166320 |
|
}, |
|
{ |
|
"epoch": 50.73, |
|
"learning_rate": 0.00017165285358129506, |
|
"loss": 0.0008, |
|
"step": 167310 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 0.018444916233420372, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4866, |
|
"eval_samples_per_second": 102.75, |
|
"eval_steps_per_second": 4.11, |
|
"step": 168198 |
|
}, |
|
{ |
|
"epoch": 51.03, |
|
"learning_rate": 0.00016609392898052691, |
|
"loss": 0.0007, |
|
"step": 168300 |
|
}, |
|
{ |
|
"epoch": 51.33, |
|
"learning_rate": 0.0001605350043797588, |
|
"loss": 0.0007, |
|
"step": 169290 |
|
}, |
|
{ |
|
"epoch": 51.63, |
|
"learning_rate": 0.00015497607977899065, |
|
"loss": 0.0007, |
|
"step": 170280 |
|
}, |
|
{ |
|
"epoch": 51.93, |
|
"learning_rate": 0.00014941715517822248, |
|
"loss": 0.0007, |
|
"step": 171270 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 0.01832015998661518, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4672, |
|
"eval_samples_per_second": 107.025, |
|
"eval_steps_per_second": 4.281, |
|
"step": 171496 |
|
}, |
|
{ |
|
"epoch": 52.23, |
|
"learning_rate": 0.00014385823057745434, |
|
"loss": 0.0006, |
|
"step": 172260 |
|
}, |
|
{ |
|
"epoch": 52.53, |
|
"learning_rate": 0.00013829930597668622, |
|
"loss": 0.0006, |
|
"step": 173250 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 0.00013274038137591808, |
|
"loss": 0.0006, |
|
"step": 174240 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 0.023398304358124733, |
|
"eval_max_distance": 32, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.4822, |
|
"eval_samples_per_second": 103.698, |
|
"eval_steps_per_second": 4.148, |
|
"step": 174794 |
|
}, |
|
{ |
|
"epoch": 53.13, |
|
"learning_rate": 0.0001271814567751499, |
|
"loss": 0.0006, |
|
"step": 175230 |
|
}, |
|
{ |
|
"epoch": 53.43, |
|
"learning_rate": 0.00012162253217438179, |
|
"loss": 0.0006, |
|
"step": 176220 |
|
}, |
|
{ |
|
"epoch": 53.73, |
|
"learning_rate": 0.00011606360757361364, |
|
"loss": 0.0005, |
|
"step": 177210 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 0.022733934223651886, |
|
"eval_max_distance": 24, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4789, |
|
"eval_samples_per_second": 104.41, |
|
"eval_steps_per_second": 4.176, |
|
"step": 178092 |
|
}, |
|
{ |
|
"epoch": 54.03, |
|
"learning_rate": 0.0001105046829728455, |
|
"loss": 0.0005, |
|
"step": 178200 |
|
}, |
|
{ |
|
"epoch": 54.33, |
|
"learning_rate": 0.00010494575837207735, |
|
"loss": 0.0005, |
|
"step": 179190 |
|
}, |
|
{ |
|
"epoch": 54.63, |
|
"learning_rate": 9.938683377130921e-05, |
|
"loss": 0.0005, |
|
"step": 180180 |
|
}, |
|
{ |
|
"epoch": 54.93, |
|
"learning_rate": 9.382790917054107e-05, |
|
"loss": 0.0004, |
|
"step": 181170 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 0.018815917894244194, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4798, |
|
"eval_samples_per_second": 104.21, |
|
"eval_steps_per_second": 4.168, |
|
"step": 181390 |
|
}, |
|
{ |
|
"epoch": 55.23, |
|
"learning_rate": 8.826898456977294e-05, |
|
"loss": 0.0005, |
|
"step": 182160 |
|
}, |
|
{ |
|
"epoch": 55.53, |
|
"learning_rate": 8.271005996900478e-05, |
|
"loss": 0.0004, |
|
"step": 183150 |
|
}, |
|
{ |
|
"epoch": 55.83, |
|
"learning_rate": 7.715113536823665e-05, |
|
"loss": 0.0005, |
|
"step": 184140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 0.01906018890440464, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.48, |
|
"eval_samples_per_second": 104.168, |
|
"eval_steps_per_second": 4.167, |
|
"step": 184688 |
|
}, |
|
{ |
|
"epoch": 56.13, |
|
"learning_rate": 7.15922107674685e-05, |
|
"loss": 0.0004, |
|
"step": 185130 |
|
}, |
|
{ |
|
"epoch": 56.43, |
|
"learning_rate": 6.603328616670036e-05, |
|
"loss": 0.0004, |
|
"step": 186120 |
|
}, |
|
{ |
|
"epoch": 56.73, |
|
"learning_rate": 6.0474361565932214e-05, |
|
"loss": 0.0004, |
|
"step": 187110 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 0.018282707780599594, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4797, |
|
"eval_samples_per_second": 104.233, |
|
"eval_steps_per_second": 4.169, |
|
"step": 187986 |
|
}, |
|
{ |
|
"epoch": 57.03, |
|
"learning_rate": 5.491543696516407e-05, |
|
"loss": 0.0004, |
|
"step": 188100 |
|
}, |
|
{ |
|
"epoch": 57.33, |
|
"learning_rate": 4.935651236439593e-05, |
|
"loss": 0.0004, |
|
"step": 189090 |
|
}, |
|
{ |
|
"epoch": 57.63, |
|
"learning_rate": 4.379758776362779e-05, |
|
"loss": 0.0004, |
|
"step": 190080 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"learning_rate": 3.823866316285965e-05, |
|
"loss": 0.0003, |
|
"step": 191070 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 0.018019111827015877, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4619, |
|
"eval_samples_per_second": 108.242, |
|
"eval_steps_per_second": 4.33, |
|
"step": 191284 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 3.2679738562091506e-05, |
|
"loss": 0.0004, |
|
"step": 192060 |
|
}, |
|
{ |
|
"epoch": 58.54, |
|
"learning_rate": 2.7120813961323362e-05, |
|
"loss": 0.0004, |
|
"step": 193050 |
|
}, |
|
{ |
|
"epoch": 58.84, |
|
"learning_rate": 2.1561889360555218e-05, |
|
"loss": 0.0003, |
|
"step": 194040 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 0.01795811764895916, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.475, |
|
"eval_samples_per_second": 105.265, |
|
"eval_steps_per_second": 4.211, |
|
"step": 194582 |
|
}, |
|
{ |
|
"epoch": 59.14, |
|
"learning_rate": 1.6002964759787074e-05, |
|
"loss": 0.0004, |
|
"step": 195030 |
|
}, |
|
{ |
|
"epoch": 59.44, |
|
"learning_rate": 1.0444040159018933e-05, |
|
"loss": 0.0004, |
|
"step": 196020 |
|
}, |
|
{ |
|
"epoch": 59.74, |
|
"learning_rate": 4.885115558250792e-06, |
|
"loss": 0.0004, |
|
"step": 197010 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 0.017678335309028625, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.4798, |
|
"eval_samples_per_second": 104.214, |
|
"eval_steps_per_second": 4.169, |
|
"step": 197880 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 197880, |
|
"total_flos": 1.1400109636858675e+17, |
|
"train_loss": 0.031872519274052644, |
|
"train_runtime": 16366.2485, |
|
"train_samples_per_second": 362.656, |
|
"train_steps_per_second": 12.091 |
|
} |
|
], |
|
"logging_steps": 990, |
|
"max_steps": 197880, |
|
"num_train_epochs": 60, |
|
"save_steps": 1979, |
|
"total_flos": 1.1400109636858675e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|