|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 350.0, |
|
"eval_steps": 350, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9047619047619051e-06, |
|
"loss": 1.9355, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8095238095238102e-06, |
|
"loss": 1.977, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.909, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 7.6190476190476205e-06, |
|
"loss": 1.9514, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 1.945, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 1.9343, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.923, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.5238095238095241e-05, |
|
"loss": 1.9339, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7142857142857145e-05, |
|
"loss": 1.9657, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 1.9215, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.0952380952380954e-05, |
|
"loss": 1.9188, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.2857142857142858e-05, |
|
"loss": 1.9504, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.4761904761904762e-05, |
|
"loss": 1.9009, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.9399, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.902, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3.0476190476190482e-05, |
|
"loss": 1.8687, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 3.2380952380952386e-05, |
|
"loss": 1.8792, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.428571428571429e-05, |
|
"loss": 1.9199, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.619047619047619e-05, |
|
"loss": 1.8893, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 1.7973, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8354, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 4.190476190476191e-05, |
|
"loss": 1.8192, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 4.380952380952381e-05, |
|
"loss": 1.7685, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.5714285714285716e-05, |
|
"loss": 1.7412, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.7596, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 4.9523809523809525e-05, |
|
"loss": 1.6698, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.142857142857143e-05, |
|
"loss": 1.6695, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 1.6478, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 5.5238095238095244e-05, |
|
"loss": 1.6236, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 1.6301, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 5.904761904761905e-05, |
|
"loss": 1.5615, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 6.0952380952380964e-05, |
|
"loss": 1.5969, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 6.285714285714286e-05, |
|
"loss": 1.5722, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 6.476190476190477e-05, |
|
"loss": 1.5809, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.4969, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 6.857142857142858e-05, |
|
"loss": 1.5034, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 7.047619047619048e-05, |
|
"loss": 1.4814, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 7.238095238095238e-05, |
|
"loss": 1.4822, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.428571428571429e-05, |
|
"loss": 1.4758, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 7.619047619047618e-05, |
|
"loss": 1.4583, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 7.80952380952381e-05, |
|
"loss": 1.4306, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4247, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 8.19047619047619e-05, |
|
"loss": 1.3784, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 8.380952380952382e-05, |
|
"loss": 1.4326, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 1.3538, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 8.761904761904762e-05, |
|
"loss": 1.3156, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 8.952380952380953e-05, |
|
"loss": 1.3364, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 9.142857142857143e-05, |
|
"loss": 1.3473, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 1.2919, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 1.2749, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 9.714285714285715e-05, |
|
"loss": 1.2587, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 9.904761904761905e-05, |
|
"loss": 1.2509, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 0.00010095238095238096, |
|
"loss": 1.2122, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.00010285714285714286, |
|
"loss": 1.2101, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 1.1981, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 1.1938, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 0.00010857142857142856, |
|
"loss": 1.1346, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 0.00011047619047619049, |
|
"loss": 1.2012, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 0.00011238095238095239, |
|
"loss": 1.1305, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 1.089, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 0.00011619047619047621, |
|
"loss": 1.0899, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 0.0001180952380952381, |
|
"loss": 1.0853, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 0.00012, |
|
"loss": 1.1892, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 0.00012190476190476193, |
|
"loss": 1.0786, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 0.0001238095238095238, |
|
"loss": 1.0845, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 0.00012571428571428572, |
|
"loss": 1.0965, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 22.33, |
|
"learning_rate": 0.0001276190476190476, |
|
"loss": 1.0556, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 0.00012952380952380954, |
|
"loss": 1.1, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 0.00013142857142857143, |
|
"loss": 1.0133, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 1.0485, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 0.00013523809523809525, |
|
"loss": 1.0284, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.00013714285714285716, |
|
"loss": 1.0065, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 24.33, |
|
"learning_rate": 0.00013904761904761905, |
|
"loss": 1.087, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 0.00014095238095238096, |
|
"loss": 0.9647, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.9509, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"learning_rate": 0.00014476190476190475, |
|
"loss": 0.9621, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 0.00014666666666666666, |
|
"loss": 0.9913, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 0.00014857142857142857, |
|
"loss": 1.0245, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 0.00015047619047619048, |
|
"loss": 0.9638, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 0.00015238095238095237, |
|
"loss": 0.9652, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 0.0001542857142857143, |
|
"loss": 0.9183, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 0.0001561904761904762, |
|
"loss": 0.9729, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 27.67, |
|
"learning_rate": 0.0001580952380952381, |
|
"loss": 0.9248, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.00016, |
|
"loss": 0.8567, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 0.00016190476190476192, |
|
"loss": 0.911, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"learning_rate": 0.0001638095238095238, |
|
"loss": 0.8622, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 0.00016571428571428575, |
|
"loss": 0.9135, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 0.00016761904761904763, |
|
"loss": 0.869, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 0.00016952380952380954, |
|
"loss": 0.9034, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 0.8036, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 30.33, |
|
"learning_rate": 0.00017333333333333334, |
|
"loss": 0.8248, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 30.67, |
|
"learning_rate": 0.00017523809523809525, |
|
"loss": 0.8567, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 0.00017714285714285713, |
|
"loss": 0.7961, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 0.00017904761904761907, |
|
"loss": 0.7859, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"learning_rate": 0.00018095238095238095, |
|
"loss": 0.8019, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.00018285714285714286, |
|
"loss": 0.8066, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"learning_rate": 0.00018476190476190478, |
|
"loss": 0.781, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"learning_rate": 0.0001866666666666667, |
|
"loss": 0.7525, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 0.00018857142857142857, |
|
"loss": 0.7167, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00019047619047619048, |
|
"loss": 0.7536, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 33.67, |
|
"learning_rate": 0.0001923809523809524, |
|
"loss": 0.6921, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 0.0001942857142857143, |
|
"loss": 0.6864, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 34.33, |
|
"learning_rate": 0.0001961904761904762, |
|
"loss": 0.7011, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 0.0001980952380952381, |
|
"loss": 0.6718, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.652, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 35.33, |
|
"learning_rate": 0.00019999944740655014, |
|
"loss": 0.6498, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 35.67, |
|
"learning_rate": 0.00019999778963230775, |
|
"loss": 0.6476, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 0.00019999502669559432, |
|
"loss": 0.6011, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 36.33, |
|
"learning_rate": 0.00019999115862694546, |
|
"loss": 0.595, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 36.67, |
|
"learning_rate": 0.00019998618546911056, |
|
"loss": 0.6427, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 0.00019998010727705236, |
|
"loss": 0.5505, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"learning_rate": 0.00019997292411794618, |
|
"loss": 0.617, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"learning_rate": 0.00019996463607117935, |
|
"loss": 0.537, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 0.00019995524322835034, |
|
"loss": 0.5147, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 38.33, |
|
"learning_rate": 0.00019994474569326757, |
|
"loss": 0.5067, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 38.67, |
|
"learning_rate": 0.00019993314358194843, |
|
"loss": 0.5243, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 0.00019992043702261793, |
|
"loss": 0.5144, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 39.33, |
|
"learning_rate": 0.0001999066261557073, |
|
"loss": 0.4697, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 39.67, |
|
"learning_rate": 0.0001998917111338525, |
|
"loss": 0.4706, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00019987569212189224, |
|
"loss": 0.4908, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 40.33, |
|
"learning_rate": 0.00019985856929686667, |
|
"loss": 0.4193, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 40.67, |
|
"learning_rate": 0.00019984034284801502, |
|
"loss": 0.4805, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 0.0001998210129767735, |
|
"loss": 0.4101, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 41.33, |
|
"learning_rate": 0.00019980057989677345, |
|
"loss": 0.3676, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0001997790438338385, |
|
"loss": 0.442, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 0.00019975640502598244, |
|
"loss": 0.385, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 42.33, |
|
"learning_rate": 0.00019973266372340639, |
|
"loss": 0.38, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 0.0001997078201884961, |
|
"loss": 0.3504, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 0.0001996818746958191, |
|
"loss": 0.3622, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"learning_rate": 0.00019965482753212156, |
|
"loss": 0.3173, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 43.67, |
|
"learning_rate": 0.00019962667899632518, |
|
"loss": 0.3491, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.00019959742939952392, |
|
"loss": 0.3067, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 44.33, |
|
"learning_rate": 0.00019956707906498044, |
|
"loss": 0.2769, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 44.67, |
|
"learning_rate": 0.00019953562832812272, |
|
"loss": 0.306, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 0.00019950307753654017, |
|
"loss": 0.2923, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 0.00019946942704997982, |
|
"loss": 0.2585, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 45.67, |
|
"learning_rate": 0.00019943467724034252, |
|
"loss": 0.2664, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 0.00019939882849167852, |
|
"loss": 0.2606, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 46.33, |
|
"learning_rate": 0.0001993618812001836, |
|
"loss": 0.2376, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 46.67, |
|
"learning_rate": 0.00019932383577419432, |
|
"loss": 0.213, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 0.00019928469263418374, |
|
"loss": 0.2371, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"learning_rate": 0.00019924445221275675, |
|
"loss": 0.2132, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 47.67, |
|
"learning_rate": 0.00019920311495464518, |
|
"loss": 0.2004, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.00019916068131670302, |
|
"loss": 0.2163, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 48.33, |
|
"learning_rate": 0.0001991171517679013, |
|
"loss": 0.2106, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"learning_rate": 0.0001990725267893228, |
|
"loss": 0.1734, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 0.00019902680687415705, |
|
"loss": 0.1896, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 49.33, |
|
"learning_rate": 0.00019897999252769448, |
|
"loss": 0.1678, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 49.67, |
|
"learning_rate": 0.00019893208426732115, |
|
"loss": 0.179, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00019888308262251285, |
|
"loss": 0.152, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 50.33, |
|
"learning_rate": 0.00019883298813482938, |
|
"loss": 0.1437, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 50.67, |
|
"learning_rate": 0.00019878180135790845, |
|
"loss": 0.1491, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 0.00019872952285745959, |
|
"loss": 0.1562, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 51.33, |
|
"learning_rate": 0.00019867615321125795, |
|
"loss": 0.137, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 51.67, |
|
"learning_rate": 0.00019862169300913785, |
|
"loss": 0.1305, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 0.0001985661428529863, |
|
"loss": 0.1266, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 52.33, |
|
"learning_rate": 0.00019850950335673643, |
|
"loss": 0.1205, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 52.67, |
|
"learning_rate": 0.00019845177514636042, |
|
"loss": 0.1262, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 0.00019839295885986296, |
|
"loss": 0.1023, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 0.00019833305514727395, |
|
"loss": 0.1071, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 53.67, |
|
"learning_rate": 0.00019827206467064133, |
|
"loss": 0.0987, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 0.0001982099881040239, |
|
"loss": 0.0992, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 54.33, |
|
"learning_rate": 0.0001981468261334837, |
|
"loss": 0.0812, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 54.67, |
|
"learning_rate": 0.0001980825794570786, |
|
"loss": 0.0976, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 0.00019801724878485438, |
|
"loss": 0.0957, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 55.33, |
|
"learning_rate": 0.00019795083483883715, |
|
"loss": 0.0867, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 55.67, |
|
"learning_rate": 0.0001978833383530251, |
|
"loss": 0.0724, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 0.0926, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 56.33, |
|
"learning_rate": 0.00019774510075782172, |
|
"loss": 0.0717, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 56.67, |
|
"learning_rate": 0.00019767436117621413, |
|
"loss": 0.0764, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 0.00019760254211036244, |
|
"loss": 0.073, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 57.33, |
|
"learning_rate": 0.00019752964435400155, |
|
"loss": 0.065, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 57.67, |
|
"learning_rate": 0.00019745566871278794, |
|
"loss": 0.0695, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 0.00019738061600429064, |
|
"loss": 0.0638, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 0.00019730448705798239, |
|
"loss": 0.0583, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 58.67, |
|
"learning_rate": 0.00019722728271523034, |
|
"loss": 0.0594, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 0.00019714900382928675, |
|
"loss": 0.0574, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 59.33, |
|
"learning_rate": 0.00019706965126527963, |
|
"loss": 0.0489, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 59.67, |
|
"learning_rate": 0.00019698922590020312, |
|
"loss": 0.0602, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0001969077286229078, |
|
"loss": 0.0438, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 60.33, |
|
"learning_rate": 0.00019682516033409092, |
|
"loss": 0.047, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 60.67, |
|
"learning_rate": 0.00019674152194628638, |
|
"loss": 0.0437, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 0.00019665681438385473, |
|
"loss": 0.0533, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"learning_rate": 0.0001965710385829728, |
|
"loss": 0.0463, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 61.67, |
|
"learning_rate": 0.00019648419549162348, |
|
"loss": 0.0441, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 0.00019639628606958533, |
|
"loss": 0.0398, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 62.33, |
|
"learning_rate": 0.0001963073112884217, |
|
"loss": 0.0396, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 62.67, |
|
"learning_rate": 0.00019621727213147027, |
|
"loss": 0.0372, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 0.0001961261695938319, |
|
"loss": 0.0447, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"learning_rate": 0.00019603400468235998, |
|
"loss": 0.0366, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 63.67, |
|
"learning_rate": 0.00019594077841564907, |
|
"loss": 0.0369, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.00019584649182402357, |
|
"loss": 0.0338, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 64.33, |
|
"learning_rate": 0.0001957511459495266, |
|
"loss": 0.0337, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 64.67, |
|
"learning_rate": 0.00019565474184590826, |
|
"loss": 0.0331, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 0.0001955572805786141, |
|
"loss": 0.0345, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 65.33, |
|
"learning_rate": 0.0001954587632247732, |
|
"loss": 0.0294, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 65.67, |
|
"learning_rate": 0.00019535919087318652, |
|
"loss": 0.031, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 0.0001952585646243146, |
|
"loss": 0.0303, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 66.33, |
|
"learning_rate": 0.00019515688559026563, |
|
"loss": 0.0289, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 0.0001950541548947829, |
|
"loss": 0.0276, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 0.00019495037367323262, |
|
"loss": 0.0272, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 67.33, |
|
"learning_rate": 0.0001948455430725913, |
|
"loss": 0.0255, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 67.67, |
|
"learning_rate": 0.00019473966425143292, |
|
"loss": 0.0261, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 0.00019463273837991643, |
|
"loss": 0.0278, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 68.33, |
|
"learning_rate": 0.00019452476663977248, |
|
"loss": 0.0225, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 68.67, |
|
"learning_rate": 0.00019441575022429065, |
|
"loss": 0.0236, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 0.00019430569033830605, |
|
"loss": 0.0261, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 69.33, |
|
"learning_rate": 0.00019419458819818614, |
|
"loss": 0.0218, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 69.67, |
|
"learning_rate": 0.00019408244503181724, |
|
"loss": 0.0222, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 0.0224, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 70.33, |
|
"learning_rate": 0.00019385504058939024, |
|
"loss": 0.0207, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 70.67, |
|
"learning_rate": 0.00019373978182657625, |
|
"loss": 0.0207, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 0.00019362348706397373, |
|
"loss": 0.0194, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 71.33, |
|
"learning_rate": 0.00019350615758685708, |
|
"loss": 0.0167, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 71.67, |
|
"learning_rate": 0.00019338779469193639, |
|
"loss": 0.02, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 0.00019326839968734279, |
|
"loss": 0.0196, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 72.33, |
|
"learning_rate": 0.00019314797389261424, |
|
"loss": 0.0155, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 72.67, |
|
"learning_rate": 0.00019302651863868092, |
|
"loss": 0.019, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 0.00019290403526785025, |
|
"loss": 0.0174, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"learning_rate": 0.00019278052513379255, |
|
"loss": 0.0172, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 73.67, |
|
"learning_rate": 0.00019265598960152555, |
|
"loss": 0.0164, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 0.00019253043004739968, |
|
"loss": 0.0149, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 74.33, |
|
"learning_rate": 0.00019240384785908265, |
|
"loss": 0.0147, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 74.67, |
|
"learning_rate": 0.00019227624443554425, |
|
"loss": 0.0156, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.00019214762118704076, |
|
"loss": 0.0151, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 75.33, |
|
"learning_rate": 0.00019201797953509955, |
|
"loss": 0.0153, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 75.67, |
|
"learning_rate": 0.00019188732091250307, |
|
"loss": 0.0131, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 0.00019175564676327339, |
|
"loss": 0.0136, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 76.33, |
|
"learning_rate": 0.00019162295854265594, |
|
"loss": 0.0142, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 76.67, |
|
"learning_rate": 0.00019148925771710347, |
|
"loss": 0.0124, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 0.0127, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 77.33, |
|
"learning_rate": 0.00019121882417294462, |
|
"loss": 0.0119, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 77.67, |
|
"learning_rate": 0.00019108209444313433, |
|
"loss": 0.0128, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 0.00019094435808594823, |
|
"loss": 0.0117, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 78.33, |
|
"learning_rate": 0.0001908056166236305, |
|
"loss": 0.0122, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 78.67, |
|
"learning_rate": 0.00019066587158953366, |
|
"loss": 0.0115, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 0.0001905251245281015, |
|
"loss": 0.0108, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 79.33, |
|
"learning_rate": 0.00019038337699485208, |
|
"loss": 0.0104, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 79.67, |
|
"learning_rate": 0.00019024063055636057, |
|
"loss": 0.0108, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 0.0099, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 80.33, |
|
"learning_rate": 0.00018995214728513343, |
|
"loss": 0.0099, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 80.67, |
|
"learning_rate": 0.0001898064136406771, |
|
"loss": 0.0096, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 0.0001896596874675021, |
|
"loss": 0.0101, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 81.33, |
|
"learning_rate": 0.00018951197038720688, |
|
"loss": 0.0105, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 81.67, |
|
"learning_rate": 0.00018936326403234125, |
|
"loss": 0.0088, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 0.00018921357004638835, |
|
"loss": 0.0097, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 82.33, |
|
"learning_rate": 0.00018906289008374655, |
|
"loss": 0.0084, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 82.67, |
|
"learning_rate": 0.00018891122580971098, |
|
"loss": 0.0095, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 0.00018875857890045543, |
|
"loss": 0.0084, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 0.00018860495104301345, |
|
"loss": 0.0074, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 83.67, |
|
"learning_rate": 0.00018845034393526005, |
|
"loss": 0.0088, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 0.00018829475928589271, |
|
"loss": 0.0078, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 84.33, |
|
"learning_rate": 0.0001881381988144126, |
|
"loss": 0.0081, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 84.67, |
|
"learning_rate": 0.0001879806642511055, |
|
"loss": 0.0071, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 0.00018782215733702286, |
|
"loss": 0.0069, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 85.33, |
|
"learning_rate": 0.00018766267982396224, |
|
"loss": 0.007, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 85.67, |
|
"learning_rate": 0.00018750223347444828, |
|
"loss": 0.0074, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 0.00018734082006171299, |
|
"loss": 0.0065, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 86.33, |
|
"learning_rate": 0.00018717844136967624, |
|
"loss": 0.0063, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 86.67, |
|
"learning_rate": 0.00018701509919292613, |
|
"loss": 0.0072, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 0.0001868507953366989, |
|
"loss": 0.006, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 87.33, |
|
"learning_rate": 0.00018668553161685933, |
|
"loss": 0.0062, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 87.67, |
|
"learning_rate": 0.00018651930985988036, |
|
"loss": 0.0059, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 0.0001863521319028231, |
|
"loss": 0.0068, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 88.33, |
|
"learning_rate": 0.0001861839995933164, |
|
"loss": 0.0056, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 88.67, |
|
"learning_rate": 0.00018601491478953657, |
|
"loss": 0.0058, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 0.00018584487936018661, |
|
"loss": 0.0063, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 89.33, |
|
"learning_rate": 0.0001856738951844759, |
|
"loss": 0.0057, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 89.67, |
|
"learning_rate": 0.00018550196415209914, |
|
"loss": 0.006, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 0.00018532908816321558, |
|
"loss": 0.0062, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 90.33, |
|
"learning_rate": 0.00018515526912842796, |
|
"loss": 0.0059, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 90.67, |
|
"learning_rate": 0.0001849805089687615, |
|
"loss": 0.0071, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 0.0001848048096156426, |
|
"loss": 0.0053, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 91.33, |
|
"learning_rate": 0.00018462817301087748, |
|
"loss": 0.0061, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 0.0001844506011066308, |
|
"loss": 0.0049, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 0.0001842720958654039, |
|
"loss": 0.0057, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 92.33, |
|
"learning_rate": 0.00018409265926001343, |
|
"loss": 0.0052, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 92.67, |
|
"learning_rate": 0.00018391229327356916, |
|
"loss": 0.0052, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 0.00018373099989945236, |
|
"loss": 0.0051, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 0.00018354878114129367, |
|
"loss": 0.0054, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 93.67, |
|
"learning_rate": 0.0001833656390129509, |
|
"loss": 0.0051, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 0.0001831815755384869, |
|
"loss": 0.0049, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 94.33, |
|
"learning_rate": 0.00018299659275214706, |
|
"loss": 0.0043, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 94.67, |
|
"learning_rate": 0.00018281069269833692, |
|
"loss": 0.005, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 0.0001826238774315995, |
|
"loss": 0.0058, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 95.33, |
|
"learning_rate": 0.00018243614901659264, |
|
"loss": 0.0049, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 95.67, |
|
"learning_rate": 0.00018224750952806624, |
|
"loss": 0.0049, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 0.00018205796105083915, |
|
"loss": 0.0047, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 96.33, |
|
"learning_rate": 0.00018186750567977637, |
|
"loss": 0.0046, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 96.67, |
|
"learning_rate": 0.00018167614551976567, |
|
"loss": 0.0051, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 0.00018148388268569453, |
|
"loss": 0.0052, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 97.33, |
|
"learning_rate": 0.00018129071930242648, |
|
"loss": 0.0042, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 97.67, |
|
"learning_rate": 0.00018109665750477806, |
|
"loss": 0.0054, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.0054, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 98.33, |
|
"learning_rate": 0.00018070584725522762, |
|
"loss": 0.0041, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 98.67, |
|
"learning_rate": 0.00018050910312250931, |
|
"loss": 0.0051, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 0.00018031146921373018, |
|
"loss": 0.0047, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 99.33, |
|
"learning_rate": 0.00018011294771311435, |
|
"loss": 0.0039, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 99.67, |
|
"learning_rate": 0.00017991354081469538, |
|
"loss": 0.0048, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.00017971325072229226, |
|
"loss": 0.0048, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 100.33, |
|
"learning_rate": 0.0001795120796494848, |
|
"loss": 0.0043, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 100.67, |
|
"learning_rate": 0.00017931002981958933, |
|
"loss": 0.0044, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 0.00017910710346563416, |
|
"loss": 0.0041, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 101.33, |
|
"learning_rate": 0.00017890330283033468, |
|
"loss": 0.0044, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 101.67, |
|
"learning_rate": 0.0001786986301660689, |
|
"loss": 0.0045, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 0.00017849308773485226, |
|
"loss": 0.0035, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 102.33, |
|
"learning_rate": 0.00017828667780831278, |
|
"loss": 0.0039, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 102.67, |
|
"learning_rate": 0.00017807940266766593, |
|
"loss": 0.0043, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 0.0001778712646036894, |
|
"loss": 0.0041, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 103.33, |
|
"learning_rate": 0.00017766226591669785, |
|
"loss": 0.004, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 103.67, |
|
"learning_rate": 0.00017745240891651735, |
|
"loss": 0.0036, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 0.00017724169592245995, |
|
"loss": 0.0047, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 104.33, |
|
"learning_rate": 0.00017703012926329815, |
|
"loss": 0.0038, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 104.67, |
|
"learning_rate": 0.0001768177112772388, |
|
"loss": 0.0039, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 0.0001766044443118978, |
|
"loss": 0.0038, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 105.33, |
|
"learning_rate": 0.00017639033072427366, |
|
"loss": 0.004, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 105.67, |
|
"learning_rate": 0.0001761753728807217, |
|
"loss": 0.004, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 0.00017595957315692782, |
|
"loss": 0.0033, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 106.33, |
|
"learning_rate": 0.00017574293393788235, |
|
"loss": 0.0039, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 106.67, |
|
"learning_rate": 0.0001755254576178535, |
|
"loss": 0.0041, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 0.00017530714660036112, |
|
"loss": 0.0033, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 107.33, |
|
"learning_rate": 0.00017508800329814995, |
|
"loss": 0.0036, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 107.67, |
|
"learning_rate": 0.000174868030133163, |
|
"loss": 0.0036, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 0.00017464722953651504, |
|
"loss": 0.0039, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"learning_rate": 0.00017442560394846516, |
|
"loss": 0.0039, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 108.67, |
|
"learning_rate": 0.00017420315581839044, |
|
"loss": 0.0036, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 0.0001739798876047584, |
|
"loss": 0.0035, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 109.33, |
|
"learning_rate": 0.00017375580177510016, |
|
"loss": 0.0039, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 109.67, |
|
"learning_rate": 0.0001735309008059829, |
|
"loss": 0.0032, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 0.00017330518718298264, |
|
"loss": 0.0037, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 110.33, |
|
"learning_rate": 0.00017307866340065685, |
|
"loss": 0.0041, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 110.67, |
|
"learning_rate": 0.00017285133196251663, |
|
"loss": 0.0033, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 0.0001726231953809993, |
|
"loss": 0.0033, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 111.33, |
|
"learning_rate": 0.00017239425617744048, |
|
"loss": 0.0033, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 111.67, |
|
"learning_rate": 0.0001721645168820462, |
|
"loss": 0.0038, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 0.0001719339800338651, |
|
"loss": 0.0036, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 112.33, |
|
"learning_rate": 0.00017170264818076026, |
|
"loss": 0.0033, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 112.67, |
|
"learning_rate": 0.0001714705238793809, |
|
"loss": 0.0035, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 0.0001712376096951345, |
|
"loss": 0.0038, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 113.33, |
|
"learning_rate": 0.00017100390820215804, |
|
"loss": 0.0034, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 113.67, |
|
"learning_rate": 0.00017076942198328987, |
|
"loss": 0.0034, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 0.0001705341536300409, |
|
"loss": 0.0035, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 114.33, |
|
"learning_rate": 0.0001702981057425662, |
|
"loss": 0.0033, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 114.67, |
|
"learning_rate": 0.00017006128092963605, |
|
"loss": 0.0036, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 0.00016982368180860728, |
|
"loss": 0.0034, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 115.33, |
|
"learning_rate": 0.00016958531100539427, |
|
"loss": 0.0032, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 115.67, |
|
"learning_rate": 0.00016934617115443992, |
|
"loss": 0.0033, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 0.00016910626489868649, |
|
"loss": 0.004, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 116.33, |
|
"learning_rate": 0.00016886559488954648, |
|
"loss": 0.0032, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 0.0001686241637868734, |
|
"loss": 0.0035, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"eval_loss": 1.2795522212982178, |
|
"eval_runtime": 3.5029, |
|
"eval_samples_per_second": 5.995, |
|
"eval_steps_per_second": 0.856, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 0.00016838197425893202, |
|
"loss": 0.0038, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 117.33, |
|
"learning_rate": 0.00016813902898236939, |
|
"loss": 0.0033, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 117.67, |
|
"learning_rate": 0.00016789533064218485, |
|
"loss": 0.0035, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 0.00016765088193170053, |
|
"loss": 0.0033, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 118.33, |
|
"learning_rate": 0.00016740568555253155, |
|
"loss": 0.0037, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 118.67, |
|
"learning_rate": 0.00016715974421455617, |
|
"loss": 0.0032, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 0.0031, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 119.33, |
|
"learning_rate": 0.00016666563754283515, |
|
"loss": 0.0035, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 119.67, |
|
"learning_rate": 0.0001664174776698917, |
|
"loss": 0.0031, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 0.00016616858375968595, |
|
"loss": 0.0033, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 120.33, |
|
"learning_rate": 0.00016591895856296073, |
|
"loss": 0.0036, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 120.67, |
|
"learning_rate": 0.00016566860483854104, |
|
"loss": 0.0032, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 0.00016541752535330345, |
|
"loss": 0.0032, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 121.33, |
|
"learning_rate": 0.00016516572288214552, |
|
"loss": 0.003, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 121.67, |
|
"learning_rate": 0.0001649132002079552, |
|
"loss": 0.003, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 0.00016465996012157995, |
|
"loss": 0.0036, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 122.33, |
|
"learning_rate": 0.00016440600542179615, |
|
"loss": 0.0036, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 122.67, |
|
"learning_rate": 0.0001641513389152777, |
|
"loss": 0.0032, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 0.0001638959634165656, |
|
"loss": 0.0027, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 123.33, |
|
"learning_rate": 0.00016363988174803638, |
|
"loss": 0.0034, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 123.67, |
|
"learning_rate": 0.00016338309673987101, |
|
"loss": 0.0033, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 0.0001631256112300239, |
|
"loss": 0.0027, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 124.33, |
|
"learning_rate": 0.00016286742806419108, |
|
"loss": 0.0033, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 124.67, |
|
"learning_rate": 0.0001626085500957791, |
|
"loss": 0.0031, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 0.00016234898018587337, |
|
"loss": 0.0032, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 125.33, |
|
"learning_rate": 0.0001620887212032065, |
|
"loss": 0.003, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 125.67, |
|
"learning_rate": 0.00016182777602412665, |
|
"loss": 0.0034, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 0.0001615661475325658, |
|
"loss": 0.0031, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 126.33, |
|
"learning_rate": 0.0001613038386200078, |
|
"loss": 0.0031, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 126.67, |
|
"learning_rate": 0.00016104085218545633, |
|
"loss": 0.0034, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 0.00016077719113540302, |
|
"loss": 0.0027, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 127.33, |
|
"learning_rate": 0.00016051285838379525, |
|
"loss": 0.0035, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 127.67, |
|
"learning_rate": 0.00016024785685200395, |
|
"loss": 0.0029, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 0.00015998218946879138, |
|
"loss": 0.0027, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 128.33, |
|
"learning_rate": 0.00015971585917027862, |
|
"loss": 0.0035, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 128.67, |
|
"learning_rate": 0.00015944886889991325, |
|
"loss": 0.0029, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 0.00015918122160843678, |
|
"loss": 0.0029, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 129.33, |
|
"learning_rate": 0.000158912920253852, |
|
"loss": 0.0032, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 129.67, |
|
"learning_rate": 0.0001586439678013903, |
|
"loss": 0.0029, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 0.000158374367223479, |
|
"loss": 0.0032, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 130.33, |
|
"learning_rate": 0.00015810412149970833, |
|
"loss": 0.0027, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 130.67, |
|
"learning_rate": 0.00015783323361679864, |
|
"loss": 0.0034, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 0.00015756170656856737, |
|
"loss": 0.0032, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 131.33, |
|
"learning_rate": 0.0001572895433558958, |
|
"loss": 0.0033, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 131.67, |
|
"learning_rate": 0.0001570167469866962, |
|
"loss": 0.0027, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 0.0001567433204758782, |
|
"loss": 0.003, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 132.33, |
|
"learning_rate": 0.00015646926684531585, |
|
"loss": 0.003, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 132.67, |
|
"learning_rate": 0.00015619458912381396, |
|
"loss": 0.003, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 0.0001559192903470747, |
|
"loss": 0.003, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 0.00015564337355766412, |
|
"loss": 0.0033, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 133.67, |
|
"learning_rate": 0.0001553668418049784, |
|
"loss": 0.0027, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 0.00015508969814521025, |
|
"loss": 0.003, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 134.33, |
|
"learning_rate": 0.00015481194564131512, |
|
"loss": 0.0028, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 134.67, |
|
"learning_rate": 0.00015453358736297729, |
|
"loss": 0.0029, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 0.00015425462638657595, |
|
"loss": 0.0031, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 135.33, |
|
"learning_rate": 0.0001539750657951513, |
|
"loss": 0.0027, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 135.67, |
|
"learning_rate": 0.00015369490867837035, |
|
"loss": 0.0029, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 0.00015341415813249288, |
|
"loss": 0.0032, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 136.33, |
|
"learning_rate": 0.00015313281726033715, |
|
"loss": 0.0031, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 136.67, |
|
"learning_rate": 0.00015285088917124556, |
|
"loss": 0.0029, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 0.00015256837698105047, |
|
"loss": 0.0028, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 137.33, |
|
"learning_rate": 0.00015228528381203962, |
|
"loss": 0.003, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 137.67, |
|
"learning_rate": 0.00015200161279292155, |
|
"loss": 0.0029, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 0.00015171736705879126, |
|
"loss": 0.0028, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 138.33, |
|
"learning_rate": 0.00015143254975109538, |
|
"loss": 0.0025, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 138.67, |
|
"learning_rate": 0.0001511471640175974, |
|
"loss": 0.0031, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 0.00015086121301234316, |
|
"loss": 0.0029, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 139.33, |
|
"learning_rate": 0.00015057469989562567, |
|
"loss": 0.0027, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 139.67, |
|
"learning_rate": 0.00015028762783395034, |
|
"loss": 0.0028, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.0031, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 140.33, |
|
"learning_rate": 0.0001497118195725998, |
|
"loss": 0.0029, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 140.67, |
|
"learning_rate": 0.0001494230897366821, |
|
"loss": 0.003, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 0.00014913381368325115, |
|
"loss": 0.0027, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 141.33, |
|
"learning_rate": 0.00014884399460934805, |
|
"loss": 0.003, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"learning_rate": 0.00014855363571801523, |
|
"loss": 0.0025, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 0.0001482627402182611, |
|
"loss": 0.003, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 142.33, |
|
"learning_rate": 0.00014797131132502465, |
|
"loss": 0.0028, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 142.67, |
|
"learning_rate": 0.00014767935225913975, |
|
"loss": 0.0027, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 0.00014738686624729986, |
|
"loss": 0.0031, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 143.33, |
|
"learning_rate": 0.00014709385652202203, |
|
"loss": 0.0029, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 143.67, |
|
"learning_rate": 0.0001468003263216113, |
|
"loss": 0.0029, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 0.00014650627889012507, |
|
"loss": 0.0025, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 144.33, |
|
"learning_rate": 0.00014621171747733697, |
|
"loss": 0.0028, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 144.67, |
|
"learning_rate": 0.00014591664533870118, |
|
"loss": 0.0029, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 0.0001456210657353163, |
|
"loss": 0.0026, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 145.33, |
|
"learning_rate": 0.0001453249819338894, |
|
"loss": 0.0029, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 145.67, |
|
"learning_rate": 0.00014502839720669989, |
|
"loss": 0.0027, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 0.00014473131483156327, |
|
"loss": 0.0027, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 146.33, |
|
"learning_rate": 0.00014443373809179508, |
|
"loss": 0.0028, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 146.67, |
|
"learning_rate": 0.0001441356702761744, |
|
"loss": 0.0028, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 0.00014383711467890774, |
|
"loss": 0.0026, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 147.33, |
|
"learning_rate": 0.00014353807459959242, |
|
"loss": 0.0027, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 147.67, |
|
"learning_rate": 0.00014323855334318026, |
|
"loss": 0.0026, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 0.00014293855421994094, |
|
"loss": 0.0028, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 148.33, |
|
"learning_rate": 0.0001426380805454254, |
|
"loss": 0.0028, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 148.67, |
|
"learning_rate": 0.00014233713564042937, |
|
"loss": 0.0027, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 0.00014203572283095657, |
|
"loss": 0.0026, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 149.33, |
|
"learning_rate": 0.0001417338454481818, |
|
"loss": 0.0024, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 149.67, |
|
"learning_rate": 0.00014143150682841438, |
|
"loss": 0.0029, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 0.00014112871031306119, |
|
"loss": 0.0028, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 150.33, |
|
"learning_rate": 0.00014082545924858954, |
|
"loss": 0.0027, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 150.67, |
|
"learning_rate": 0.00014052175698649053, |
|
"loss": 0.0029, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 0.00014021760688324176, |
|
"loss": 0.0023, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 151.33, |
|
"learning_rate": 0.0001399130123002703, |
|
"loss": 0.0028, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 151.67, |
|
"learning_rate": 0.0001396079766039157, |
|
"loss": 0.0027, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 0.00013930250316539238, |
|
"loss": 0.0025, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 152.33, |
|
"learning_rate": 0.0001389965953607528, |
|
"loss": 0.0027, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 152.67, |
|
"learning_rate": 0.00013869025657084995, |
|
"loss": 0.0028, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 0.00013838349018130007, |
|
"loss": 0.0024, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 153.33, |
|
"learning_rate": 0.00013807629958244498, |
|
"loss": 0.0026, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 153.67, |
|
"learning_rate": 0.00013776868816931502, |
|
"loss": 0.0027, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 0.00013746065934159123, |
|
"loss": 0.0025, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 154.33, |
|
"learning_rate": 0.0001371522165035678, |
|
"loss": 0.0027, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 154.67, |
|
"learning_rate": 0.00013684336306411468, |
|
"loss": 0.0026, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 0.00013653410243663952, |
|
"loss": 0.0026, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 155.33, |
|
"learning_rate": 0.00013622443803905027, |
|
"loss": 0.0026, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 155.67, |
|
"learning_rate": 0.00013591437329371736, |
|
"loss": 0.0026, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 0.00013560391162743569, |
|
"loss": 0.0027, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 156.33, |
|
"learning_rate": 0.00013529305647138687, |
|
"loss": 0.003, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 156.67, |
|
"learning_rate": 0.0001349818112611015, |
|
"loss": 0.0024, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 0.00013467017943642073, |
|
"loss": 0.0024, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 157.33, |
|
"learning_rate": 0.0001343581644414587, |
|
"loss": 0.0024, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 157.67, |
|
"learning_rate": 0.00013404576972456431, |
|
"loss": 0.0027, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 0.00013373299873828303, |
|
"loss": 0.0026, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 158.33, |
|
"learning_rate": 0.00013341985493931877, |
|
"loss": 0.0026, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 158.67, |
|
"learning_rate": 0.0001331063417884958, |
|
"loss": 0.0024, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 0.00013279246275072046, |
|
"loss": 0.0028, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 159.33, |
|
"learning_rate": 0.00013247822129494266, |
|
"loss": 0.0026, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 159.67, |
|
"learning_rate": 0.00013216362089411783, |
|
"loss": 0.0026, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 0.00013184866502516845, |
|
"loss": 0.0027, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 160.33, |
|
"learning_rate": 0.00013153335716894544, |
|
"loss": 0.0028, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 160.67, |
|
"learning_rate": 0.00013121770081018998, |
|
"loss": 0.0026, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.0024, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 161.33, |
|
"learning_rate": 0.00013058535654326554, |
|
"loss": 0.0027, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 161.67, |
|
"learning_rate": 0.0001302686756236826, |
|
"loss": 0.0025, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 0.00012995166017866193, |
|
"loss": 0.0027, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 162.33, |
|
"learning_rate": 0.00012963431371181672, |
|
"loss": 0.0025, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 162.67, |
|
"learning_rate": 0.00012931663973041855, |
|
"loss": 0.0026, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 0.00012899864174535864, |
|
"loss": 0.0025, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 163.33, |
|
"learning_rate": 0.00012868032327110904, |
|
"loss": 0.0026, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 163.67, |
|
"learning_rate": 0.00012836168782568385, |
|
"loss": 0.0027, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 0.00012804273893060028, |
|
"loss": 0.0023, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 164.33, |
|
"learning_rate": 0.00012772348011083973, |
|
"loss": 0.0025, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 164.67, |
|
"learning_rate": 0.00012740391489480884, |
|
"loss": 0.0026, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 0.00012708404681430053, |
|
"loss": 0.0027, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 165.33, |
|
"learning_rate": 0.0001267638794044549, |
|
"loss": 0.0026, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 165.67, |
|
"learning_rate": 0.00012644341620372023, |
|
"loss": 0.0026, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 0.00012612266075381386, |
|
"loss": 0.0024, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 166.33, |
|
"learning_rate": 0.00012580161659968294, |
|
"loss": 0.0026, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 0.0001254802872894655, |
|
"loss": 0.0025, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 0.00012515867637445086, |
|
"loss": 0.0027, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 167.33, |
|
"learning_rate": 0.00012483678740904082, |
|
"loss": 0.0028, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 167.67, |
|
"learning_rate": 0.00012451462395071, |
|
"loss": 0.0024, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 0.00012419218955996676, |
|
"loss": 0.0024, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 168.33, |
|
"learning_rate": 0.0001238694878003138, |
|
"loss": 0.0024, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 168.67, |
|
"learning_rate": 0.00012354652223820858, |
|
"loss": 0.0022, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"learning_rate": 0.00012322329644302426, |
|
"loss": 0.0031, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 169.33, |
|
"learning_rate": 0.00012289981398700995, |
|
"loss": 0.0022, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 169.67, |
|
"learning_rate": 0.00012257607844525146, |
|
"loss": 0.0026, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 0.00012225209339563145, |
|
"loss": 0.0027, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 170.33, |
|
"learning_rate": 0.00012192786241879033, |
|
"loss": 0.0024, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 170.67, |
|
"learning_rate": 0.0001216033890980864, |
|
"loss": 0.0025, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"learning_rate": 0.00012127867701955622, |
|
"loss": 0.0026, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 171.33, |
|
"learning_rate": 0.0001209537297718752, |
|
"loss": 0.0026, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 171.67, |
|
"learning_rate": 0.00012062855094631778, |
|
"loss": 0.0023, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 0.00012030314413671762, |
|
"loss": 0.0027, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 172.33, |
|
"learning_rate": 0.00011997751293942827, |
|
"loss": 0.0027, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 172.67, |
|
"learning_rate": 0.00011965166095328301, |
|
"loss": 0.0023, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"learning_rate": 0.00011932559177955533, |
|
"loss": 0.0024, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 173.33, |
|
"learning_rate": 0.00011899930902191902, |
|
"loss": 0.0024, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 173.67, |
|
"learning_rate": 0.00011867281628640835, |
|
"loss": 0.0026, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"learning_rate": 0.00011834611718137824, |
|
"loss": 0.0024, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 174.33, |
|
"learning_rate": 0.00011801921531746444, |
|
"loss": 0.0023, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 174.67, |
|
"learning_rate": 0.00011769211430754357, |
|
"loss": 0.0025, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 0.0025, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 175.33, |
|
"learning_rate": 0.00011703732931215141, |
|
"loss": 0.0024, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 175.67, |
|
"learning_rate": 0.00011670965256327818, |
|
"loss": 0.0024, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 0.00011638179114151377, |
|
"loss": 0.0024, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 176.33, |
|
"learning_rate": 0.00011605374867033977, |
|
"loss": 0.0024, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 176.67, |
|
"learning_rate": 0.00011572552877523854, |
|
"loss": 0.0024, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 0.00011539713508365335, |
|
"loss": 0.0025, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 177.33, |
|
"learning_rate": 0.00011506857122494831, |
|
"loss": 0.0024, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 177.67, |
|
"learning_rate": 0.00011473984083036813, |
|
"loss": 0.0028, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"learning_rate": 0.00011441094753299801, |
|
"loss": 0.0021, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 178.33, |
|
"learning_rate": 0.00011408189496772368, |
|
"loss": 0.0027, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 178.67, |
|
"learning_rate": 0.00011375268677119089, |
|
"loss": 0.0024, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"learning_rate": 0.00011342332658176555, |
|
"loss": 0.0022, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 179.33, |
|
"learning_rate": 0.00011309381803949333, |
|
"loss": 0.0027, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 179.67, |
|
"learning_rate": 0.00011276416478605949, |
|
"loss": 0.0024, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 0.00011243437046474853, |
|
"loss": 0.0023, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 180.33, |
|
"learning_rate": 0.00011210443872040414, |
|
"loss": 0.0022, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 180.67, |
|
"learning_rate": 0.00011177437319938875, |
|
"loss": 0.0026, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"learning_rate": 0.0001114441775495432, |
|
"loss": 0.0029, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 181.33, |
|
"learning_rate": 0.00011111385542014663, |
|
"loss": 0.0025, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 181.67, |
|
"learning_rate": 0.00011078341046187589, |
|
"loss": 0.0022, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 0.0027, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 182.33, |
|
"learning_rate": 0.00011012216666816659, |
|
"loss": 0.0025, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 182.67, |
|
"learning_rate": 0.00010979137514070782, |
|
"loss": 0.0025, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"learning_rate": 0.00010946047540025372, |
|
"loss": 0.0024, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 183.33, |
|
"learning_rate": 0.00010912947110386484, |
|
"loss": 0.0024, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 183.67, |
|
"learning_rate": 0.00010879836590975731, |
|
"loss": 0.0024, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 0.00010846716347726233, |
|
"loss": 0.0025, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 184.33, |
|
"learning_rate": 0.00010813586746678583, |
|
"loss": 0.0026, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 184.67, |
|
"learning_rate": 0.00010780448153976793, |
|
"loss": 0.0023, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 0.00010747300935864243, |
|
"loss": 0.0023, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 185.33, |
|
"learning_rate": 0.00010714145458679649, |
|
"loss": 0.0027, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 185.67, |
|
"learning_rate": 0.00010680982088853002, |
|
"loss": 0.0022, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"learning_rate": 0.00010647811192901518, |
|
"loss": 0.0023, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 186.33, |
|
"learning_rate": 0.00010614633137425598, |
|
"loss": 0.0022, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 186.67, |
|
"learning_rate": 0.00010581448289104758, |
|
"loss": 0.0025, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"learning_rate": 0.00010548257014693601, |
|
"loss": 0.0027, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 187.33, |
|
"learning_rate": 0.0001051505968101774, |
|
"loss": 0.0023, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 187.67, |
|
"learning_rate": 0.00010481856654969758, |
|
"loss": 0.0027, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"learning_rate": 0.00010448648303505151, |
|
"loss": 0.0021, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 188.33, |
|
"learning_rate": 0.00010415434993638269, |
|
"loss": 0.0026, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 188.67, |
|
"learning_rate": 0.00010382217092438255, |
|
"loss": 0.0023, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"learning_rate": 0.00010348994967025012, |
|
"loss": 0.0022, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 189.33, |
|
"learning_rate": 0.0001031576898456511, |
|
"loss": 0.0022, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 189.67, |
|
"learning_rate": 0.00010282539512267757, |
|
"loss": 0.0024, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 0.0001024930691738073, |
|
"loss": 0.0028, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 190.33, |
|
"learning_rate": 0.00010216071567186312, |
|
"loss": 0.0022, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 190.67, |
|
"learning_rate": 0.00010182833828997238, |
|
"loss": 0.0027, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"learning_rate": 0.00010149594070152638, |
|
"loss": 0.0021, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 191.33, |
|
"learning_rate": 0.00010116352658013973, |
|
"loss": 0.0024, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 191.67, |
|
"learning_rate": 0.00010083109959960973, |
|
"loss": 0.0024, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 0.00010049866343387581, |
|
"loss": 0.0025, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 192.33, |
|
"learning_rate": 0.00010016622175697898, |
|
"loss": 0.0024, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 192.67, |
|
"learning_rate": 9.983377824302106e-05, |
|
"loss": 0.0024, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"learning_rate": 9.950133656612421e-05, |
|
"loss": 0.0022, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 193.33, |
|
"learning_rate": 9.916890040039031e-05, |
|
"loss": 0.0023, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 193.67, |
|
"learning_rate": 9.883647341986032e-05, |
|
"loss": 0.0023, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"learning_rate": 9.850405929847366e-05, |
|
"loss": 0.0025, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 194.33, |
|
"learning_rate": 9.817166171002765e-05, |
|
"loss": 0.0023, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 194.67, |
|
"learning_rate": 9.783928432813688e-05, |
|
"loss": 0.0026, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 9.750693082619273e-05, |
|
"loss": 0.0022, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 195.33, |
|
"learning_rate": 9.717460487732245e-05, |
|
"loss": 0.0023, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 195.67, |
|
"learning_rate": 9.68423101543489e-05, |
|
"loss": 0.0025, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"learning_rate": 9.651005032974994e-05, |
|
"loss": 0.0026, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 196.33, |
|
"learning_rate": 9.617782907561748e-05, |
|
"loss": 0.0025, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 196.67, |
|
"learning_rate": 9.584565006361734e-05, |
|
"loss": 0.0023, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"learning_rate": 9.551351696494854e-05, |
|
"loss": 0.0024, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 197.33, |
|
"learning_rate": 9.518143345030246e-05, |
|
"loss": 0.0022, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 197.67, |
|
"learning_rate": 9.48494031898226e-05, |
|
"loss": 0.0026, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"learning_rate": 9.451742985306398e-05, |
|
"loss": 0.0022, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 198.33, |
|
"learning_rate": 9.418551710895243e-05, |
|
"loss": 0.0021, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 198.67, |
|
"learning_rate": 9.385366862574404e-05, |
|
"loss": 0.0026, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"learning_rate": 9.352188807098481e-05, |
|
"loss": 0.0025, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 199.33, |
|
"learning_rate": 9.319017911147e-05, |
|
"loss": 0.0024, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 199.67, |
|
"learning_rate": 9.285854541320352e-05, |
|
"loss": 0.0024, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 9.252699064135758e-05, |
|
"loss": 0.0021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 200.33, |
|
"learning_rate": 9.219551846023211e-05, |
|
"loss": 0.0022, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 200.67, |
|
"learning_rate": 9.186413253321418e-05, |
|
"loss": 0.0025, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"learning_rate": 9.153283652273768e-05, |
|
"loss": 0.0023, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 201.33, |
|
"learning_rate": 9.120163409024271e-05, |
|
"loss": 0.0023, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 201.67, |
|
"learning_rate": 9.087052889613518e-05, |
|
"loss": 0.0023, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"learning_rate": 9.05395245997463e-05, |
|
"loss": 0.0025, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 202.33, |
|
"learning_rate": 9.020862485929219e-05, |
|
"loss": 0.0026, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 202.67, |
|
"learning_rate": 8.987783333183344e-05, |
|
"loss": 0.0022, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 0.0023, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 203.33, |
|
"learning_rate": 8.921658953812415e-05, |
|
"loss": 0.0023, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 203.67, |
|
"learning_rate": 8.888614457985341e-05, |
|
"loss": 0.0023, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"learning_rate": 8.855582245045683e-05, |
|
"loss": 0.0025, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 204.33, |
|
"learning_rate": 8.822562680061125e-05, |
|
"loss": 0.0023, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 204.67, |
|
"learning_rate": 8.789556127959585e-05, |
|
"loss": 0.0023, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"learning_rate": 8.756562953525152e-05, |
|
"loss": 0.0023, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 205.33, |
|
"learning_rate": 8.723583521394054e-05, |
|
"loss": 0.0023, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 205.67, |
|
"learning_rate": 8.690618196050666e-05, |
|
"loss": 0.0024, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"learning_rate": 8.657667341823448e-05, |
|
"loss": 0.0021, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 206.33, |
|
"learning_rate": 8.624731322880912e-05, |
|
"loss": 0.0025, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 206.67, |
|
"learning_rate": 8.591810503227635e-05, |
|
"loss": 0.0023, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"learning_rate": 8.558905246700201e-05, |
|
"loss": 0.0021, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 207.33, |
|
"learning_rate": 8.526015916963191e-05, |
|
"loss": 0.0021, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 207.67, |
|
"learning_rate": 8.49314287750517e-05, |
|
"loss": 0.0023, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 8.460286491634663e-05, |
|
"loss": 0.0026, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 208.33, |
|
"learning_rate": 8.427447122476148e-05, |
|
"loss": 0.0024, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 208.67, |
|
"learning_rate": 8.394625132966025e-05, |
|
"loss": 0.0023, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"learning_rate": 8.361820885848624e-05, |
|
"loss": 0.0024, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 209.33, |
|
"learning_rate": 8.329034743672187e-05, |
|
"loss": 0.0023, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 209.67, |
|
"learning_rate": 8.296267068784862e-05, |
|
"loss": 0.0022, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 0.0025, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 210.33, |
|
"learning_rate": 8.230788569245648e-05, |
|
"loss": 0.0025, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 210.67, |
|
"learning_rate": 8.198078468253557e-05, |
|
"loss": 0.0022, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"learning_rate": 8.165388281862178e-05, |
|
"loss": 0.0023, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 211.33, |
|
"learning_rate": 8.132718371359166e-05, |
|
"loss": 0.0023, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 211.67, |
|
"learning_rate": 8.100069097808103e-05, |
|
"loss": 0.0024, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"learning_rate": 8.067440822044469e-05, |
|
"loss": 0.0023, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 212.33, |
|
"learning_rate": 8.034833904671698e-05, |
|
"loss": 0.0024, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 212.67, |
|
"learning_rate": 8.002248706057177e-05, |
|
"loss": 0.0022, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"learning_rate": 7.96968558632824e-05, |
|
"loss": 0.0022, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 213.33, |
|
"learning_rate": 7.937144905368226e-05, |
|
"loss": 0.002, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 213.67, |
|
"learning_rate": 7.904627022812483e-05, |
|
"loss": 0.0024, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"learning_rate": 7.872132298044382e-05, |
|
"loss": 0.0026, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 214.33, |
|
"learning_rate": 7.839661090191362e-05, |
|
"loss": 0.0023, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 214.67, |
|
"learning_rate": 7.807213758120966e-05, |
|
"loss": 0.0023, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 0.0023, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 215.33, |
|
"learning_rate": 7.742392155474858e-05, |
|
"loss": 0.0022, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 215.67, |
|
"learning_rate": 7.710018601299004e-05, |
|
"loss": 0.0022, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"learning_rate": 7.677670355697577e-05, |
|
"loss": 0.0025, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 216.33, |
|
"learning_rate": 7.645347776179144e-05, |
|
"loss": 0.0023, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 216.67, |
|
"learning_rate": 7.613051219968623e-05, |
|
"loss": 0.0024, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"learning_rate": 7.580781044003324e-05, |
|
"loss": 0.0022, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 217.33, |
|
"learning_rate": 7.548537604929001e-05, |
|
"loss": 0.0026, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 217.67, |
|
"learning_rate": 7.516321259095921e-05, |
|
"loss": 0.0023, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"learning_rate": 7.484132362554915e-05, |
|
"loss": 0.0021, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 218.33, |
|
"learning_rate": 7.451971271053455e-05, |
|
"loss": 0.0021, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 218.67, |
|
"learning_rate": 7.419838340031708e-05, |
|
"loss": 0.0026, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"learning_rate": 7.387733924618617e-05, |
|
"loss": 0.0021, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 219.33, |
|
"learning_rate": 7.35565837962798e-05, |
|
"loss": 0.0024, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 219.67, |
|
"learning_rate": 7.323612059554513e-05, |
|
"loss": 0.0024, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"learning_rate": 7.291595318569951e-05, |
|
"loss": 0.002, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 220.33, |
|
"learning_rate": 7.25960851051912e-05, |
|
"loss": 0.0023, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 220.67, |
|
"learning_rate": 7.227651988916031e-05, |
|
"loss": 0.0023, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"learning_rate": 7.195726106939974e-05, |
|
"loss": 0.0022, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 221.33, |
|
"learning_rate": 7.163831217431615e-05, |
|
"loss": 0.0022, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 221.67, |
|
"learning_rate": 7.131967672889101e-05, |
|
"loss": 0.0022, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"learning_rate": 7.100135825464139e-05, |
|
"loss": 0.0025, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 222.33, |
|
"learning_rate": 7.068336026958146e-05, |
|
"loss": 0.0021, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 222.67, |
|
"learning_rate": 7.036568628818331e-05, |
|
"loss": 0.0023, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"learning_rate": 7.004833982133808e-05, |
|
"loss": 0.0026, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 223.33, |
|
"learning_rate": 6.973132437631742e-05, |
|
"loss": 0.0022, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 223.67, |
|
"learning_rate": 6.941464345673449e-05, |
|
"loss": 0.0023, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.0024, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 224.33, |
|
"learning_rate": 6.878229918981003e-05, |
|
"loss": 0.0024, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 224.67, |
|
"learning_rate": 6.846664283105455e-05, |
|
"loss": 0.0021, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 6.815133497483157e-05, |
|
"loss": 0.0022, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 225.33, |
|
"learning_rate": 6.783637910588216e-05, |
|
"loss": 0.0021, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 225.67, |
|
"learning_rate": 6.752177870505736e-05, |
|
"loss": 0.0023, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"learning_rate": 6.720753724927958e-05, |
|
"loss": 0.0024, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 226.33, |
|
"learning_rate": 6.68936582115042e-05, |
|
"loss": 0.0021, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 226.67, |
|
"learning_rate": 6.658014506068126e-05, |
|
"loss": 0.0023, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"learning_rate": 6.626700126171702e-05, |
|
"loss": 0.0024, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 227.33, |
|
"learning_rate": 6.595423027543571e-05, |
|
"loss": 0.0024, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 227.67, |
|
"learning_rate": 6.56418355585413e-05, |
|
"loss": 0.002, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"learning_rate": 6.532982056357928e-05, |
|
"loss": 0.0023, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 228.33, |
|
"learning_rate": 6.501818873889855e-05, |
|
"loss": 0.0025, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 228.67, |
|
"learning_rate": 6.470694352861312e-05, |
|
"loss": 0.0022, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"learning_rate": 6.439608837256432e-05, |
|
"loss": 0.0021, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 229.33, |
|
"learning_rate": 6.408562670628266e-05, |
|
"loss": 0.0022, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 229.67, |
|
"learning_rate": 6.377556196094973e-05, |
|
"loss": 0.0023, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"learning_rate": 6.34658975633605e-05, |
|
"loss": 0.0022, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 230.33, |
|
"learning_rate": 6.315663693588534e-05, |
|
"loss": 0.0022, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 230.67, |
|
"learning_rate": 6.28477834964322e-05, |
|
"loss": 0.0023, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"learning_rate": 6.25393406584088e-05, |
|
"loss": 0.0023, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 231.33, |
|
"learning_rate": 6.223131183068499e-05, |
|
"loss": 0.0022, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 231.67, |
|
"learning_rate": 6.192370041755505e-05, |
|
"loss": 0.0023, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"learning_rate": 6.161650981869998e-05, |
|
"loss": 0.0026, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 232.33, |
|
"learning_rate": 6.130974342915005e-05, |
|
"loss": 0.0024, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 232.67, |
|
"learning_rate": 6.100340463924723e-05, |
|
"loss": 0.002, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"learning_rate": 6.069749683460765e-05, |
|
"loss": 0.0024, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 233.33, |
|
"learning_rate": 6.039202339608432e-05, |
|
"loss": 0.0024, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 233.33, |
|
"eval_loss": 1.320059061050415, |
|
"eval_runtime": 3.5029, |
|
"eval_samples_per_second": 5.995, |
|
"eval_steps_per_second": 0.856, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 233.67, |
|
"learning_rate": 6.008698769972967e-05, |
|
"loss": 0.0023, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"learning_rate": 5.978239311675826e-05, |
|
"loss": 0.002, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 234.33, |
|
"learning_rate": 5.9478243013509505e-05, |
|
"loss": 0.0022, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 234.67, |
|
"learning_rate": 5.9174540751410487e-05, |
|
"loss": 0.0023, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"learning_rate": 5.887128968693887e-05, |
|
"loss": 0.0022, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 235.33, |
|
"learning_rate": 5.856849317158563e-05, |
|
"loss": 0.0023, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 235.67, |
|
"learning_rate": 5.8266154551818216e-05, |
|
"loss": 0.0021, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"learning_rate": 5.796427716904347e-05, |
|
"loss": 0.0024, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 236.33, |
|
"learning_rate": 5.7662864359570624e-05, |
|
"loss": 0.0023, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 236.67, |
|
"learning_rate": 5.736191945457463e-05, |
|
"loss": 0.0022, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"learning_rate": 5.7061445780059074e-05, |
|
"loss": 0.0024, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 237.33, |
|
"learning_rate": 5.676144665681974e-05, |
|
"loss": 0.002, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 237.67, |
|
"learning_rate": 5.6461925400407576e-05, |
|
"loss": 0.0023, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"learning_rate": 5.616288532109225e-05, |
|
"loss": 0.0024, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 238.33, |
|
"learning_rate": 5.58643297238256e-05, |
|
"loss": 0.0024, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 238.67, |
|
"learning_rate": 5.5566261908204966e-05, |
|
"loss": 0.0023, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"learning_rate": 5.526868516843673e-05, |
|
"loss": 0.002, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 239.33, |
|
"learning_rate": 5.497160279330014e-05, |
|
"loss": 0.0024, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 239.67, |
|
"learning_rate": 5.467501806611062e-05, |
|
"loss": 0.0021, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"learning_rate": 5.43789342646837e-05, |
|
"loss": 0.0023, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 240.33, |
|
"learning_rate": 5.4083354661298814e-05, |
|
"loss": 0.0025, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 240.67, |
|
"learning_rate": 5.378828252266308e-05, |
|
"loss": 0.002, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"learning_rate": 5.349372110987496e-05, |
|
"loss": 0.0023, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 241.33, |
|
"learning_rate": 5.3199673678388685e-05, |
|
"loss": 0.0024, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 241.67, |
|
"learning_rate": 5.290614347797802e-05, |
|
"loss": 0.0021, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"learning_rate": 5.261313375270014e-05, |
|
"loss": 0.0022, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 242.33, |
|
"learning_rate": 5.232064774086022e-05, |
|
"loss": 0.0022, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 242.67, |
|
"learning_rate": 5.2028688674975415e-05, |
|
"loss": 0.0023, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"learning_rate": 5.1737259781738936e-05, |
|
"loss": 0.0022, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 243.33, |
|
"learning_rate": 5.1446364281984774e-05, |
|
"loss": 0.0022, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 243.67, |
|
"learning_rate": 5.115600539065197e-05, |
|
"loss": 0.0024, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"learning_rate": 5.086618631674888e-05, |
|
"loss": 0.0021, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 244.33, |
|
"learning_rate": 5.057691026331792e-05, |
|
"loss": 0.0023, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 244.67, |
|
"learning_rate": 5.02881804274002e-05, |
|
"loss": 0.0022, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0023, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 245.33, |
|
"learning_rate": 4.971237216604967e-05, |
|
"loss": 0.0022, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 245.67, |
|
"learning_rate": 4.942530010437435e-05, |
|
"loss": 0.0023, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"learning_rate": 4.913878698765686e-05, |
|
"loss": 0.0022, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 246.33, |
|
"learning_rate": 4.885283598240259e-05, |
|
"loss": 0.0023, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 246.67, |
|
"learning_rate": 4.856745024890466e-05, |
|
"loss": 0.0023, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"learning_rate": 4.8282632941208725e-05, |
|
"loss": 0.0022, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 247.33, |
|
"learning_rate": 4.799838720707846e-05, |
|
"loss": 0.0021, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 247.67, |
|
"learning_rate": 4.771471618796043e-05, |
|
"loss": 0.0024, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"learning_rate": 4.743162301894952e-05, |
|
"loss": 0.0023, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 248.33, |
|
"learning_rate": 4.7149110828754464e-05, |
|
"loss": 0.0021, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 248.67, |
|
"learning_rate": 4.686718273966291e-05, |
|
"loss": 0.0023, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"learning_rate": 4.658584186750713e-05, |
|
"loss": 0.0023, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 249.33, |
|
"learning_rate": 4.6305091321629666e-05, |
|
"loss": 0.0021, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 249.67, |
|
"learning_rate": 4.6024934204848745e-05, |
|
"loss": 0.0024, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 4.574537361342407e-05, |
|
"loss": 0.0022, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 250.33, |
|
"learning_rate": 4.5466412637022704e-05, |
|
"loss": 0.0022, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 250.67, |
|
"learning_rate": 4.518805435868492e-05, |
|
"loss": 0.0022, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"learning_rate": 4.491030185478976e-05, |
|
"loss": 0.0025, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 251.33, |
|
"learning_rate": 4.4633158195021594e-05, |
|
"loss": 0.0025, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 251.67, |
|
"learning_rate": 4.435662644233594e-05, |
|
"loss": 0.0021, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"learning_rate": 4.4080709652925336e-05, |
|
"loss": 0.0021, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 252.33, |
|
"learning_rate": 4.380541087618606e-05, |
|
"loss": 0.0022, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 252.67, |
|
"learning_rate": 4.3530733154684164e-05, |
|
"loss": 0.0024, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"learning_rate": 4.3256679524121834e-05, |
|
"loss": 0.0021, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 253.33, |
|
"learning_rate": 4.298325301330383e-05, |
|
"loss": 0.0024, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 253.67, |
|
"learning_rate": 4.27104566441042e-05, |
|
"loss": 0.0021, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"learning_rate": 4.2438293431432665e-05, |
|
"loss": 0.0022, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 254.33, |
|
"learning_rate": 4.216676638320135e-05, |
|
"loss": 0.0022, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 254.67, |
|
"learning_rate": 4.189587850029169e-05, |
|
"loss": 0.0023, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"learning_rate": 4.1625632776521037e-05, |
|
"loss": 0.0023, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 255.33, |
|
"learning_rate": 4.1356032198609706e-05, |
|
"loss": 0.0023, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 255.67, |
|
"learning_rate": 4.108707974614804e-05, |
|
"loss": 0.0022, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"learning_rate": 4.081877839156325e-05, |
|
"loss": 0.0021, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 256.33, |
|
"learning_rate": 4.0551131100086745e-05, |
|
"loss": 0.0021, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 256.67, |
|
"learning_rate": 4.028414082972141e-05, |
|
"loss": 0.0023, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"learning_rate": 4.001781053120863e-05, |
|
"loss": 0.0021, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 257.33, |
|
"learning_rate": 3.975214314799607e-05, |
|
"loss": 0.0021, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 257.67, |
|
"learning_rate": 3.94871416162048e-05, |
|
"loss": 0.0024, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"learning_rate": 3.9222808864597004e-05, |
|
"loss": 0.002, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 258.33, |
|
"learning_rate": 3.89591478145437e-05, |
|
"loss": 0.002, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 258.67, |
|
"learning_rate": 3.8696161379992225e-05, |
|
"loss": 0.0022, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"learning_rate": 3.843385246743417e-05, |
|
"loss": 0.0025, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 259.33, |
|
"learning_rate": 3.817222397587336e-05, |
|
"loss": 0.0022, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 259.67, |
|
"learning_rate": 3.7911278796793516e-05, |
|
"loss": 0.0022, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"learning_rate": 3.7651019814126654e-05, |
|
"loss": 0.0023, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 260.33, |
|
"learning_rate": 3.739144990422089e-05, |
|
"loss": 0.0021, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 260.67, |
|
"learning_rate": 3.7132571935808924e-05, |
|
"loss": 0.0024, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"learning_rate": 3.687438876997612e-05, |
|
"loss": 0.0022, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 261.33, |
|
"learning_rate": 3.661690326012897e-05, |
|
"loss": 0.0021, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 261.67, |
|
"learning_rate": 3.6360118251963645e-05, |
|
"loss": 0.0022, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"learning_rate": 3.610403658343443e-05, |
|
"loss": 0.0024, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 262.33, |
|
"learning_rate": 3.58486610847223e-05, |
|
"loss": 0.002, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 262.67, |
|
"learning_rate": 3.5593994578203896e-05, |
|
"loss": 0.0023, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"learning_rate": 3.534003987842005e-05, |
|
"loss": 0.0024, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 263.33, |
|
"learning_rate": 3.508679979204481e-05, |
|
"loss": 0.0023, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 263.67, |
|
"learning_rate": 3.483427711785449e-05, |
|
"loss": 0.002, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"learning_rate": 3.458247464669657e-05, |
|
"loss": 0.0027, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 264.33, |
|
"learning_rate": 3.4331395161458955e-05, |
|
"loss": 0.0023, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 264.67, |
|
"learning_rate": 3.408104143703929e-05, |
|
"loss": 0.0021, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"learning_rate": 3.383141624031408e-05, |
|
"loss": 0.0022, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 265.33, |
|
"learning_rate": 3.35825223301083e-05, |
|
"loss": 0.0023, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 265.67, |
|
"learning_rate": 3.333436245716488e-05, |
|
"loss": 0.0023, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 0.0021, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 266.33, |
|
"learning_rate": 3.2840255785443855e-05, |
|
"loss": 0.0023, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"learning_rate": 3.259431444746846e-05, |
|
"loss": 0.0022, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"learning_rate": 3.234911806829948e-05, |
|
"loss": 0.0022, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 267.33, |
|
"learning_rate": 3.210466935781516e-05, |
|
"loss": 0.0024, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 267.67, |
|
"learning_rate": 3.1860971017630604e-05, |
|
"loss": 0.0023, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"learning_rate": 3.161802574106799e-05, |
|
"loss": 0.002, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 268.33, |
|
"learning_rate": 3.137583621312665e-05, |
|
"loss": 0.0023, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 268.67, |
|
"learning_rate": 3.1134405110453515e-05, |
|
"loss": 0.0022, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"learning_rate": 3.089373510131354e-05, |
|
"loss": 0.0021, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 269.33, |
|
"learning_rate": 3.065382884556012e-05, |
|
"loss": 0.0024, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 269.67, |
|
"learning_rate": 3.0414688994605723e-05, |
|
"loss": 0.0023, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"learning_rate": 3.0176318191392726e-05, |
|
"loss": 0.0019, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 270.33, |
|
"learning_rate": 2.9938719070363952e-05, |
|
"loss": 0.0024, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 270.67, |
|
"learning_rate": 2.9701894257433826e-05, |
|
"loss": 0.0022, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"learning_rate": 2.9465846369959127e-05, |
|
"loss": 0.0022, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 271.33, |
|
"learning_rate": 2.923057801671015e-05, |
|
"loss": 0.0022, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 271.67, |
|
"learning_rate": 2.8996091797841973e-05, |
|
"loss": 0.0021, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"learning_rate": 2.876239030486554e-05, |
|
"loss": 0.0023, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 272.33, |
|
"learning_rate": 2.8529476120619104e-05, |
|
"loss": 0.0023, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 272.67, |
|
"learning_rate": 2.829735181923978e-05, |
|
"loss": 0.0022, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"learning_rate": 2.8066019966134904e-05, |
|
"loss": 0.0022, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 273.33, |
|
"learning_rate": 2.7835483117953788e-05, |
|
"loss": 0.0019, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 273.67, |
|
"learning_rate": 2.7605743822559506e-05, |
|
"loss": 0.0024, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"learning_rate": 2.7376804619000707e-05, |
|
"loss": 0.0024, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 274.33, |
|
"learning_rate": 2.7148668037483372e-05, |
|
"loss": 0.0021, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 274.67, |
|
"learning_rate": 2.692133659934315e-05, |
|
"loss": 0.0024, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 2.669481281701739e-05, |
|
"loss": 0.0023, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 275.33, |
|
"learning_rate": 2.6469099194017143e-05, |
|
"loss": 0.0021, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 275.67, |
|
"learning_rate": 2.624419822489985e-05, |
|
"loss": 0.0022, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"learning_rate": 2.6020112395241624e-05, |
|
"loss": 0.0023, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 276.33, |
|
"learning_rate": 2.579684418160958e-05, |
|
"loss": 0.0022, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 276.67, |
|
"learning_rate": 2.5574396051534832e-05, |
|
"loss": 0.0023, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"learning_rate": 2.5352770463484987e-05, |
|
"loss": 0.0022, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 277.33, |
|
"learning_rate": 2.5131969866836992e-05, |
|
"loss": 0.0022, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 277.67, |
|
"learning_rate": 2.491199670185008e-05, |
|
"loss": 0.0024, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"learning_rate": 2.4692853399638917e-05, |
|
"loss": 0.0021, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 278.33, |
|
"learning_rate": 2.4474542382146537e-05, |
|
"loss": 0.0021, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 278.67, |
|
"learning_rate": 2.425706606211767e-05, |
|
"loss": 0.0024, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"learning_rate": 2.4040426843072206e-05, |
|
"loss": 0.0024, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 279.33, |
|
"learning_rate": 2.3824627119278342e-05, |
|
"loss": 0.0022, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 279.67, |
|
"learning_rate": 2.3609669275726355e-05, |
|
"loss": 0.0021, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 0.0024, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 280.33, |
|
"learning_rate": 2.318228872276118e-05, |
|
"loss": 0.0021, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 280.67, |
|
"learning_rate": 2.2969870736701895e-05, |
|
"loss": 0.0021, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"learning_rate": 2.275830407754006e-05, |
|
"loss": 0.0023, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 281.33, |
|
"learning_rate": 2.2547591083482665e-05, |
|
"loss": 0.0023, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 281.67, |
|
"learning_rate": 2.2337734083302164e-05, |
|
"loss": 0.0023, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"learning_rate": 2.212873539631061e-05, |
|
"loss": 0.002, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 282.33, |
|
"learning_rate": 2.192059733233408e-05, |
|
"loss": 0.0024, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 282.67, |
|
"learning_rate": 2.1713322191687237e-05, |
|
"loss": 0.0023, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"learning_rate": 2.1506912265147772e-05, |
|
"loss": 0.0019, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 283.33, |
|
"learning_rate": 2.1301369833931117e-05, |
|
"loss": 0.0024, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 283.67, |
|
"learning_rate": 2.1096697169665313e-05, |
|
"loss": 0.0022, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"learning_rate": 2.0892896534365904e-05, |
|
"loss": 0.0019, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 284.33, |
|
"learning_rate": 2.068997018041069e-05, |
|
"loss": 0.0022, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 284.67, |
|
"learning_rate": 2.0487920350515212e-05, |
|
"loss": 0.0021, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"learning_rate": 2.0286749277707782e-05, |
|
"loss": 0.0024, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 285.33, |
|
"learning_rate": 2.0086459185304618e-05, |
|
"loss": 0.0021, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 285.67, |
|
"learning_rate": 1.9887052286885655e-05, |
|
"loss": 0.0022, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"learning_rate": 1.9688530786269855e-05, |
|
"loss": 0.0023, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 286.33, |
|
"learning_rate": 1.9490896877490716e-05, |
|
"loss": 0.0022, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 286.67, |
|
"learning_rate": 1.929415274477239e-05, |
|
"loss": 0.0024, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.0021, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 287.33, |
|
"learning_rate": 1.8903342495221977e-05, |
|
"loss": 0.0022, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 287.67, |
|
"learning_rate": 1.870928069757353e-05, |
|
"loss": 0.0023, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"learning_rate": 1.8516117314305524e-05, |
|
"loss": 0.0021, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 288.33, |
|
"learning_rate": 1.832385448023435e-05, |
|
"loss": 0.0022, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 288.67, |
|
"learning_rate": 1.8132494320223638e-05, |
|
"loss": 0.0021, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"learning_rate": 1.7942038949160854e-05, |
|
"loss": 0.0024, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 289.33, |
|
"learning_rate": 1.775249047193377e-05, |
|
"loss": 0.0023, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 289.67, |
|
"learning_rate": 1.756385098340736e-05, |
|
"loss": 0.002, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"learning_rate": 1.7376122568400532e-05, |
|
"loss": 0.0024, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 290.33, |
|
"learning_rate": 1.7189307301663084e-05, |
|
"loss": 0.0021, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 290.67, |
|
"learning_rate": 1.7003407247852943e-05, |
|
"loss": 0.0022, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"learning_rate": 1.681842446151313e-05, |
|
"loss": 0.0022, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 291.33, |
|
"learning_rate": 1.6634360987049115e-05, |
|
"loss": 0.002, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 291.67, |
|
"learning_rate": 1.6451218858706374e-05, |
|
"loss": 0.0022, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"learning_rate": 1.6269000100547683e-05, |
|
"loss": 0.0024, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 292.33, |
|
"learning_rate": 1.6087706726430873e-05, |
|
"loss": 0.0021, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 292.67, |
|
"learning_rate": 1.5907340739986575e-05, |
|
"loss": 0.0022, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"learning_rate": 1.5727904134596083e-05, |
|
"loss": 0.0024, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 293.33, |
|
"learning_rate": 1.5549398893369216e-05, |
|
"loss": 0.0025, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 293.67, |
|
"learning_rate": 1.5371826989122506e-05, |
|
"loss": 0.002, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"learning_rate": 1.5195190384357404e-05, |
|
"loss": 0.0021, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 294.33, |
|
"learning_rate": 1.501949103123852e-05, |
|
"loss": 0.0021, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 294.67, |
|
"learning_rate": 1.4844730871572043e-05, |
|
"loss": 0.0024, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"learning_rate": 1.467091183678444e-05, |
|
"loss": 0.0023, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 295.33, |
|
"learning_rate": 1.449803584790086e-05, |
|
"loss": 0.0022, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 295.67, |
|
"learning_rate": 1.4326104815524088e-05, |
|
"loss": 0.0022, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"learning_rate": 1.415512063981339e-05, |
|
"loss": 0.0022, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 296.33, |
|
"learning_rate": 1.3985085210463477e-05, |
|
"loss": 0.0023, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 296.67, |
|
"learning_rate": 1.3816000406683604e-05, |
|
"loss": 0.0023, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"learning_rate": 1.364786809717692e-05, |
|
"loss": 0.0019, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 297.33, |
|
"learning_rate": 1.3480690140119657e-05, |
|
"loss": 0.0022, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 297.67, |
|
"learning_rate": 1.3314468383140688e-05, |
|
"loss": 0.0023, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"learning_rate": 1.3149204663301118e-05, |
|
"loss": 0.0021, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 298.33, |
|
"learning_rate": 1.2984900807073919e-05, |
|
"loss": 0.002, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 298.67, |
|
"learning_rate": 1.2821558630323772e-05, |
|
"loss": 0.0021, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"learning_rate": 1.2659179938287035e-05, |
|
"loss": 0.0026, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 299.33, |
|
"learning_rate": 1.2497766525551724e-05, |
|
"loss": 0.0023, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 299.67, |
|
"learning_rate": 1.2337320176037759e-05, |
|
"loss": 0.0023, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 1.2177842662977135e-05, |
|
"loss": 0.002, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 300.33, |
|
"learning_rate": 1.201933574889449e-05, |
|
"loss": 0.0022, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 300.67, |
|
"learning_rate": 1.186180118558743e-05, |
|
"loss": 0.0024, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"learning_rate": 1.1705240714107302e-05, |
|
"loss": 0.0019, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 301.33, |
|
"learning_rate": 1.1549656064739967e-05, |
|
"loss": 0.0024, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 301.67, |
|
"learning_rate": 1.1395048956986575e-05, |
|
"loss": 0.0019, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"learning_rate": 1.124142109954459e-05, |
|
"loss": 0.0025, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 302.33, |
|
"learning_rate": 1.108877419028902e-05, |
|
"loss": 0.0024, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 302.67, |
|
"learning_rate": 1.0937109916253474e-05, |
|
"loss": 0.0021, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"learning_rate": 1.0786429953611666e-05, |
|
"loss": 0.0024, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 303.33, |
|
"learning_rate": 1.0636735967658784e-05, |
|
"loss": 0.0021, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 303.67, |
|
"learning_rate": 1.0488029612793138e-05, |
|
"loss": 0.0022, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"learning_rate": 1.034031253249792e-05, |
|
"loss": 0.0023, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 304.33, |
|
"learning_rate": 1.0193586359322927e-05, |
|
"loss": 0.002, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 304.67, |
|
"learning_rate": 1.004785271486659e-05, |
|
"loss": 0.0025, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"learning_rate": 9.903113209758096e-06, |
|
"loss": 0.0021, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 305.33, |
|
"learning_rate": 9.759369443639454e-06, |
|
"loss": 0.0024, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 305.67, |
|
"learning_rate": 9.616623005147951e-06, |
|
"loss": 0.0021, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"learning_rate": 9.474875471898526e-06, |
|
"loss": 0.0021, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 306.33, |
|
"learning_rate": 9.334128410466358e-06, |
|
"loss": 0.0022, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 306.67, |
|
"learning_rate": 9.194383376369508e-06, |
|
"loss": 0.0022, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"learning_rate": 9.055641914051782e-06, |
|
"loss": 0.002, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 307.33, |
|
"learning_rate": 8.917905556865713e-06, |
|
"loss": 0.0022, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 307.67, |
|
"learning_rate": 8.781175827055389e-06, |
|
"loss": 0.0023, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 0.0022, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 308.33, |
|
"learning_rate": 8.510742282896544e-06, |
|
"loss": 0.0021, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 308.67, |
|
"learning_rate": 8.377041457344103e-06, |
|
"loss": 0.0023, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"learning_rate": 8.24435323672661e-06, |
|
"loss": 0.0025, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 309.33, |
|
"learning_rate": 8.112679087496933e-06, |
|
"loss": 0.0021, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 309.67, |
|
"learning_rate": 7.982020464900486e-06, |
|
"loss": 0.0022, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"learning_rate": 7.852378812959227e-06, |
|
"loss": 0.0025, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 310.33, |
|
"learning_rate": 7.72375556445577e-06, |
|
"loss": 0.0021, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 310.67, |
|
"learning_rate": 7.596152140917368e-06, |
|
"loss": 0.0023, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"learning_rate": 7.46956995260033e-06, |
|
"loss": 0.0022, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 311.33, |
|
"learning_rate": 7.344010398474455e-06, |
|
"loss": 0.0021, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 311.67, |
|
"learning_rate": 7.219474866207465e-06, |
|
"loss": 0.0023, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"learning_rate": 7.09596473214974e-06, |
|
"loss": 0.0023, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 312.33, |
|
"learning_rate": 6.973481361319123e-06, |
|
"loss": 0.0023, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 312.67, |
|
"learning_rate": 6.852026107385756e-06, |
|
"loss": 0.0022, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"learning_rate": 6.731600312657238e-06, |
|
"loss": 0.0021, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 313.33, |
|
"learning_rate": 6.612205308063646e-06, |
|
"loss": 0.0024, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 313.67, |
|
"learning_rate": 6.493842413142914e-06, |
|
"loss": 0.0021, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"learning_rate": 6.37651293602628e-06, |
|
"loss": 0.0021, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 314.33, |
|
"learning_rate": 6.260218173423749e-06, |
|
"loss": 0.0021, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 314.67, |
|
"learning_rate": 6.144959410609785e-06, |
|
"loss": 0.0021, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"learning_rate": 6.030737921409169e-06, |
|
"loss": 0.0025, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 315.33, |
|
"learning_rate": 5.917554968182803e-06, |
|
"loss": 0.0022, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 315.67, |
|
"learning_rate": 5.805411801813865e-06, |
|
"loss": 0.0022, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"learning_rate": 5.694309661693942e-06, |
|
"loss": 0.0024, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 316.33, |
|
"learning_rate": 5.584249775709371e-06, |
|
"loss": 0.0022, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 316.67, |
|
"learning_rate": 5.475233360227516e-06, |
|
"loss": 0.0022, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"learning_rate": 5.367261620083575e-06, |
|
"loss": 0.0021, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 317.33, |
|
"learning_rate": 5.26033574856708e-06, |
|
"loss": 0.0022, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 317.67, |
|
"learning_rate": 5.1544569274087125e-06, |
|
"loss": 0.0024, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"learning_rate": 5.049626326767365e-06, |
|
"loss": 0.002, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 318.33, |
|
"learning_rate": 4.945845105217117e-06, |
|
"loss": 0.0023, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 318.67, |
|
"learning_rate": 4.843114409734384e-06, |
|
"loss": 0.0021, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"learning_rate": 4.741435375685377e-06, |
|
"loss": 0.0024, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 319.33, |
|
"learning_rate": 4.640809126813484e-06, |
|
"loss": 0.0022, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 319.67, |
|
"learning_rate": 4.541236775226809e-06, |
|
"loss": 0.0025, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"learning_rate": 4.442719421385922e-06, |
|
"loss": 0.002, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 320.33, |
|
"learning_rate": 4.3452581540917465e-06, |
|
"loss": 0.0021, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 320.67, |
|
"learning_rate": 4.248854050473405e-06, |
|
"loss": 0.0021, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"learning_rate": 4.153508175976428e-06, |
|
"loss": 0.0024, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 321.33, |
|
"learning_rate": 4.059221584350958e-06, |
|
"loss": 0.002, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 321.67, |
|
"learning_rate": 3.965995317640025e-06, |
|
"loss": 0.0021, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"learning_rate": 3.873830406168111e-06, |
|
"loss": 0.0025, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 322.33, |
|
"learning_rate": 3.7827278685297785e-06, |
|
"loss": 0.0021, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 322.67, |
|
"learning_rate": 3.692688711578296e-06, |
|
"loss": 0.0025, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"learning_rate": 3.6037139304146762e-06, |
|
"loss": 0.0019, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 323.33, |
|
"learning_rate": 3.515804508376508e-06, |
|
"loss": 0.0024, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 323.67, |
|
"learning_rate": 3.428961417027221e-06, |
|
"loss": 0.0021, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"learning_rate": 3.3431856161452835e-06, |
|
"loss": 0.0022, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 324.33, |
|
"learning_rate": 3.2584780537136207e-06, |
|
"loss": 0.0021, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 324.67, |
|
"learning_rate": 3.1748396659090797e-06, |
|
"loss": 0.0023, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 3.092271377092215e-06, |
|
"loss": 0.0022, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 325.33, |
|
"learning_rate": 3.010774099796898e-06, |
|
"loss": 0.0022, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 325.67, |
|
"learning_rate": 2.9303487347203783e-06, |
|
"loss": 0.0022, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"learning_rate": 2.8509961707132494e-06, |
|
"loss": 0.0022, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 326.33, |
|
"learning_rate": 2.772717284769677e-06, |
|
"loss": 0.0023, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 326.67, |
|
"learning_rate": 2.6955129420176196e-06, |
|
"loss": 0.0022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"learning_rate": 2.619383995709368e-06, |
|
"loss": 0.0022, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 327.33, |
|
"learning_rate": 2.5443312872120763e-06, |
|
"loss": 0.0025, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 327.67, |
|
"learning_rate": 2.4703556459984456e-06, |
|
"loss": 0.002, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"learning_rate": 2.3974578896375553e-06, |
|
"loss": 0.0021, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 328.33, |
|
"learning_rate": 2.3256388237858807e-06, |
|
"loss": 0.0021, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 328.67, |
|
"learning_rate": 2.25489924217831e-06, |
|
"loss": 0.0024, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 0.0022, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 329.33, |
|
"learning_rate": 2.1166616469749044e-06, |
|
"loss": 0.002, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 329.67, |
|
"learning_rate": 2.049165161162858e-06, |
|
"loss": 0.0022, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"learning_rate": 1.9827512151456173e-06, |
|
"loss": 0.0025, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 330.33, |
|
"learning_rate": 1.917420542921433e-06, |
|
"loss": 0.0021, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 330.67, |
|
"learning_rate": 1.8531738665163112e-06, |
|
"loss": 0.0024, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"learning_rate": 1.790011895976118e-06, |
|
"loss": 0.002, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 331.33, |
|
"learning_rate": 1.7279353293586765e-06, |
|
"loss": 0.0025, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 331.67, |
|
"learning_rate": 1.66694485272606e-06, |
|
"loss": 0.0018, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"learning_rate": 1.6070411401370334e-06, |
|
"loss": 0.0025, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 332.33, |
|
"learning_rate": 1.5482248536395905e-06, |
|
"loss": 0.0024, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 332.67, |
|
"learning_rate": 1.4904966432635947e-06, |
|
"loss": 0.0023, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"learning_rate": 1.4338571470137063e-06, |
|
"loss": 0.0018, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"learning_rate": 1.378306990862177e-06, |
|
"loss": 0.0023, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 333.67, |
|
"learning_rate": 1.323846788742078e-06, |
|
"loss": 0.0022, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"learning_rate": 1.2704771425404382e-06, |
|
"loss": 0.0021, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 334.33, |
|
"learning_rate": 1.2181986420915615e-06, |
|
"loss": 0.0021, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 334.67, |
|
"learning_rate": 1.1670118651706197e-06, |
|
"loss": 0.0024, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"learning_rate": 1.1169173774871478e-06, |
|
"loss": 0.0024, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 335.33, |
|
"learning_rate": 1.067915732678859e-06, |
|
"loss": 0.0021, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 335.67, |
|
"learning_rate": 1.0200074723055398e-06, |
|
"loss": 0.0024, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"learning_rate": 9.731931258429638e-07, |
|
"loss": 0.0022, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 336.33, |
|
"learning_rate": 9.274732106771988e-07, |
|
"loss": 0.0021, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 336.67, |
|
"learning_rate": 8.828482320987319e-07, |
|
"loss": 0.0023, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"learning_rate": 8.393186832969746e-07, |
|
"loss": 0.0021, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 337.33, |
|
"learning_rate": 7.968850453548226e-07, |
|
"loss": 0.0022, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 337.67, |
|
"learning_rate": 7.555477872432715e-07, |
|
"loss": 0.002, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"learning_rate": 7.153073658162646e-07, |
|
"loss": 0.0024, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 338.33, |
|
"learning_rate": 6.761642258056978e-07, |
|
"loss": 0.002, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 338.67, |
|
"learning_rate": 6.381187998164229e-07, |
|
"loss": 0.0022, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"learning_rate": 6.011715083214741e-07, |
|
"loss": 0.0024, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 339.33, |
|
"learning_rate": 5.653227596575161e-07, |
|
"loss": 0.002, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 339.67, |
|
"learning_rate": 5.305729500201917e-07, |
|
"loss": 0.0019, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"learning_rate": 4.969224634598591e-07, |
|
"loss": 0.0028, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 340.33, |
|
"learning_rate": 4.6437167187728393e-07, |
|
"loss": 0.0023, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 340.67, |
|
"learning_rate": 4.329209350195651e-07, |
|
"loss": 0.0022, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"learning_rate": 4.025706004760932e-07, |
|
"loss": 0.0021, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 341.33, |
|
"learning_rate": 3.7332100367482024e-07, |
|
"loss": 0.0022, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 341.67, |
|
"learning_rate": 3.451724678784518e-07, |
|
"loss": 0.0023, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"learning_rate": 3.1812530418090513e-07, |
|
"loss": 0.0021, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 342.33, |
|
"learning_rate": 2.921798115039009e-07, |
|
"loss": 0.0018, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 342.67, |
|
"learning_rate": 2.673362765936327e-07, |
|
"loss": 0.0023, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"learning_rate": 2.4359497401758024e-07, |
|
"loss": 0.0025, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 343.33, |
|
"learning_rate": 2.2095616616150115e-07, |
|
"loss": 0.0021, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 343.67, |
|
"learning_rate": 1.9942010322655524e-07, |
|
"loss": 0.0022, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"learning_rate": 1.7898702322648453e-07, |
|
"loss": 0.0024, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 344.33, |
|
"learning_rate": 1.596571519850043e-07, |
|
"loss": 0.0023, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 344.67, |
|
"learning_rate": 1.414307031333273e-07, |
|
"loss": 0.0023, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"learning_rate": 1.2430787810776555e-07, |
|
"loss": 0.0021, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 345.33, |
|
"learning_rate": 1.0828886614754341e-07, |
|
"loss": 0.0024, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 345.67, |
|
"learning_rate": 9.337384429269901e-08, |
|
"loss": 0.0022, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"learning_rate": 7.956297738207497e-08, |
|
"loss": 0.0019, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 346.33, |
|
"learning_rate": 6.685641805158627e-08, |
|
"loss": 0.0023, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 346.67, |
|
"learning_rate": 5.5254306732444025e-08, |
|
"loss": 0.0022, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"learning_rate": 4.475677164966774e-08, |
|
"loss": 0.0022, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 347.33, |
|
"learning_rate": 3.536392882064199e-08, |
|
"loss": 0.0022, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 347.67, |
|
"learning_rate": 2.7075882053828605e-08, |
|
"loss": 0.0022, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"learning_rate": 1.9892722947645326e-08, |
|
"loss": 0.0022, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 348.33, |
|
"learning_rate": 1.3814530889433296e-08, |
|
"loss": 0.0021, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 348.67, |
|
"learning_rate": 8.841373054546686e-09, |
|
"loss": 0.0023, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"learning_rate": 4.973304405697654e-09, |
|
"loss": 0.0022, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 349.33, |
|
"learning_rate": 2.2103676922680117e-09, |
|
"loss": 0.0024, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 349.67, |
|
"learning_rate": 5.525934498651352e-10, |
|
"loss": 0.0021, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0021, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_loss": 1.3223165273666382, |
|
"eval_runtime": 3.5062, |
|
"eval_samples_per_second": 5.989, |
|
"eval_steps_per_second": 0.856, |
|
"step": 1050 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 350, |
|
"save_steps": 350, |
|
"total_flos": 1.277394092556288e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|