rationale-2-7b-jurnal / trainer_state.json
bagusatmaja's picture
Upload 11 files
021f4b4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 350.0,
"eval_steps": 350,
"global_step": 1050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.33,
"learning_rate": 1.9047619047619051e-06,
"loss": 1.9355,
"step": 1
},
{
"epoch": 0.67,
"learning_rate": 3.8095238095238102e-06,
"loss": 1.977,
"step": 2
},
{
"epoch": 1.0,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.909,
"step": 3
},
{
"epoch": 1.33,
"learning_rate": 7.6190476190476205e-06,
"loss": 1.9514,
"step": 4
},
{
"epoch": 1.67,
"learning_rate": 9.523809523809523e-06,
"loss": 1.945,
"step": 5
},
{
"epoch": 2.0,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.9343,
"step": 6
},
{
"epoch": 2.33,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.923,
"step": 7
},
{
"epoch": 2.67,
"learning_rate": 1.5238095238095241e-05,
"loss": 1.9339,
"step": 8
},
{
"epoch": 3.0,
"learning_rate": 1.7142857142857145e-05,
"loss": 1.9657,
"step": 9
},
{
"epoch": 3.33,
"learning_rate": 1.9047619047619046e-05,
"loss": 1.9215,
"step": 10
},
{
"epoch": 3.67,
"learning_rate": 2.0952380952380954e-05,
"loss": 1.9188,
"step": 11
},
{
"epoch": 4.0,
"learning_rate": 2.2857142857142858e-05,
"loss": 1.9504,
"step": 12
},
{
"epoch": 4.33,
"learning_rate": 2.4761904761904762e-05,
"loss": 1.9009,
"step": 13
},
{
"epoch": 4.67,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.9399,
"step": 14
},
{
"epoch": 5.0,
"learning_rate": 2.857142857142857e-05,
"loss": 1.902,
"step": 15
},
{
"epoch": 5.33,
"learning_rate": 3.0476190476190482e-05,
"loss": 1.8687,
"step": 16
},
{
"epoch": 5.67,
"learning_rate": 3.2380952380952386e-05,
"loss": 1.8792,
"step": 17
},
{
"epoch": 6.0,
"learning_rate": 3.428571428571429e-05,
"loss": 1.9199,
"step": 18
},
{
"epoch": 6.33,
"learning_rate": 3.619047619047619e-05,
"loss": 1.8893,
"step": 19
},
{
"epoch": 6.67,
"learning_rate": 3.809523809523809e-05,
"loss": 1.7973,
"step": 20
},
{
"epoch": 7.0,
"learning_rate": 4e-05,
"loss": 1.8354,
"step": 21
},
{
"epoch": 7.33,
"learning_rate": 4.190476190476191e-05,
"loss": 1.8192,
"step": 22
},
{
"epoch": 7.67,
"learning_rate": 4.380952380952381e-05,
"loss": 1.7685,
"step": 23
},
{
"epoch": 8.0,
"learning_rate": 4.5714285714285716e-05,
"loss": 1.7412,
"step": 24
},
{
"epoch": 8.33,
"learning_rate": 4.761904761904762e-05,
"loss": 1.7596,
"step": 25
},
{
"epoch": 8.67,
"learning_rate": 4.9523809523809525e-05,
"loss": 1.6698,
"step": 26
},
{
"epoch": 9.0,
"learning_rate": 5.142857142857143e-05,
"loss": 1.6695,
"step": 27
},
{
"epoch": 9.33,
"learning_rate": 5.333333333333333e-05,
"loss": 1.6478,
"step": 28
},
{
"epoch": 9.67,
"learning_rate": 5.5238095238095244e-05,
"loss": 1.6236,
"step": 29
},
{
"epoch": 10.0,
"learning_rate": 5.714285714285714e-05,
"loss": 1.6301,
"step": 30
},
{
"epoch": 10.33,
"learning_rate": 5.904761904761905e-05,
"loss": 1.5615,
"step": 31
},
{
"epoch": 10.67,
"learning_rate": 6.0952380952380964e-05,
"loss": 1.5969,
"step": 32
},
{
"epoch": 11.0,
"learning_rate": 6.285714285714286e-05,
"loss": 1.5722,
"step": 33
},
{
"epoch": 11.33,
"learning_rate": 6.476190476190477e-05,
"loss": 1.5809,
"step": 34
},
{
"epoch": 11.67,
"learning_rate": 6.666666666666667e-05,
"loss": 1.4969,
"step": 35
},
{
"epoch": 12.0,
"learning_rate": 6.857142857142858e-05,
"loss": 1.5034,
"step": 36
},
{
"epoch": 12.33,
"learning_rate": 7.047619047619048e-05,
"loss": 1.4814,
"step": 37
},
{
"epoch": 12.67,
"learning_rate": 7.238095238095238e-05,
"loss": 1.4822,
"step": 38
},
{
"epoch": 13.0,
"learning_rate": 7.428571428571429e-05,
"loss": 1.4758,
"step": 39
},
{
"epoch": 13.33,
"learning_rate": 7.619047619047618e-05,
"loss": 1.4583,
"step": 40
},
{
"epoch": 13.67,
"learning_rate": 7.80952380952381e-05,
"loss": 1.4306,
"step": 41
},
{
"epoch": 14.0,
"learning_rate": 8e-05,
"loss": 1.4247,
"step": 42
},
{
"epoch": 14.33,
"learning_rate": 8.19047619047619e-05,
"loss": 1.3784,
"step": 43
},
{
"epoch": 14.67,
"learning_rate": 8.380952380952382e-05,
"loss": 1.4326,
"step": 44
},
{
"epoch": 15.0,
"learning_rate": 8.571428571428571e-05,
"loss": 1.3538,
"step": 45
},
{
"epoch": 15.33,
"learning_rate": 8.761904761904762e-05,
"loss": 1.3156,
"step": 46
},
{
"epoch": 15.67,
"learning_rate": 8.952380952380953e-05,
"loss": 1.3364,
"step": 47
},
{
"epoch": 16.0,
"learning_rate": 9.142857142857143e-05,
"loss": 1.3473,
"step": 48
},
{
"epoch": 16.33,
"learning_rate": 9.333333333333334e-05,
"loss": 1.2919,
"step": 49
},
{
"epoch": 16.67,
"learning_rate": 9.523809523809524e-05,
"loss": 1.2749,
"step": 50
},
{
"epoch": 17.0,
"learning_rate": 9.714285714285715e-05,
"loss": 1.2587,
"step": 51
},
{
"epoch": 17.33,
"learning_rate": 9.904761904761905e-05,
"loss": 1.2509,
"step": 52
},
{
"epoch": 17.67,
"learning_rate": 0.00010095238095238096,
"loss": 1.2122,
"step": 53
},
{
"epoch": 18.0,
"learning_rate": 0.00010285714285714286,
"loss": 1.2101,
"step": 54
},
{
"epoch": 18.33,
"learning_rate": 0.00010476190476190477,
"loss": 1.1981,
"step": 55
},
{
"epoch": 18.67,
"learning_rate": 0.00010666666666666667,
"loss": 1.1938,
"step": 56
},
{
"epoch": 19.0,
"learning_rate": 0.00010857142857142856,
"loss": 1.1346,
"step": 57
},
{
"epoch": 19.33,
"learning_rate": 0.00011047619047619049,
"loss": 1.2012,
"step": 58
},
{
"epoch": 19.67,
"learning_rate": 0.00011238095238095239,
"loss": 1.1305,
"step": 59
},
{
"epoch": 20.0,
"learning_rate": 0.00011428571428571428,
"loss": 1.089,
"step": 60
},
{
"epoch": 20.33,
"learning_rate": 0.00011619047619047621,
"loss": 1.0899,
"step": 61
},
{
"epoch": 20.67,
"learning_rate": 0.0001180952380952381,
"loss": 1.0853,
"step": 62
},
{
"epoch": 21.0,
"learning_rate": 0.00012,
"loss": 1.1892,
"step": 63
},
{
"epoch": 21.33,
"learning_rate": 0.00012190476190476193,
"loss": 1.0786,
"step": 64
},
{
"epoch": 21.67,
"learning_rate": 0.0001238095238095238,
"loss": 1.0845,
"step": 65
},
{
"epoch": 22.0,
"learning_rate": 0.00012571428571428572,
"loss": 1.0965,
"step": 66
},
{
"epoch": 22.33,
"learning_rate": 0.0001276190476190476,
"loss": 1.0556,
"step": 67
},
{
"epoch": 22.67,
"learning_rate": 0.00012952380952380954,
"loss": 1.1,
"step": 68
},
{
"epoch": 23.0,
"learning_rate": 0.00013142857142857143,
"loss": 1.0133,
"step": 69
},
{
"epoch": 23.33,
"learning_rate": 0.00013333333333333334,
"loss": 1.0485,
"step": 70
},
{
"epoch": 23.67,
"learning_rate": 0.00013523809523809525,
"loss": 1.0284,
"step": 71
},
{
"epoch": 24.0,
"learning_rate": 0.00013714285714285716,
"loss": 1.0065,
"step": 72
},
{
"epoch": 24.33,
"learning_rate": 0.00013904761904761905,
"loss": 1.087,
"step": 73
},
{
"epoch": 24.67,
"learning_rate": 0.00014095238095238096,
"loss": 0.9647,
"step": 74
},
{
"epoch": 25.0,
"learning_rate": 0.00014285714285714287,
"loss": 0.9509,
"step": 75
},
{
"epoch": 25.33,
"learning_rate": 0.00014476190476190475,
"loss": 0.9621,
"step": 76
},
{
"epoch": 25.67,
"learning_rate": 0.00014666666666666666,
"loss": 0.9913,
"step": 77
},
{
"epoch": 26.0,
"learning_rate": 0.00014857142857142857,
"loss": 1.0245,
"step": 78
},
{
"epoch": 26.33,
"learning_rate": 0.00015047619047619048,
"loss": 0.9638,
"step": 79
},
{
"epoch": 26.67,
"learning_rate": 0.00015238095238095237,
"loss": 0.9652,
"step": 80
},
{
"epoch": 27.0,
"learning_rate": 0.0001542857142857143,
"loss": 0.9183,
"step": 81
},
{
"epoch": 27.33,
"learning_rate": 0.0001561904761904762,
"loss": 0.9729,
"step": 82
},
{
"epoch": 27.67,
"learning_rate": 0.0001580952380952381,
"loss": 0.9248,
"step": 83
},
{
"epoch": 28.0,
"learning_rate": 0.00016,
"loss": 0.8567,
"step": 84
},
{
"epoch": 28.33,
"learning_rate": 0.00016190476190476192,
"loss": 0.911,
"step": 85
},
{
"epoch": 28.67,
"learning_rate": 0.0001638095238095238,
"loss": 0.8622,
"step": 86
},
{
"epoch": 29.0,
"learning_rate": 0.00016571428571428575,
"loss": 0.9135,
"step": 87
},
{
"epoch": 29.33,
"learning_rate": 0.00016761904761904763,
"loss": 0.869,
"step": 88
},
{
"epoch": 29.67,
"learning_rate": 0.00016952380952380954,
"loss": 0.9034,
"step": 89
},
{
"epoch": 30.0,
"learning_rate": 0.00017142857142857143,
"loss": 0.8036,
"step": 90
},
{
"epoch": 30.33,
"learning_rate": 0.00017333333333333334,
"loss": 0.8248,
"step": 91
},
{
"epoch": 30.67,
"learning_rate": 0.00017523809523809525,
"loss": 0.8567,
"step": 92
},
{
"epoch": 31.0,
"learning_rate": 0.00017714285714285713,
"loss": 0.7961,
"step": 93
},
{
"epoch": 31.33,
"learning_rate": 0.00017904761904761907,
"loss": 0.7859,
"step": 94
},
{
"epoch": 31.67,
"learning_rate": 0.00018095238095238095,
"loss": 0.8019,
"step": 95
},
{
"epoch": 32.0,
"learning_rate": 0.00018285714285714286,
"loss": 0.8066,
"step": 96
},
{
"epoch": 32.33,
"learning_rate": 0.00018476190476190478,
"loss": 0.781,
"step": 97
},
{
"epoch": 32.67,
"learning_rate": 0.0001866666666666667,
"loss": 0.7525,
"step": 98
},
{
"epoch": 33.0,
"learning_rate": 0.00018857142857142857,
"loss": 0.7167,
"step": 99
},
{
"epoch": 33.33,
"learning_rate": 0.00019047619047619048,
"loss": 0.7536,
"step": 100
},
{
"epoch": 33.67,
"learning_rate": 0.0001923809523809524,
"loss": 0.6921,
"step": 101
},
{
"epoch": 34.0,
"learning_rate": 0.0001942857142857143,
"loss": 0.6864,
"step": 102
},
{
"epoch": 34.33,
"learning_rate": 0.0001961904761904762,
"loss": 0.7011,
"step": 103
},
{
"epoch": 34.67,
"learning_rate": 0.0001980952380952381,
"loss": 0.6718,
"step": 104
},
{
"epoch": 35.0,
"learning_rate": 0.0002,
"loss": 0.652,
"step": 105
},
{
"epoch": 35.33,
"learning_rate": 0.00019999944740655014,
"loss": 0.6498,
"step": 106
},
{
"epoch": 35.67,
"learning_rate": 0.00019999778963230775,
"loss": 0.6476,
"step": 107
},
{
"epoch": 36.0,
"learning_rate": 0.00019999502669559432,
"loss": 0.6011,
"step": 108
},
{
"epoch": 36.33,
"learning_rate": 0.00019999115862694546,
"loss": 0.595,
"step": 109
},
{
"epoch": 36.67,
"learning_rate": 0.00019998618546911056,
"loss": 0.6427,
"step": 110
},
{
"epoch": 37.0,
"learning_rate": 0.00019998010727705236,
"loss": 0.5505,
"step": 111
},
{
"epoch": 37.33,
"learning_rate": 0.00019997292411794618,
"loss": 0.617,
"step": 112
},
{
"epoch": 37.67,
"learning_rate": 0.00019996463607117935,
"loss": 0.537,
"step": 113
},
{
"epoch": 38.0,
"learning_rate": 0.00019995524322835034,
"loss": 0.5147,
"step": 114
},
{
"epoch": 38.33,
"learning_rate": 0.00019994474569326757,
"loss": 0.5067,
"step": 115
},
{
"epoch": 38.67,
"learning_rate": 0.00019993314358194843,
"loss": 0.5243,
"step": 116
},
{
"epoch": 39.0,
"learning_rate": 0.00019992043702261793,
"loss": 0.5144,
"step": 117
},
{
"epoch": 39.33,
"learning_rate": 0.0001999066261557073,
"loss": 0.4697,
"step": 118
},
{
"epoch": 39.67,
"learning_rate": 0.0001998917111338525,
"loss": 0.4706,
"step": 119
},
{
"epoch": 40.0,
"learning_rate": 0.00019987569212189224,
"loss": 0.4908,
"step": 120
},
{
"epoch": 40.33,
"learning_rate": 0.00019985856929686667,
"loss": 0.4193,
"step": 121
},
{
"epoch": 40.67,
"learning_rate": 0.00019984034284801502,
"loss": 0.4805,
"step": 122
},
{
"epoch": 41.0,
"learning_rate": 0.0001998210129767735,
"loss": 0.4101,
"step": 123
},
{
"epoch": 41.33,
"learning_rate": 0.00019980057989677345,
"loss": 0.3676,
"step": 124
},
{
"epoch": 41.67,
"learning_rate": 0.0001997790438338385,
"loss": 0.442,
"step": 125
},
{
"epoch": 42.0,
"learning_rate": 0.00019975640502598244,
"loss": 0.385,
"step": 126
},
{
"epoch": 42.33,
"learning_rate": 0.00019973266372340639,
"loss": 0.38,
"step": 127
},
{
"epoch": 42.67,
"learning_rate": 0.0001997078201884961,
"loss": 0.3504,
"step": 128
},
{
"epoch": 43.0,
"learning_rate": 0.0001996818746958191,
"loss": 0.3622,
"step": 129
},
{
"epoch": 43.33,
"learning_rate": 0.00019965482753212156,
"loss": 0.3173,
"step": 130
},
{
"epoch": 43.67,
"learning_rate": 0.00019962667899632518,
"loss": 0.3491,
"step": 131
},
{
"epoch": 44.0,
"learning_rate": 0.00019959742939952392,
"loss": 0.3067,
"step": 132
},
{
"epoch": 44.33,
"learning_rate": 0.00019956707906498044,
"loss": 0.2769,
"step": 133
},
{
"epoch": 44.67,
"learning_rate": 0.00019953562832812272,
"loss": 0.306,
"step": 134
},
{
"epoch": 45.0,
"learning_rate": 0.00019950307753654017,
"loss": 0.2923,
"step": 135
},
{
"epoch": 45.33,
"learning_rate": 0.00019946942704997982,
"loss": 0.2585,
"step": 136
},
{
"epoch": 45.67,
"learning_rate": 0.00019943467724034252,
"loss": 0.2664,
"step": 137
},
{
"epoch": 46.0,
"learning_rate": 0.00019939882849167852,
"loss": 0.2606,
"step": 138
},
{
"epoch": 46.33,
"learning_rate": 0.0001993618812001836,
"loss": 0.2376,
"step": 139
},
{
"epoch": 46.67,
"learning_rate": 0.00019932383577419432,
"loss": 0.213,
"step": 140
},
{
"epoch": 47.0,
"learning_rate": 0.00019928469263418374,
"loss": 0.2371,
"step": 141
},
{
"epoch": 47.33,
"learning_rate": 0.00019924445221275675,
"loss": 0.2132,
"step": 142
},
{
"epoch": 47.67,
"learning_rate": 0.00019920311495464518,
"loss": 0.2004,
"step": 143
},
{
"epoch": 48.0,
"learning_rate": 0.00019916068131670302,
"loss": 0.2163,
"step": 144
},
{
"epoch": 48.33,
"learning_rate": 0.0001991171517679013,
"loss": 0.2106,
"step": 145
},
{
"epoch": 48.67,
"learning_rate": 0.0001990725267893228,
"loss": 0.1734,
"step": 146
},
{
"epoch": 49.0,
"learning_rate": 0.00019902680687415705,
"loss": 0.1896,
"step": 147
},
{
"epoch": 49.33,
"learning_rate": 0.00019897999252769448,
"loss": 0.1678,
"step": 148
},
{
"epoch": 49.67,
"learning_rate": 0.00019893208426732115,
"loss": 0.179,
"step": 149
},
{
"epoch": 50.0,
"learning_rate": 0.00019888308262251285,
"loss": 0.152,
"step": 150
},
{
"epoch": 50.33,
"learning_rate": 0.00019883298813482938,
"loss": 0.1437,
"step": 151
},
{
"epoch": 50.67,
"learning_rate": 0.00019878180135790845,
"loss": 0.1491,
"step": 152
},
{
"epoch": 51.0,
"learning_rate": 0.00019872952285745959,
"loss": 0.1562,
"step": 153
},
{
"epoch": 51.33,
"learning_rate": 0.00019867615321125795,
"loss": 0.137,
"step": 154
},
{
"epoch": 51.67,
"learning_rate": 0.00019862169300913785,
"loss": 0.1305,
"step": 155
},
{
"epoch": 52.0,
"learning_rate": 0.0001985661428529863,
"loss": 0.1266,
"step": 156
},
{
"epoch": 52.33,
"learning_rate": 0.00019850950335673643,
"loss": 0.1205,
"step": 157
},
{
"epoch": 52.67,
"learning_rate": 0.00019845177514636042,
"loss": 0.1262,
"step": 158
},
{
"epoch": 53.0,
"learning_rate": 0.00019839295885986296,
"loss": 0.1023,
"step": 159
},
{
"epoch": 53.33,
"learning_rate": 0.00019833305514727395,
"loss": 0.1071,
"step": 160
},
{
"epoch": 53.67,
"learning_rate": 0.00019827206467064133,
"loss": 0.0987,
"step": 161
},
{
"epoch": 54.0,
"learning_rate": 0.0001982099881040239,
"loss": 0.0992,
"step": 162
},
{
"epoch": 54.33,
"learning_rate": 0.0001981468261334837,
"loss": 0.0812,
"step": 163
},
{
"epoch": 54.67,
"learning_rate": 0.0001980825794570786,
"loss": 0.0976,
"step": 164
},
{
"epoch": 55.0,
"learning_rate": 0.00019801724878485438,
"loss": 0.0957,
"step": 165
},
{
"epoch": 55.33,
"learning_rate": 0.00019795083483883715,
"loss": 0.0867,
"step": 166
},
{
"epoch": 55.67,
"learning_rate": 0.0001978833383530251,
"loss": 0.0724,
"step": 167
},
{
"epoch": 56.0,
"learning_rate": 0.00019781476007338058,
"loss": 0.0926,
"step": 168
},
{
"epoch": 56.33,
"learning_rate": 0.00019774510075782172,
"loss": 0.0717,
"step": 169
},
{
"epoch": 56.67,
"learning_rate": 0.00019767436117621413,
"loss": 0.0764,
"step": 170
},
{
"epoch": 57.0,
"learning_rate": 0.00019760254211036244,
"loss": 0.073,
"step": 171
},
{
"epoch": 57.33,
"learning_rate": 0.00019752964435400155,
"loss": 0.065,
"step": 172
},
{
"epoch": 57.67,
"learning_rate": 0.00019745566871278794,
"loss": 0.0695,
"step": 173
},
{
"epoch": 58.0,
"learning_rate": 0.00019738061600429064,
"loss": 0.0638,
"step": 174
},
{
"epoch": 58.33,
"learning_rate": 0.00019730448705798239,
"loss": 0.0583,
"step": 175
},
{
"epoch": 58.67,
"learning_rate": 0.00019722728271523034,
"loss": 0.0594,
"step": 176
},
{
"epoch": 59.0,
"learning_rate": 0.00019714900382928675,
"loss": 0.0574,
"step": 177
},
{
"epoch": 59.33,
"learning_rate": 0.00019706965126527963,
"loss": 0.0489,
"step": 178
},
{
"epoch": 59.67,
"learning_rate": 0.00019698922590020312,
"loss": 0.0602,
"step": 179
},
{
"epoch": 60.0,
"learning_rate": 0.0001969077286229078,
"loss": 0.0438,
"step": 180
},
{
"epoch": 60.33,
"learning_rate": 0.00019682516033409092,
"loss": 0.047,
"step": 181
},
{
"epoch": 60.67,
"learning_rate": 0.00019674152194628638,
"loss": 0.0437,
"step": 182
},
{
"epoch": 61.0,
"learning_rate": 0.00019665681438385473,
"loss": 0.0533,
"step": 183
},
{
"epoch": 61.33,
"learning_rate": 0.0001965710385829728,
"loss": 0.0463,
"step": 184
},
{
"epoch": 61.67,
"learning_rate": 0.00019648419549162348,
"loss": 0.0441,
"step": 185
},
{
"epoch": 62.0,
"learning_rate": 0.00019639628606958533,
"loss": 0.0398,
"step": 186
},
{
"epoch": 62.33,
"learning_rate": 0.0001963073112884217,
"loss": 0.0396,
"step": 187
},
{
"epoch": 62.67,
"learning_rate": 0.00019621727213147027,
"loss": 0.0372,
"step": 188
},
{
"epoch": 63.0,
"learning_rate": 0.0001961261695938319,
"loss": 0.0447,
"step": 189
},
{
"epoch": 63.33,
"learning_rate": 0.00019603400468235998,
"loss": 0.0366,
"step": 190
},
{
"epoch": 63.67,
"learning_rate": 0.00019594077841564907,
"loss": 0.0369,
"step": 191
},
{
"epoch": 64.0,
"learning_rate": 0.00019584649182402357,
"loss": 0.0338,
"step": 192
},
{
"epoch": 64.33,
"learning_rate": 0.0001957511459495266,
"loss": 0.0337,
"step": 193
},
{
"epoch": 64.67,
"learning_rate": 0.00019565474184590826,
"loss": 0.0331,
"step": 194
},
{
"epoch": 65.0,
"learning_rate": 0.0001955572805786141,
"loss": 0.0345,
"step": 195
},
{
"epoch": 65.33,
"learning_rate": 0.0001954587632247732,
"loss": 0.0294,
"step": 196
},
{
"epoch": 65.67,
"learning_rate": 0.00019535919087318652,
"loss": 0.031,
"step": 197
},
{
"epoch": 66.0,
"learning_rate": 0.0001952585646243146,
"loss": 0.0303,
"step": 198
},
{
"epoch": 66.33,
"learning_rate": 0.00019515688559026563,
"loss": 0.0289,
"step": 199
},
{
"epoch": 66.67,
"learning_rate": 0.0001950541548947829,
"loss": 0.0276,
"step": 200
},
{
"epoch": 67.0,
"learning_rate": 0.00019495037367323262,
"loss": 0.0272,
"step": 201
},
{
"epoch": 67.33,
"learning_rate": 0.0001948455430725913,
"loss": 0.0255,
"step": 202
},
{
"epoch": 67.67,
"learning_rate": 0.00019473966425143292,
"loss": 0.0261,
"step": 203
},
{
"epoch": 68.0,
"learning_rate": 0.00019463273837991643,
"loss": 0.0278,
"step": 204
},
{
"epoch": 68.33,
"learning_rate": 0.00019452476663977248,
"loss": 0.0225,
"step": 205
},
{
"epoch": 68.67,
"learning_rate": 0.00019441575022429065,
"loss": 0.0236,
"step": 206
},
{
"epoch": 69.0,
"learning_rate": 0.00019430569033830605,
"loss": 0.0261,
"step": 207
},
{
"epoch": 69.33,
"learning_rate": 0.00019419458819818614,
"loss": 0.0218,
"step": 208
},
{
"epoch": 69.67,
"learning_rate": 0.00019408244503181724,
"loss": 0.0222,
"step": 209
},
{
"epoch": 70.0,
"learning_rate": 0.00019396926207859084,
"loss": 0.0224,
"step": 210
},
{
"epoch": 70.33,
"learning_rate": 0.00019385504058939024,
"loss": 0.0207,
"step": 211
},
{
"epoch": 70.67,
"learning_rate": 0.00019373978182657625,
"loss": 0.0207,
"step": 212
},
{
"epoch": 71.0,
"learning_rate": 0.00019362348706397373,
"loss": 0.0194,
"step": 213
},
{
"epoch": 71.33,
"learning_rate": 0.00019350615758685708,
"loss": 0.0167,
"step": 214
},
{
"epoch": 71.67,
"learning_rate": 0.00019338779469193639,
"loss": 0.02,
"step": 215
},
{
"epoch": 72.0,
"learning_rate": 0.00019326839968734279,
"loss": 0.0196,
"step": 216
},
{
"epoch": 72.33,
"learning_rate": 0.00019314797389261424,
"loss": 0.0155,
"step": 217
},
{
"epoch": 72.67,
"learning_rate": 0.00019302651863868092,
"loss": 0.019,
"step": 218
},
{
"epoch": 73.0,
"learning_rate": 0.00019290403526785025,
"loss": 0.0174,
"step": 219
},
{
"epoch": 73.33,
"learning_rate": 0.00019278052513379255,
"loss": 0.0172,
"step": 220
},
{
"epoch": 73.67,
"learning_rate": 0.00019265598960152555,
"loss": 0.0164,
"step": 221
},
{
"epoch": 74.0,
"learning_rate": 0.00019253043004739968,
"loss": 0.0149,
"step": 222
},
{
"epoch": 74.33,
"learning_rate": 0.00019240384785908265,
"loss": 0.0147,
"step": 223
},
{
"epoch": 74.67,
"learning_rate": 0.00019227624443554425,
"loss": 0.0156,
"step": 224
},
{
"epoch": 75.0,
"learning_rate": 0.00019214762118704076,
"loss": 0.0151,
"step": 225
},
{
"epoch": 75.33,
"learning_rate": 0.00019201797953509955,
"loss": 0.0153,
"step": 226
},
{
"epoch": 75.67,
"learning_rate": 0.00019188732091250307,
"loss": 0.0131,
"step": 227
},
{
"epoch": 76.0,
"learning_rate": 0.00019175564676327339,
"loss": 0.0136,
"step": 228
},
{
"epoch": 76.33,
"learning_rate": 0.00019162295854265594,
"loss": 0.0142,
"step": 229
},
{
"epoch": 76.67,
"learning_rate": 0.00019148925771710347,
"loss": 0.0124,
"step": 230
},
{
"epoch": 77.0,
"learning_rate": 0.0001913545457642601,
"loss": 0.0127,
"step": 231
},
{
"epoch": 77.33,
"learning_rate": 0.00019121882417294462,
"loss": 0.0119,
"step": 232
},
{
"epoch": 77.67,
"learning_rate": 0.00019108209444313433,
"loss": 0.0128,
"step": 233
},
{
"epoch": 78.0,
"learning_rate": 0.00019094435808594823,
"loss": 0.0117,
"step": 234
},
{
"epoch": 78.33,
"learning_rate": 0.0001908056166236305,
"loss": 0.0122,
"step": 235
},
{
"epoch": 78.67,
"learning_rate": 0.00019066587158953366,
"loss": 0.0115,
"step": 236
},
{
"epoch": 79.0,
"learning_rate": 0.0001905251245281015,
"loss": 0.0108,
"step": 237
},
{
"epoch": 79.33,
"learning_rate": 0.00019038337699485208,
"loss": 0.0104,
"step": 238
},
{
"epoch": 79.67,
"learning_rate": 0.00019024063055636057,
"loss": 0.0108,
"step": 239
},
{
"epoch": 80.0,
"learning_rate": 0.0001900968867902419,
"loss": 0.0099,
"step": 240
},
{
"epoch": 80.33,
"learning_rate": 0.00018995214728513343,
"loss": 0.0099,
"step": 241
},
{
"epoch": 80.67,
"learning_rate": 0.0001898064136406771,
"loss": 0.0096,
"step": 242
},
{
"epoch": 81.0,
"learning_rate": 0.0001896596874675021,
"loss": 0.0101,
"step": 243
},
{
"epoch": 81.33,
"learning_rate": 0.00018951197038720688,
"loss": 0.0105,
"step": 244
},
{
"epoch": 81.67,
"learning_rate": 0.00018936326403234125,
"loss": 0.0088,
"step": 245
},
{
"epoch": 82.0,
"learning_rate": 0.00018921357004638835,
"loss": 0.0097,
"step": 246
},
{
"epoch": 82.33,
"learning_rate": 0.00018906289008374655,
"loss": 0.0084,
"step": 247
},
{
"epoch": 82.67,
"learning_rate": 0.00018891122580971098,
"loss": 0.0095,
"step": 248
},
{
"epoch": 83.0,
"learning_rate": 0.00018875857890045543,
"loss": 0.0084,
"step": 249
},
{
"epoch": 83.33,
"learning_rate": 0.00018860495104301345,
"loss": 0.0074,
"step": 250
},
{
"epoch": 83.67,
"learning_rate": 0.00018845034393526005,
"loss": 0.0088,
"step": 251
},
{
"epoch": 84.0,
"learning_rate": 0.00018829475928589271,
"loss": 0.0078,
"step": 252
},
{
"epoch": 84.33,
"learning_rate": 0.0001881381988144126,
"loss": 0.0081,
"step": 253
},
{
"epoch": 84.67,
"learning_rate": 0.0001879806642511055,
"loss": 0.0071,
"step": 254
},
{
"epoch": 85.0,
"learning_rate": 0.00018782215733702286,
"loss": 0.0069,
"step": 255
},
{
"epoch": 85.33,
"learning_rate": 0.00018766267982396224,
"loss": 0.007,
"step": 256
},
{
"epoch": 85.67,
"learning_rate": 0.00018750223347444828,
"loss": 0.0074,
"step": 257
},
{
"epoch": 86.0,
"learning_rate": 0.00018734082006171299,
"loss": 0.0065,
"step": 258
},
{
"epoch": 86.33,
"learning_rate": 0.00018717844136967624,
"loss": 0.0063,
"step": 259
},
{
"epoch": 86.67,
"learning_rate": 0.00018701509919292613,
"loss": 0.0072,
"step": 260
},
{
"epoch": 87.0,
"learning_rate": 0.0001868507953366989,
"loss": 0.006,
"step": 261
},
{
"epoch": 87.33,
"learning_rate": 0.00018668553161685933,
"loss": 0.0062,
"step": 262
},
{
"epoch": 87.67,
"learning_rate": 0.00018651930985988036,
"loss": 0.0059,
"step": 263
},
{
"epoch": 88.0,
"learning_rate": 0.0001863521319028231,
"loss": 0.0068,
"step": 264
},
{
"epoch": 88.33,
"learning_rate": 0.0001861839995933164,
"loss": 0.0056,
"step": 265
},
{
"epoch": 88.67,
"learning_rate": 0.00018601491478953657,
"loss": 0.0058,
"step": 266
},
{
"epoch": 89.0,
"learning_rate": 0.00018584487936018661,
"loss": 0.0063,
"step": 267
},
{
"epoch": 89.33,
"learning_rate": 0.0001856738951844759,
"loss": 0.0057,
"step": 268
},
{
"epoch": 89.67,
"learning_rate": 0.00018550196415209914,
"loss": 0.006,
"step": 269
},
{
"epoch": 90.0,
"learning_rate": 0.00018532908816321558,
"loss": 0.0062,
"step": 270
},
{
"epoch": 90.33,
"learning_rate": 0.00018515526912842796,
"loss": 0.0059,
"step": 271
},
{
"epoch": 90.67,
"learning_rate": 0.0001849805089687615,
"loss": 0.0071,
"step": 272
},
{
"epoch": 91.0,
"learning_rate": 0.0001848048096156426,
"loss": 0.0053,
"step": 273
},
{
"epoch": 91.33,
"learning_rate": 0.00018462817301087748,
"loss": 0.0061,
"step": 274
},
{
"epoch": 91.67,
"learning_rate": 0.0001844506011066308,
"loss": 0.0049,
"step": 275
},
{
"epoch": 92.0,
"learning_rate": 0.0001842720958654039,
"loss": 0.0057,
"step": 276
},
{
"epoch": 92.33,
"learning_rate": 0.00018409265926001343,
"loss": 0.0052,
"step": 277
},
{
"epoch": 92.67,
"learning_rate": 0.00018391229327356916,
"loss": 0.0052,
"step": 278
},
{
"epoch": 93.0,
"learning_rate": 0.00018373099989945236,
"loss": 0.0051,
"step": 279
},
{
"epoch": 93.33,
"learning_rate": 0.00018354878114129367,
"loss": 0.0054,
"step": 280
},
{
"epoch": 93.67,
"learning_rate": 0.0001833656390129509,
"loss": 0.0051,
"step": 281
},
{
"epoch": 94.0,
"learning_rate": 0.0001831815755384869,
"loss": 0.0049,
"step": 282
},
{
"epoch": 94.33,
"learning_rate": 0.00018299659275214706,
"loss": 0.0043,
"step": 283
},
{
"epoch": 94.67,
"learning_rate": 0.00018281069269833692,
"loss": 0.005,
"step": 284
},
{
"epoch": 95.0,
"learning_rate": 0.0001826238774315995,
"loss": 0.0058,
"step": 285
},
{
"epoch": 95.33,
"learning_rate": 0.00018243614901659264,
"loss": 0.0049,
"step": 286
},
{
"epoch": 95.67,
"learning_rate": 0.00018224750952806624,
"loss": 0.0049,
"step": 287
},
{
"epoch": 96.0,
"learning_rate": 0.00018205796105083915,
"loss": 0.0047,
"step": 288
},
{
"epoch": 96.33,
"learning_rate": 0.00018186750567977637,
"loss": 0.0046,
"step": 289
},
{
"epoch": 96.67,
"learning_rate": 0.00018167614551976567,
"loss": 0.0051,
"step": 290
},
{
"epoch": 97.0,
"learning_rate": 0.00018148388268569453,
"loss": 0.0052,
"step": 291
},
{
"epoch": 97.33,
"learning_rate": 0.00018129071930242648,
"loss": 0.0042,
"step": 292
},
{
"epoch": 97.67,
"learning_rate": 0.00018109665750477806,
"loss": 0.0054,
"step": 293
},
{
"epoch": 98.0,
"learning_rate": 0.00018090169943749476,
"loss": 0.0054,
"step": 294
},
{
"epoch": 98.33,
"learning_rate": 0.00018070584725522762,
"loss": 0.0041,
"step": 295
},
{
"epoch": 98.67,
"learning_rate": 0.00018050910312250931,
"loss": 0.0051,
"step": 296
},
{
"epoch": 99.0,
"learning_rate": 0.00018031146921373018,
"loss": 0.0047,
"step": 297
},
{
"epoch": 99.33,
"learning_rate": 0.00018011294771311435,
"loss": 0.0039,
"step": 298
},
{
"epoch": 99.67,
"learning_rate": 0.00017991354081469538,
"loss": 0.0048,
"step": 299
},
{
"epoch": 100.0,
"learning_rate": 0.00017971325072229226,
"loss": 0.0048,
"step": 300
},
{
"epoch": 100.33,
"learning_rate": 0.0001795120796494848,
"loss": 0.0043,
"step": 301
},
{
"epoch": 100.67,
"learning_rate": 0.00017931002981958933,
"loss": 0.0044,
"step": 302
},
{
"epoch": 101.0,
"learning_rate": 0.00017910710346563416,
"loss": 0.0041,
"step": 303
},
{
"epoch": 101.33,
"learning_rate": 0.00017890330283033468,
"loss": 0.0044,
"step": 304
},
{
"epoch": 101.67,
"learning_rate": 0.0001786986301660689,
"loss": 0.0045,
"step": 305
},
{
"epoch": 102.0,
"learning_rate": 0.00017849308773485226,
"loss": 0.0035,
"step": 306
},
{
"epoch": 102.33,
"learning_rate": 0.00017828667780831278,
"loss": 0.0039,
"step": 307
},
{
"epoch": 102.67,
"learning_rate": 0.00017807940266766593,
"loss": 0.0043,
"step": 308
},
{
"epoch": 103.0,
"learning_rate": 0.0001778712646036894,
"loss": 0.0041,
"step": 309
},
{
"epoch": 103.33,
"learning_rate": 0.00017766226591669785,
"loss": 0.004,
"step": 310
},
{
"epoch": 103.67,
"learning_rate": 0.00017745240891651735,
"loss": 0.0036,
"step": 311
},
{
"epoch": 104.0,
"learning_rate": 0.00017724169592245995,
"loss": 0.0047,
"step": 312
},
{
"epoch": 104.33,
"learning_rate": 0.00017703012926329815,
"loss": 0.0038,
"step": 313
},
{
"epoch": 104.67,
"learning_rate": 0.0001768177112772388,
"loss": 0.0039,
"step": 314
},
{
"epoch": 105.0,
"learning_rate": 0.0001766044443118978,
"loss": 0.0038,
"step": 315
},
{
"epoch": 105.33,
"learning_rate": 0.00017639033072427366,
"loss": 0.004,
"step": 316
},
{
"epoch": 105.67,
"learning_rate": 0.0001761753728807217,
"loss": 0.004,
"step": 317
},
{
"epoch": 106.0,
"learning_rate": 0.00017595957315692782,
"loss": 0.0033,
"step": 318
},
{
"epoch": 106.33,
"learning_rate": 0.00017574293393788235,
"loss": 0.0039,
"step": 319
},
{
"epoch": 106.67,
"learning_rate": 0.0001755254576178535,
"loss": 0.0041,
"step": 320
},
{
"epoch": 107.0,
"learning_rate": 0.00017530714660036112,
"loss": 0.0033,
"step": 321
},
{
"epoch": 107.33,
"learning_rate": 0.00017508800329814995,
"loss": 0.0036,
"step": 322
},
{
"epoch": 107.67,
"learning_rate": 0.000174868030133163,
"loss": 0.0036,
"step": 323
},
{
"epoch": 108.0,
"learning_rate": 0.00017464722953651504,
"loss": 0.0039,
"step": 324
},
{
"epoch": 108.33,
"learning_rate": 0.00017442560394846516,
"loss": 0.0039,
"step": 325
},
{
"epoch": 108.67,
"learning_rate": 0.00017420315581839044,
"loss": 0.0036,
"step": 326
},
{
"epoch": 109.0,
"learning_rate": 0.0001739798876047584,
"loss": 0.0035,
"step": 327
},
{
"epoch": 109.33,
"learning_rate": 0.00017375580177510016,
"loss": 0.0039,
"step": 328
},
{
"epoch": 109.67,
"learning_rate": 0.0001735309008059829,
"loss": 0.0032,
"step": 329
},
{
"epoch": 110.0,
"learning_rate": 0.00017330518718298264,
"loss": 0.0037,
"step": 330
},
{
"epoch": 110.33,
"learning_rate": 0.00017307866340065685,
"loss": 0.0041,
"step": 331
},
{
"epoch": 110.67,
"learning_rate": 0.00017285133196251663,
"loss": 0.0033,
"step": 332
},
{
"epoch": 111.0,
"learning_rate": 0.0001726231953809993,
"loss": 0.0033,
"step": 333
},
{
"epoch": 111.33,
"learning_rate": 0.00017239425617744048,
"loss": 0.0033,
"step": 334
},
{
"epoch": 111.67,
"learning_rate": 0.0001721645168820462,
"loss": 0.0038,
"step": 335
},
{
"epoch": 112.0,
"learning_rate": 0.0001719339800338651,
"loss": 0.0036,
"step": 336
},
{
"epoch": 112.33,
"learning_rate": 0.00017170264818076026,
"loss": 0.0033,
"step": 337
},
{
"epoch": 112.67,
"learning_rate": 0.0001714705238793809,
"loss": 0.0035,
"step": 338
},
{
"epoch": 113.0,
"learning_rate": 0.0001712376096951345,
"loss": 0.0038,
"step": 339
},
{
"epoch": 113.33,
"learning_rate": 0.00017100390820215804,
"loss": 0.0034,
"step": 340
},
{
"epoch": 113.67,
"learning_rate": 0.00017076942198328987,
"loss": 0.0034,
"step": 341
},
{
"epoch": 114.0,
"learning_rate": 0.0001705341536300409,
"loss": 0.0035,
"step": 342
},
{
"epoch": 114.33,
"learning_rate": 0.0001702981057425662,
"loss": 0.0033,
"step": 343
},
{
"epoch": 114.67,
"learning_rate": 0.00017006128092963605,
"loss": 0.0036,
"step": 344
},
{
"epoch": 115.0,
"learning_rate": 0.00016982368180860728,
"loss": 0.0034,
"step": 345
},
{
"epoch": 115.33,
"learning_rate": 0.00016958531100539427,
"loss": 0.0032,
"step": 346
},
{
"epoch": 115.67,
"learning_rate": 0.00016934617115443992,
"loss": 0.0033,
"step": 347
},
{
"epoch": 116.0,
"learning_rate": 0.00016910626489868649,
"loss": 0.004,
"step": 348
},
{
"epoch": 116.33,
"learning_rate": 0.00016886559488954648,
"loss": 0.0032,
"step": 349
},
{
"epoch": 116.67,
"learning_rate": 0.0001686241637868734,
"loss": 0.0035,
"step": 350
},
{
"epoch": 116.67,
"eval_loss": 1.2795522212982178,
"eval_runtime": 3.5029,
"eval_samples_per_second": 5.995,
"eval_steps_per_second": 0.856,
"step": 350
},
{
"epoch": 117.0,
"learning_rate": 0.00016838197425893202,
"loss": 0.0038,
"step": 351
},
{
"epoch": 117.33,
"learning_rate": 0.00016813902898236939,
"loss": 0.0033,
"step": 352
},
{
"epoch": 117.67,
"learning_rate": 0.00016789533064218485,
"loss": 0.0035,
"step": 353
},
{
"epoch": 118.0,
"learning_rate": 0.00016765088193170053,
"loss": 0.0033,
"step": 354
},
{
"epoch": 118.33,
"learning_rate": 0.00016740568555253155,
"loss": 0.0037,
"step": 355
},
{
"epoch": 118.67,
"learning_rate": 0.00016715974421455617,
"loss": 0.0032,
"step": 356
},
{
"epoch": 119.0,
"learning_rate": 0.00016691306063588583,
"loss": 0.0031,
"step": 357
},
{
"epoch": 119.33,
"learning_rate": 0.00016666563754283515,
"loss": 0.0035,
"step": 358
},
{
"epoch": 119.67,
"learning_rate": 0.0001664174776698917,
"loss": 0.0031,
"step": 359
},
{
"epoch": 120.0,
"learning_rate": 0.00016616858375968595,
"loss": 0.0033,
"step": 360
},
{
"epoch": 120.33,
"learning_rate": 0.00016591895856296073,
"loss": 0.0036,
"step": 361
},
{
"epoch": 120.67,
"learning_rate": 0.00016566860483854104,
"loss": 0.0032,
"step": 362
},
{
"epoch": 121.0,
"learning_rate": 0.00016541752535330345,
"loss": 0.0032,
"step": 363
},
{
"epoch": 121.33,
"learning_rate": 0.00016516572288214552,
"loss": 0.003,
"step": 364
},
{
"epoch": 121.67,
"learning_rate": 0.0001649132002079552,
"loss": 0.003,
"step": 365
},
{
"epoch": 122.0,
"learning_rate": 0.00016465996012157995,
"loss": 0.0036,
"step": 366
},
{
"epoch": 122.33,
"learning_rate": 0.00016440600542179615,
"loss": 0.0036,
"step": 367
},
{
"epoch": 122.67,
"learning_rate": 0.0001641513389152777,
"loss": 0.0032,
"step": 368
},
{
"epoch": 123.0,
"learning_rate": 0.0001638959634165656,
"loss": 0.0027,
"step": 369
},
{
"epoch": 123.33,
"learning_rate": 0.00016363988174803638,
"loss": 0.0034,
"step": 370
},
{
"epoch": 123.67,
"learning_rate": 0.00016338309673987101,
"loss": 0.0033,
"step": 371
},
{
"epoch": 124.0,
"learning_rate": 0.0001631256112300239,
"loss": 0.0027,
"step": 372
},
{
"epoch": 124.33,
"learning_rate": 0.00016286742806419108,
"loss": 0.0033,
"step": 373
},
{
"epoch": 124.67,
"learning_rate": 0.0001626085500957791,
"loss": 0.0031,
"step": 374
},
{
"epoch": 125.0,
"learning_rate": 0.00016234898018587337,
"loss": 0.0032,
"step": 375
},
{
"epoch": 125.33,
"learning_rate": 0.0001620887212032065,
"loss": 0.003,
"step": 376
},
{
"epoch": 125.67,
"learning_rate": 0.00016182777602412665,
"loss": 0.0034,
"step": 377
},
{
"epoch": 126.0,
"learning_rate": 0.0001615661475325658,
"loss": 0.0031,
"step": 378
},
{
"epoch": 126.33,
"learning_rate": 0.0001613038386200078,
"loss": 0.0031,
"step": 379
},
{
"epoch": 126.67,
"learning_rate": 0.00016104085218545633,
"loss": 0.0034,
"step": 380
},
{
"epoch": 127.0,
"learning_rate": 0.00016077719113540302,
"loss": 0.0027,
"step": 381
},
{
"epoch": 127.33,
"learning_rate": 0.00016051285838379525,
"loss": 0.0035,
"step": 382
},
{
"epoch": 127.67,
"learning_rate": 0.00016024785685200395,
"loss": 0.0029,
"step": 383
},
{
"epoch": 128.0,
"learning_rate": 0.00015998218946879138,
"loss": 0.0027,
"step": 384
},
{
"epoch": 128.33,
"learning_rate": 0.00015971585917027862,
"loss": 0.0035,
"step": 385
},
{
"epoch": 128.67,
"learning_rate": 0.00015944886889991325,
"loss": 0.0029,
"step": 386
},
{
"epoch": 129.0,
"learning_rate": 0.00015918122160843678,
"loss": 0.0029,
"step": 387
},
{
"epoch": 129.33,
"learning_rate": 0.000158912920253852,
"loss": 0.0032,
"step": 388
},
{
"epoch": 129.67,
"learning_rate": 0.0001586439678013903,
"loss": 0.0029,
"step": 389
},
{
"epoch": 130.0,
"learning_rate": 0.000158374367223479,
"loss": 0.0032,
"step": 390
},
{
"epoch": 130.33,
"learning_rate": 0.00015810412149970833,
"loss": 0.0027,
"step": 391
},
{
"epoch": 130.67,
"learning_rate": 0.00015783323361679864,
"loss": 0.0034,
"step": 392
},
{
"epoch": 131.0,
"learning_rate": 0.00015756170656856737,
"loss": 0.0032,
"step": 393
},
{
"epoch": 131.33,
"learning_rate": 0.0001572895433558958,
"loss": 0.0033,
"step": 394
},
{
"epoch": 131.67,
"learning_rate": 0.0001570167469866962,
"loss": 0.0027,
"step": 395
},
{
"epoch": 132.0,
"learning_rate": 0.0001567433204758782,
"loss": 0.003,
"step": 396
},
{
"epoch": 132.33,
"learning_rate": 0.00015646926684531585,
"loss": 0.003,
"step": 397
},
{
"epoch": 132.67,
"learning_rate": 0.00015619458912381396,
"loss": 0.003,
"step": 398
},
{
"epoch": 133.0,
"learning_rate": 0.0001559192903470747,
"loss": 0.003,
"step": 399
},
{
"epoch": 133.33,
"learning_rate": 0.00015564337355766412,
"loss": 0.0033,
"step": 400
},
{
"epoch": 133.67,
"learning_rate": 0.0001553668418049784,
"loss": 0.0027,
"step": 401
},
{
"epoch": 134.0,
"learning_rate": 0.00015508969814521025,
"loss": 0.003,
"step": 402
},
{
"epoch": 134.33,
"learning_rate": 0.00015481194564131512,
"loss": 0.0028,
"step": 403
},
{
"epoch": 134.67,
"learning_rate": 0.00015453358736297729,
"loss": 0.0029,
"step": 404
},
{
"epoch": 135.0,
"learning_rate": 0.00015425462638657595,
"loss": 0.0031,
"step": 405
},
{
"epoch": 135.33,
"learning_rate": 0.0001539750657951513,
"loss": 0.0027,
"step": 406
},
{
"epoch": 135.67,
"learning_rate": 0.00015369490867837035,
"loss": 0.0029,
"step": 407
},
{
"epoch": 136.0,
"learning_rate": 0.00015341415813249288,
"loss": 0.0032,
"step": 408
},
{
"epoch": 136.33,
"learning_rate": 0.00015313281726033715,
"loss": 0.0031,
"step": 409
},
{
"epoch": 136.67,
"learning_rate": 0.00015285088917124556,
"loss": 0.0029,
"step": 410
},
{
"epoch": 137.0,
"learning_rate": 0.00015256837698105047,
"loss": 0.0028,
"step": 411
},
{
"epoch": 137.33,
"learning_rate": 0.00015228528381203962,
"loss": 0.003,
"step": 412
},
{
"epoch": 137.67,
"learning_rate": 0.00015200161279292155,
"loss": 0.0029,
"step": 413
},
{
"epoch": 138.0,
"learning_rate": 0.00015171736705879126,
"loss": 0.0028,
"step": 414
},
{
"epoch": 138.33,
"learning_rate": 0.00015143254975109538,
"loss": 0.0025,
"step": 415
},
{
"epoch": 138.67,
"learning_rate": 0.0001511471640175974,
"loss": 0.0031,
"step": 416
},
{
"epoch": 139.0,
"learning_rate": 0.00015086121301234316,
"loss": 0.0029,
"step": 417
},
{
"epoch": 139.33,
"learning_rate": 0.00015057469989562567,
"loss": 0.0027,
"step": 418
},
{
"epoch": 139.67,
"learning_rate": 0.00015028762783395034,
"loss": 0.0028,
"step": 419
},
{
"epoch": 140.0,
"learning_rate": 0.00015000000000000001,
"loss": 0.0031,
"step": 420
},
{
"epoch": 140.33,
"learning_rate": 0.0001497118195725998,
"loss": 0.0029,
"step": 421
},
{
"epoch": 140.67,
"learning_rate": 0.0001494230897366821,
"loss": 0.003,
"step": 422
},
{
"epoch": 141.0,
"learning_rate": 0.00014913381368325115,
"loss": 0.0027,
"step": 423
},
{
"epoch": 141.33,
"learning_rate": 0.00014884399460934805,
"loss": 0.003,
"step": 424
},
{
"epoch": 141.67,
"learning_rate": 0.00014855363571801523,
"loss": 0.0025,
"step": 425
},
{
"epoch": 142.0,
"learning_rate": 0.0001482627402182611,
"loss": 0.003,
"step": 426
},
{
"epoch": 142.33,
"learning_rate": 0.00014797131132502465,
"loss": 0.0028,
"step": 427
},
{
"epoch": 142.67,
"learning_rate": 0.00014767935225913975,
"loss": 0.0027,
"step": 428
},
{
"epoch": 143.0,
"learning_rate": 0.00014738686624729986,
"loss": 0.0031,
"step": 429
},
{
"epoch": 143.33,
"learning_rate": 0.00014709385652202203,
"loss": 0.0029,
"step": 430
},
{
"epoch": 143.67,
"learning_rate": 0.0001468003263216113,
"loss": 0.0029,
"step": 431
},
{
"epoch": 144.0,
"learning_rate": 0.00014650627889012507,
"loss": 0.0025,
"step": 432
},
{
"epoch": 144.33,
"learning_rate": 0.00014621171747733697,
"loss": 0.0028,
"step": 433
},
{
"epoch": 144.67,
"learning_rate": 0.00014591664533870118,
"loss": 0.0029,
"step": 434
},
{
"epoch": 145.0,
"learning_rate": 0.0001456210657353163,
"loss": 0.0026,
"step": 435
},
{
"epoch": 145.33,
"learning_rate": 0.0001453249819338894,
"loss": 0.0029,
"step": 436
},
{
"epoch": 145.67,
"learning_rate": 0.00014502839720669989,
"loss": 0.0027,
"step": 437
},
{
"epoch": 146.0,
"learning_rate": 0.00014473131483156327,
"loss": 0.0027,
"step": 438
},
{
"epoch": 146.33,
"learning_rate": 0.00014443373809179508,
"loss": 0.0028,
"step": 439
},
{
"epoch": 146.67,
"learning_rate": 0.0001441356702761744,
"loss": 0.0028,
"step": 440
},
{
"epoch": 147.0,
"learning_rate": 0.00014383711467890774,
"loss": 0.0026,
"step": 441
},
{
"epoch": 147.33,
"learning_rate": 0.00014353807459959242,
"loss": 0.0027,
"step": 442
},
{
"epoch": 147.67,
"learning_rate": 0.00014323855334318026,
"loss": 0.0026,
"step": 443
},
{
"epoch": 148.0,
"learning_rate": 0.00014293855421994094,
"loss": 0.0028,
"step": 444
},
{
"epoch": 148.33,
"learning_rate": 0.0001426380805454254,
"loss": 0.0028,
"step": 445
},
{
"epoch": 148.67,
"learning_rate": 0.00014233713564042937,
"loss": 0.0027,
"step": 446
},
{
"epoch": 149.0,
"learning_rate": 0.00014203572283095657,
"loss": 0.0026,
"step": 447
},
{
"epoch": 149.33,
"learning_rate": 0.0001417338454481818,
"loss": 0.0024,
"step": 448
},
{
"epoch": 149.67,
"learning_rate": 0.00014143150682841438,
"loss": 0.0029,
"step": 449
},
{
"epoch": 150.0,
"learning_rate": 0.00014112871031306119,
"loss": 0.0028,
"step": 450
},
{
"epoch": 150.33,
"learning_rate": 0.00014082545924858954,
"loss": 0.0027,
"step": 451
},
{
"epoch": 150.67,
"learning_rate": 0.00014052175698649053,
"loss": 0.0029,
"step": 452
},
{
"epoch": 151.0,
"learning_rate": 0.00014021760688324176,
"loss": 0.0023,
"step": 453
},
{
"epoch": 151.33,
"learning_rate": 0.0001399130123002703,
"loss": 0.0028,
"step": 454
},
{
"epoch": 151.67,
"learning_rate": 0.0001396079766039157,
"loss": 0.0027,
"step": 455
},
{
"epoch": 152.0,
"learning_rate": 0.00013930250316539238,
"loss": 0.0025,
"step": 456
},
{
"epoch": 152.33,
"learning_rate": 0.0001389965953607528,
"loss": 0.0027,
"step": 457
},
{
"epoch": 152.67,
"learning_rate": 0.00013869025657084995,
"loss": 0.0028,
"step": 458
},
{
"epoch": 153.0,
"learning_rate": 0.00013838349018130007,
"loss": 0.0024,
"step": 459
},
{
"epoch": 153.33,
"learning_rate": 0.00013807629958244498,
"loss": 0.0026,
"step": 460
},
{
"epoch": 153.67,
"learning_rate": 0.00013776868816931502,
"loss": 0.0027,
"step": 461
},
{
"epoch": 154.0,
"learning_rate": 0.00013746065934159123,
"loss": 0.0025,
"step": 462
},
{
"epoch": 154.33,
"learning_rate": 0.0001371522165035678,
"loss": 0.0027,
"step": 463
},
{
"epoch": 154.67,
"learning_rate": 0.00013684336306411468,
"loss": 0.0026,
"step": 464
},
{
"epoch": 155.0,
"learning_rate": 0.00013653410243663952,
"loss": 0.0026,
"step": 465
},
{
"epoch": 155.33,
"learning_rate": 0.00013622443803905027,
"loss": 0.0026,
"step": 466
},
{
"epoch": 155.67,
"learning_rate": 0.00013591437329371736,
"loss": 0.0026,
"step": 467
},
{
"epoch": 156.0,
"learning_rate": 0.00013560391162743569,
"loss": 0.0027,
"step": 468
},
{
"epoch": 156.33,
"learning_rate": 0.00013529305647138687,
"loss": 0.003,
"step": 469
},
{
"epoch": 156.67,
"learning_rate": 0.0001349818112611015,
"loss": 0.0024,
"step": 470
},
{
"epoch": 157.0,
"learning_rate": 0.00013467017943642073,
"loss": 0.0024,
"step": 471
},
{
"epoch": 157.33,
"learning_rate": 0.0001343581644414587,
"loss": 0.0024,
"step": 472
},
{
"epoch": 157.67,
"learning_rate": 0.00013404576972456431,
"loss": 0.0027,
"step": 473
},
{
"epoch": 158.0,
"learning_rate": 0.00013373299873828303,
"loss": 0.0026,
"step": 474
},
{
"epoch": 158.33,
"learning_rate": 0.00013341985493931877,
"loss": 0.0026,
"step": 475
},
{
"epoch": 158.67,
"learning_rate": 0.0001331063417884958,
"loss": 0.0024,
"step": 476
},
{
"epoch": 159.0,
"learning_rate": 0.00013279246275072046,
"loss": 0.0028,
"step": 477
},
{
"epoch": 159.33,
"learning_rate": 0.00013247822129494266,
"loss": 0.0026,
"step": 478
},
{
"epoch": 159.67,
"learning_rate": 0.00013216362089411783,
"loss": 0.0026,
"step": 479
},
{
"epoch": 160.0,
"learning_rate": 0.00013184866502516845,
"loss": 0.0027,
"step": 480
},
{
"epoch": 160.33,
"learning_rate": 0.00013153335716894544,
"loss": 0.0028,
"step": 481
},
{
"epoch": 160.67,
"learning_rate": 0.00013121770081018998,
"loss": 0.0026,
"step": 482
},
{
"epoch": 161.0,
"learning_rate": 0.00013090169943749476,
"loss": 0.0024,
"step": 483
},
{
"epoch": 161.33,
"learning_rate": 0.00013058535654326554,
"loss": 0.0027,
"step": 484
},
{
"epoch": 161.67,
"learning_rate": 0.0001302686756236826,
"loss": 0.0025,
"step": 485
},
{
"epoch": 162.0,
"learning_rate": 0.00012995166017866193,
"loss": 0.0027,
"step": 486
},
{
"epoch": 162.33,
"learning_rate": 0.00012963431371181672,
"loss": 0.0025,
"step": 487
},
{
"epoch": 162.67,
"learning_rate": 0.00012931663973041855,
"loss": 0.0026,
"step": 488
},
{
"epoch": 163.0,
"learning_rate": 0.00012899864174535864,
"loss": 0.0025,
"step": 489
},
{
"epoch": 163.33,
"learning_rate": 0.00012868032327110904,
"loss": 0.0026,
"step": 490
},
{
"epoch": 163.67,
"learning_rate": 0.00012836168782568385,
"loss": 0.0027,
"step": 491
},
{
"epoch": 164.0,
"learning_rate": 0.00012804273893060028,
"loss": 0.0023,
"step": 492
},
{
"epoch": 164.33,
"learning_rate": 0.00012772348011083973,
"loss": 0.0025,
"step": 493
},
{
"epoch": 164.67,
"learning_rate": 0.00012740391489480884,
"loss": 0.0026,
"step": 494
},
{
"epoch": 165.0,
"learning_rate": 0.00012708404681430053,
"loss": 0.0027,
"step": 495
},
{
"epoch": 165.33,
"learning_rate": 0.0001267638794044549,
"loss": 0.0026,
"step": 496
},
{
"epoch": 165.67,
"learning_rate": 0.00012644341620372023,
"loss": 0.0026,
"step": 497
},
{
"epoch": 166.0,
"learning_rate": 0.00012612266075381386,
"loss": 0.0024,
"step": 498
},
{
"epoch": 166.33,
"learning_rate": 0.00012580161659968294,
"loss": 0.0026,
"step": 499
},
{
"epoch": 166.67,
"learning_rate": 0.0001254802872894655,
"loss": 0.0025,
"step": 500
},
{
"epoch": 167.0,
"learning_rate": 0.00012515867637445086,
"loss": 0.0027,
"step": 501
},
{
"epoch": 167.33,
"learning_rate": 0.00012483678740904082,
"loss": 0.0028,
"step": 502
},
{
"epoch": 167.67,
"learning_rate": 0.00012451462395071,
"loss": 0.0024,
"step": 503
},
{
"epoch": 168.0,
"learning_rate": 0.00012419218955996676,
"loss": 0.0024,
"step": 504
},
{
"epoch": 168.33,
"learning_rate": 0.0001238694878003138,
"loss": 0.0024,
"step": 505
},
{
"epoch": 168.67,
"learning_rate": 0.00012354652223820858,
"loss": 0.0022,
"step": 506
},
{
"epoch": 169.0,
"learning_rate": 0.00012322329644302426,
"loss": 0.0031,
"step": 507
},
{
"epoch": 169.33,
"learning_rate": 0.00012289981398700995,
"loss": 0.0022,
"step": 508
},
{
"epoch": 169.67,
"learning_rate": 0.00012257607844525146,
"loss": 0.0026,
"step": 509
},
{
"epoch": 170.0,
"learning_rate": 0.00012225209339563145,
"loss": 0.0027,
"step": 510
},
{
"epoch": 170.33,
"learning_rate": 0.00012192786241879033,
"loss": 0.0024,
"step": 511
},
{
"epoch": 170.67,
"learning_rate": 0.0001216033890980864,
"loss": 0.0025,
"step": 512
},
{
"epoch": 171.0,
"learning_rate": 0.00012127867701955622,
"loss": 0.0026,
"step": 513
},
{
"epoch": 171.33,
"learning_rate": 0.0001209537297718752,
"loss": 0.0026,
"step": 514
},
{
"epoch": 171.67,
"learning_rate": 0.00012062855094631778,
"loss": 0.0023,
"step": 515
},
{
"epoch": 172.0,
"learning_rate": 0.00012030314413671762,
"loss": 0.0027,
"step": 516
},
{
"epoch": 172.33,
"learning_rate": 0.00011997751293942827,
"loss": 0.0027,
"step": 517
},
{
"epoch": 172.67,
"learning_rate": 0.00011965166095328301,
"loss": 0.0023,
"step": 518
},
{
"epoch": 173.0,
"learning_rate": 0.00011932559177955533,
"loss": 0.0024,
"step": 519
},
{
"epoch": 173.33,
"learning_rate": 0.00011899930902191902,
"loss": 0.0024,
"step": 520
},
{
"epoch": 173.67,
"learning_rate": 0.00011867281628640835,
"loss": 0.0026,
"step": 521
},
{
"epoch": 174.0,
"learning_rate": 0.00011834611718137824,
"loss": 0.0024,
"step": 522
},
{
"epoch": 174.33,
"learning_rate": 0.00011801921531746444,
"loss": 0.0023,
"step": 523
},
{
"epoch": 174.67,
"learning_rate": 0.00011769211430754357,
"loss": 0.0025,
"step": 524
},
{
"epoch": 175.0,
"learning_rate": 0.00011736481776669306,
"loss": 0.0025,
"step": 525
},
{
"epoch": 175.33,
"learning_rate": 0.00011703732931215141,
"loss": 0.0024,
"step": 526
},
{
"epoch": 175.67,
"learning_rate": 0.00011670965256327818,
"loss": 0.0024,
"step": 527
},
{
"epoch": 176.0,
"learning_rate": 0.00011638179114151377,
"loss": 0.0024,
"step": 528
},
{
"epoch": 176.33,
"learning_rate": 0.00011605374867033977,
"loss": 0.0024,
"step": 529
},
{
"epoch": 176.67,
"learning_rate": 0.00011572552877523854,
"loss": 0.0024,
"step": 530
},
{
"epoch": 177.0,
"learning_rate": 0.00011539713508365335,
"loss": 0.0025,
"step": 531
},
{
"epoch": 177.33,
"learning_rate": 0.00011506857122494831,
"loss": 0.0024,
"step": 532
},
{
"epoch": 177.67,
"learning_rate": 0.00011473984083036813,
"loss": 0.0028,
"step": 533
},
{
"epoch": 178.0,
"learning_rate": 0.00011441094753299801,
"loss": 0.0021,
"step": 534
},
{
"epoch": 178.33,
"learning_rate": 0.00011408189496772368,
"loss": 0.0027,
"step": 535
},
{
"epoch": 178.67,
"learning_rate": 0.00011375268677119089,
"loss": 0.0024,
"step": 536
},
{
"epoch": 179.0,
"learning_rate": 0.00011342332658176555,
"loss": 0.0022,
"step": 537
},
{
"epoch": 179.33,
"learning_rate": 0.00011309381803949333,
"loss": 0.0027,
"step": 538
},
{
"epoch": 179.67,
"learning_rate": 0.00011276416478605949,
"loss": 0.0024,
"step": 539
},
{
"epoch": 180.0,
"learning_rate": 0.00011243437046474853,
"loss": 0.0023,
"step": 540
},
{
"epoch": 180.33,
"learning_rate": 0.00011210443872040414,
"loss": 0.0022,
"step": 541
},
{
"epoch": 180.67,
"learning_rate": 0.00011177437319938875,
"loss": 0.0026,
"step": 542
},
{
"epoch": 181.0,
"learning_rate": 0.0001114441775495432,
"loss": 0.0029,
"step": 543
},
{
"epoch": 181.33,
"learning_rate": 0.00011111385542014663,
"loss": 0.0025,
"step": 544
},
{
"epoch": 181.67,
"learning_rate": 0.00011078341046187589,
"loss": 0.0022,
"step": 545
},
{
"epoch": 182.0,
"learning_rate": 0.00011045284632676536,
"loss": 0.0027,
"step": 546
},
{
"epoch": 182.33,
"learning_rate": 0.00011012216666816659,
"loss": 0.0025,
"step": 547
},
{
"epoch": 182.67,
"learning_rate": 0.00010979137514070782,
"loss": 0.0025,
"step": 548
},
{
"epoch": 183.0,
"learning_rate": 0.00010946047540025372,
"loss": 0.0024,
"step": 549
},
{
"epoch": 183.33,
"learning_rate": 0.00010912947110386484,
"loss": 0.0024,
"step": 550
},
{
"epoch": 183.67,
"learning_rate": 0.00010879836590975731,
"loss": 0.0024,
"step": 551
},
{
"epoch": 184.0,
"learning_rate": 0.00010846716347726233,
"loss": 0.0025,
"step": 552
},
{
"epoch": 184.33,
"learning_rate": 0.00010813586746678583,
"loss": 0.0026,
"step": 553
},
{
"epoch": 184.67,
"learning_rate": 0.00010780448153976793,
"loss": 0.0023,
"step": 554
},
{
"epoch": 185.0,
"learning_rate": 0.00010747300935864243,
"loss": 0.0023,
"step": 555
},
{
"epoch": 185.33,
"learning_rate": 0.00010714145458679649,
"loss": 0.0027,
"step": 556
},
{
"epoch": 185.67,
"learning_rate": 0.00010680982088853002,
"loss": 0.0022,
"step": 557
},
{
"epoch": 186.0,
"learning_rate": 0.00010647811192901518,
"loss": 0.0023,
"step": 558
},
{
"epoch": 186.33,
"learning_rate": 0.00010614633137425598,
"loss": 0.0022,
"step": 559
},
{
"epoch": 186.67,
"learning_rate": 0.00010581448289104758,
"loss": 0.0025,
"step": 560
},
{
"epoch": 187.0,
"learning_rate": 0.00010548257014693601,
"loss": 0.0027,
"step": 561
},
{
"epoch": 187.33,
"learning_rate": 0.0001051505968101774,
"loss": 0.0023,
"step": 562
},
{
"epoch": 187.67,
"learning_rate": 0.00010481856654969758,
"loss": 0.0027,
"step": 563
},
{
"epoch": 188.0,
"learning_rate": 0.00010448648303505151,
"loss": 0.0021,
"step": 564
},
{
"epoch": 188.33,
"learning_rate": 0.00010415434993638269,
"loss": 0.0026,
"step": 565
},
{
"epoch": 188.67,
"learning_rate": 0.00010382217092438255,
"loss": 0.0023,
"step": 566
},
{
"epoch": 189.0,
"learning_rate": 0.00010348994967025012,
"loss": 0.0022,
"step": 567
},
{
"epoch": 189.33,
"learning_rate": 0.0001031576898456511,
"loss": 0.0022,
"step": 568
},
{
"epoch": 189.67,
"learning_rate": 0.00010282539512267757,
"loss": 0.0024,
"step": 569
},
{
"epoch": 190.0,
"learning_rate": 0.0001024930691738073,
"loss": 0.0028,
"step": 570
},
{
"epoch": 190.33,
"learning_rate": 0.00010216071567186312,
"loss": 0.0022,
"step": 571
},
{
"epoch": 190.67,
"learning_rate": 0.00010182833828997238,
"loss": 0.0027,
"step": 572
},
{
"epoch": 191.0,
"learning_rate": 0.00010149594070152638,
"loss": 0.0021,
"step": 573
},
{
"epoch": 191.33,
"learning_rate": 0.00010116352658013973,
"loss": 0.0024,
"step": 574
},
{
"epoch": 191.67,
"learning_rate": 0.00010083109959960973,
"loss": 0.0024,
"step": 575
},
{
"epoch": 192.0,
"learning_rate": 0.00010049866343387581,
"loss": 0.0025,
"step": 576
},
{
"epoch": 192.33,
"learning_rate": 0.00010016622175697898,
"loss": 0.0024,
"step": 577
},
{
"epoch": 192.67,
"learning_rate": 9.983377824302106e-05,
"loss": 0.0024,
"step": 578
},
{
"epoch": 193.0,
"learning_rate": 9.950133656612421e-05,
"loss": 0.0022,
"step": 579
},
{
"epoch": 193.33,
"learning_rate": 9.916890040039031e-05,
"loss": 0.0023,
"step": 580
},
{
"epoch": 193.67,
"learning_rate": 9.883647341986032e-05,
"loss": 0.0023,
"step": 581
},
{
"epoch": 194.0,
"learning_rate": 9.850405929847366e-05,
"loss": 0.0025,
"step": 582
},
{
"epoch": 194.33,
"learning_rate": 9.817166171002765e-05,
"loss": 0.0023,
"step": 583
},
{
"epoch": 194.67,
"learning_rate": 9.783928432813688e-05,
"loss": 0.0026,
"step": 584
},
{
"epoch": 195.0,
"learning_rate": 9.750693082619273e-05,
"loss": 0.0022,
"step": 585
},
{
"epoch": 195.33,
"learning_rate": 9.717460487732245e-05,
"loss": 0.0023,
"step": 586
},
{
"epoch": 195.67,
"learning_rate": 9.68423101543489e-05,
"loss": 0.0025,
"step": 587
},
{
"epoch": 196.0,
"learning_rate": 9.651005032974994e-05,
"loss": 0.0026,
"step": 588
},
{
"epoch": 196.33,
"learning_rate": 9.617782907561748e-05,
"loss": 0.0025,
"step": 589
},
{
"epoch": 196.67,
"learning_rate": 9.584565006361734e-05,
"loss": 0.0023,
"step": 590
},
{
"epoch": 197.0,
"learning_rate": 9.551351696494854e-05,
"loss": 0.0024,
"step": 591
},
{
"epoch": 197.33,
"learning_rate": 9.518143345030246e-05,
"loss": 0.0022,
"step": 592
},
{
"epoch": 197.67,
"learning_rate": 9.48494031898226e-05,
"loss": 0.0026,
"step": 593
},
{
"epoch": 198.0,
"learning_rate": 9.451742985306398e-05,
"loss": 0.0022,
"step": 594
},
{
"epoch": 198.33,
"learning_rate": 9.418551710895243e-05,
"loss": 0.0021,
"step": 595
},
{
"epoch": 198.67,
"learning_rate": 9.385366862574404e-05,
"loss": 0.0026,
"step": 596
},
{
"epoch": 199.0,
"learning_rate": 9.352188807098481e-05,
"loss": 0.0025,
"step": 597
},
{
"epoch": 199.33,
"learning_rate": 9.319017911147e-05,
"loss": 0.0024,
"step": 598
},
{
"epoch": 199.67,
"learning_rate": 9.285854541320352e-05,
"loss": 0.0024,
"step": 599
},
{
"epoch": 200.0,
"learning_rate": 9.252699064135758e-05,
"loss": 0.0021,
"step": 600
},
{
"epoch": 200.33,
"learning_rate": 9.219551846023211e-05,
"loss": 0.0022,
"step": 601
},
{
"epoch": 200.67,
"learning_rate": 9.186413253321418e-05,
"loss": 0.0025,
"step": 602
},
{
"epoch": 201.0,
"learning_rate": 9.153283652273768e-05,
"loss": 0.0023,
"step": 603
},
{
"epoch": 201.33,
"learning_rate": 9.120163409024271e-05,
"loss": 0.0023,
"step": 604
},
{
"epoch": 201.67,
"learning_rate": 9.087052889613518e-05,
"loss": 0.0023,
"step": 605
},
{
"epoch": 202.0,
"learning_rate": 9.05395245997463e-05,
"loss": 0.0025,
"step": 606
},
{
"epoch": 202.33,
"learning_rate": 9.020862485929219e-05,
"loss": 0.0026,
"step": 607
},
{
"epoch": 202.67,
"learning_rate": 8.987783333183344e-05,
"loss": 0.0022,
"step": 608
},
{
"epoch": 203.0,
"learning_rate": 8.954715367323468e-05,
"loss": 0.0023,
"step": 609
},
{
"epoch": 203.33,
"learning_rate": 8.921658953812415e-05,
"loss": 0.0023,
"step": 610
},
{
"epoch": 203.67,
"learning_rate": 8.888614457985341e-05,
"loss": 0.0023,
"step": 611
},
{
"epoch": 204.0,
"learning_rate": 8.855582245045683e-05,
"loss": 0.0025,
"step": 612
},
{
"epoch": 204.33,
"learning_rate": 8.822562680061125e-05,
"loss": 0.0023,
"step": 613
},
{
"epoch": 204.67,
"learning_rate": 8.789556127959585e-05,
"loss": 0.0023,
"step": 614
},
{
"epoch": 205.0,
"learning_rate": 8.756562953525152e-05,
"loss": 0.0023,
"step": 615
},
{
"epoch": 205.33,
"learning_rate": 8.723583521394054e-05,
"loss": 0.0023,
"step": 616
},
{
"epoch": 205.67,
"learning_rate": 8.690618196050666e-05,
"loss": 0.0024,
"step": 617
},
{
"epoch": 206.0,
"learning_rate": 8.657667341823448e-05,
"loss": 0.0021,
"step": 618
},
{
"epoch": 206.33,
"learning_rate": 8.624731322880912e-05,
"loss": 0.0025,
"step": 619
},
{
"epoch": 206.67,
"learning_rate": 8.591810503227635e-05,
"loss": 0.0023,
"step": 620
},
{
"epoch": 207.0,
"learning_rate": 8.558905246700201e-05,
"loss": 0.0021,
"step": 621
},
{
"epoch": 207.33,
"learning_rate": 8.526015916963191e-05,
"loss": 0.0021,
"step": 622
},
{
"epoch": 207.67,
"learning_rate": 8.49314287750517e-05,
"loss": 0.0023,
"step": 623
},
{
"epoch": 208.0,
"learning_rate": 8.460286491634663e-05,
"loss": 0.0026,
"step": 624
},
{
"epoch": 208.33,
"learning_rate": 8.427447122476148e-05,
"loss": 0.0024,
"step": 625
},
{
"epoch": 208.67,
"learning_rate": 8.394625132966025e-05,
"loss": 0.0023,
"step": 626
},
{
"epoch": 209.0,
"learning_rate": 8.361820885848624e-05,
"loss": 0.0024,
"step": 627
},
{
"epoch": 209.33,
"learning_rate": 8.329034743672187e-05,
"loss": 0.0023,
"step": 628
},
{
"epoch": 209.67,
"learning_rate": 8.296267068784862e-05,
"loss": 0.0022,
"step": 629
},
{
"epoch": 210.0,
"learning_rate": 8.263518223330697e-05,
"loss": 0.0025,
"step": 630
},
{
"epoch": 210.33,
"learning_rate": 8.230788569245648e-05,
"loss": 0.0025,
"step": 631
},
{
"epoch": 210.67,
"learning_rate": 8.198078468253557e-05,
"loss": 0.0022,
"step": 632
},
{
"epoch": 211.0,
"learning_rate": 8.165388281862178e-05,
"loss": 0.0023,
"step": 633
},
{
"epoch": 211.33,
"learning_rate": 8.132718371359166e-05,
"loss": 0.0023,
"step": 634
},
{
"epoch": 211.67,
"learning_rate": 8.100069097808103e-05,
"loss": 0.0024,
"step": 635
},
{
"epoch": 212.0,
"learning_rate": 8.067440822044469e-05,
"loss": 0.0023,
"step": 636
},
{
"epoch": 212.33,
"learning_rate": 8.034833904671698e-05,
"loss": 0.0024,
"step": 637
},
{
"epoch": 212.67,
"learning_rate": 8.002248706057177e-05,
"loss": 0.0022,
"step": 638
},
{
"epoch": 213.0,
"learning_rate": 7.96968558632824e-05,
"loss": 0.0022,
"step": 639
},
{
"epoch": 213.33,
"learning_rate": 7.937144905368226e-05,
"loss": 0.002,
"step": 640
},
{
"epoch": 213.67,
"learning_rate": 7.904627022812483e-05,
"loss": 0.0024,
"step": 641
},
{
"epoch": 214.0,
"learning_rate": 7.872132298044382e-05,
"loss": 0.0026,
"step": 642
},
{
"epoch": 214.33,
"learning_rate": 7.839661090191362e-05,
"loss": 0.0023,
"step": 643
},
{
"epoch": 214.67,
"learning_rate": 7.807213758120966e-05,
"loss": 0.0023,
"step": 644
},
{
"epoch": 215.0,
"learning_rate": 7.774790660436858e-05,
"loss": 0.0023,
"step": 645
},
{
"epoch": 215.33,
"learning_rate": 7.742392155474858e-05,
"loss": 0.0022,
"step": 646
},
{
"epoch": 215.67,
"learning_rate": 7.710018601299004e-05,
"loss": 0.0022,
"step": 647
},
{
"epoch": 216.0,
"learning_rate": 7.677670355697577e-05,
"loss": 0.0025,
"step": 648
},
{
"epoch": 216.33,
"learning_rate": 7.645347776179144e-05,
"loss": 0.0023,
"step": 649
},
{
"epoch": 216.67,
"learning_rate": 7.613051219968623e-05,
"loss": 0.0024,
"step": 650
},
{
"epoch": 217.0,
"learning_rate": 7.580781044003324e-05,
"loss": 0.0022,
"step": 651
},
{
"epoch": 217.33,
"learning_rate": 7.548537604929001e-05,
"loss": 0.0026,
"step": 652
},
{
"epoch": 217.67,
"learning_rate": 7.516321259095921e-05,
"loss": 0.0023,
"step": 653
},
{
"epoch": 218.0,
"learning_rate": 7.484132362554915e-05,
"loss": 0.0021,
"step": 654
},
{
"epoch": 218.33,
"learning_rate": 7.451971271053455e-05,
"loss": 0.0021,
"step": 655
},
{
"epoch": 218.67,
"learning_rate": 7.419838340031708e-05,
"loss": 0.0026,
"step": 656
},
{
"epoch": 219.0,
"learning_rate": 7.387733924618617e-05,
"loss": 0.0021,
"step": 657
},
{
"epoch": 219.33,
"learning_rate": 7.35565837962798e-05,
"loss": 0.0024,
"step": 658
},
{
"epoch": 219.67,
"learning_rate": 7.323612059554513e-05,
"loss": 0.0024,
"step": 659
},
{
"epoch": 220.0,
"learning_rate": 7.291595318569951e-05,
"loss": 0.002,
"step": 660
},
{
"epoch": 220.33,
"learning_rate": 7.25960851051912e-05,
"loss": 0.0023,
"step": 661
},
{
"epoch": 220.67,
"learning_rate": 7.227651988916031e-05,
"loss": 0.0023,
"step": 662
},
{
"epoch": 221.0,
"learning_rate": 7.195726106939974e-05,
"loss": 0.0022,
"step": 663
},
{
"epoch": 221.33,
"learning_rate": 7.163831217431615e-05,
"loss": 0.0022,
"step": 664
},
{
"epoch": 221.67,
"learning_rate": 7.131967672889101e-05,
"loss": 0.0022,
"step": 665
},
{
"epoch": 222.0,
"learning_rate": 7.100135825464139e-05,
"loss": 0.0025,
"step": 666
},
{
"epoch": 222.33,
"learning_rate": 7.068336026958146e-05,
"loss": 0.0021,
"step": 667
},
{
"epoch": 222.67,
"learning_rate": 7.036568628818331e-05,
"loss": 0.0023,
"step": 668
},
{
"epoch": 223.0,
"learning_rate": 7.004833982133808e-05,
"loss": 0.0026,
"step": 669
},
{
"epoch": 223.33,
"learning_rate": 6.973132437631742e-05,
"loss": 0.0022,
"step": 670
},
{
"epoch": 223.67,
"learning_rate": 6.941464345673449e-05,
"loss": 0.0023,
"step": 671
},
{
"epoch": 224.0,
"learning_rate": 6.909830056250527e-05,
"loss": 0.0024,
"step": 672
},
{
"epoch": 224.33,
"learning_rate": 6.878229918981003e-05,
"loss": 0.0024,
"step": 673
},
{
"epoch": 224.67,
"learning_rate": 6.846664283105455e-05,
"loss": 0.0021,
"step": 674
},
{
"epoch": 225.0,
"learning_rate": 6.815133497483157e-05,
"loss": 0.0022,
"step": 675
},
{
"epoch": 225.33,
"learning_rate": 6.783637910588216e-05,
"loss": 0.0021,
"step": 676
},
{
"epoch": 225.67,
"learning_rate": 6.752177870505736e-05,
"loss": 0.0023,
"step": 677
},
{
"epoch": 226.0,
"learning_rate": 6.720753724927958e-05,
"loss": 0.0024,
"step": 678
},
{
"epoch": 226.33,
"learning_rate": 6.68936582115042e-05,
"loss": 0.0021,
"step": 679
},
{
"epoch": 226.67,
"learning_rate": 6.658014506068126e-05,
"loss": 0.0023,
"step": 680
},
{
"epoch": 227.0,
"learning_rate": 6.626700126171702e-05,
"loss": 0.0024,
"step": 681
},
{
"epoch": 227.33,
"learning_rate": 6.595423027543571e-05,
"loss": 0.0024,
"step": 682
},
{
"epoch": 227.67,
"learning_rate": 6.56418355585413e-05,
"loss": 0.002,
"step": 683
},
{
"epoch": 228.0,
"learning_rate": 6.532982056357928e-05,
"loss": 0.0023,
"step": 684
},
{
"epoch": 228.33,
"learning_rate": 6.501818873889855e-05,
"loss": 0.0025,
"step": 685
},
{
"epoch": 228.67,
"learning_rate": 6.470694352861312e-05,
"loss": 0.0022,
"step": 686
},
{
"epoch": 229.0,
"learning_rate": 6.439608837256432e-05,
"loss": 0.0021,
"step": 687
},
{
"epoch": 229.33,
"learning_rate": 6.408562670628266e-05,
"loss": 0.0022,
"step": 688
},
{
"epoch": 229.67,
"learning_rate": 6.377556196094973e-05,
"loss": 0.0023,
"step": 689
},
{
"epoch": 230.0,
"learning_rate": 6.34658975633605e-05,
"loss": 0.0022,
"step": 690
},
{
"epoch": 230.33,
"learning_rate": 6.315663693588534e-05,
"loss": 0.0022,
"step": 691
},
{
"epoch": 230.67,
"learning_rate": 6.28477834964322e-05,
"loss": 0.0023,
"step": 692
},
{
"epoch": 231.0,
"learning_rate": 6.25393406584088e-05,
"loss": 0.0023,
"step": 693
},
{
"epoch": 231.33,
"learning_rate": 6.223131183068499e-05,
"loss": 0.0022,
"step": 694
},
{
"epoch": 231.67,
"learning_rate": 6.192370041755505e-05,
"loss": 0.0023,
"step": 695
},
{
"epoch": 232.0,
"learning_rate": 6.161650981869998e-05,
"loss": 0.0026,
"step": 696
},
{
"epoch": 232.33,
"learning_rate": 6.130974342915005e-05,
"loss": 0.0024,
"step": 697
},
{
"epoch": 232.67,
"learning_rate": 6.100340463924723e-05,
"loss": 0.002,
"step": 698
},
{
"epoch": 233.0,
"learning_rate": 6.069749683460765e-05,
"loss": 0.0024,
"step": 699
},
{
"epoch": 233.33,
"learning_rate": 6.039202339608432e-05,
"loss": 0.0024,
"step": 700
},
{
"epoch": 233.33,
"eval_loss": 1.320059061050415,
"eval_runtime": 3.5029,
"eval_samples_per_second": 5.995,
"eval_steps_per_second": 0.856,
"step": 700
},
{
"epoch": 233.67,
"learning_rate": 6.008698769972967e-05,
"loss": 0.0023,
"step": 701
},
{
"epoch": 234.0,
"learning_rate": 5.978239311675826e-05,
"loss": 0.002,
"step": 702
},
{
"epoch": 234.33,
"learning_rate": 5.9478243013509505e-05,
"loss": 0.0022,
"step": 703
},
{
"epoch": 234.67,
"learning_rate": 5.9174540751410487e-05,
"loss": 0.0023,
"step": 704
},
{
"epoch": 235.0,
"learning_rate": 5.887128968693887e-05,
"loss": 0.0022,
"step": 705
},
{
"epoch": 235.33,
"learning_rate": 5.856849317158563e-05,
"loss": 0.0023,
"step": 706
},
{
"epoch": 235.67,
"learning_rate": 5.8266154551818216e-05,
"loss": 0.0021,
"step": 707
},
{
"epoch": 236.0,
"learning_rate": 5.796427716904347e-05,
"loss": 0.0024,
"step": 708
},
{
"epoch": 236.33,
"learning_rate": 5.7662864359570624e-05,
"loss": 0.0023,
"step": 709
},
{
"epoch": 236.67,
"learning_rate": 5.736191945457463e-05,
"loss": 0.0022,
"step": 710
},
{
"epoch": 237.0,
"learning_rate": 5.7061445780059074e-05,
"loss": 0.0024,
"step": 711
},
{
"epoch": 237.33,
"learning_rate": 5.676144665681974e-05,
"loss": 0.002,
"step": 712
},
{
"epoch": 237.67,
"learning_rate": 5.6461925400407576e-05,
"loss": 0.0023,
"step": 713
},
{
"epoch": 238.0,
"learning_rate": 5.616288532109225e-05,
"loss": 0.0024,
"step": 714
},
{
"epoch": 238.33,
"learning_rate": 5.58643297238256e-05,
"loss": 0.0024,
"step": 715
},
{
"epoch": 238.67,
"learning_rate": 5.5566261908204966e-05,
"loss": 0.0023,
"step": 716
},
{
"epoch": 239.0,
"learning_rate": 5.526868516843673e-05,
"loss": 0.002,
"step": 717
},
{
"epoch": 239.33,
"learning_rate": 5.497160279330014e-05,
"loss": 0.0024,
"step": 718
},
{
"epoch": 239.67,
"learning_rate": 5.467501806611062e-05,
"loss": 0.0021,
"step": 719
},
{
"epoch": 240.0,
"learning_rate": 5.43789342646837e-05,
"loss": 0.0023,
"step": 720
},
{
"epoch": 240.33,
"learning_rate": 5.4083354661298814e-05,
"loss": 0.0025,
"step": 721
},
{
"epoch": 240.67,
"learning_rate": 5.378828252266308e-05,
"loss": 0.002,
"step": 722
},
{
"epoch": 241.0,
"learning_rate": 5.349372110987496e-05,
"loss": 0.0023,
"step": 723
},
{
"epoch": 241.33,
"learning_rate": 5.3199673678388685e-05,
"loss": 0.0024,
"step": 724
},
{
"epoch": 241.67,
"learning_rate": 5.290614347797802e-05,
"loss": 0.0021,
"step": 725
},
{
"epoch": 242.0,
"learning_rate": 5.261313375270014e-05,
"loss": 0.0022,
"step": 726
},
{
"epoch": 242.33,
"learning_rate": 5.232064774086022e-05,
"loss": 0.0022,
"step": 727
},
{
"epoch": 242.67,
"learning_rate": 5.2028688674975415e-05,
"loss": 0.0023,
"step": 728
},
{
"epoch": 243.0,
"learning_rate": 5.1737259781738936e-05,
"loss": 0.0022,
"step": 729
},
{
"epoch": 243.33,
"learning_rate": 5.1446364281984774e-05,
"loss": 0.0022,
"step": 730
},
{
"epoch": 243.67,
"learning_rate": 5.115600539065197e-05,
"loss": 0.0024,
"step": 731
},
{
"epoch": 244.0,
"learning_rate": 5.086618631674888e-05,
"loss": 0.0021,
"step": 732
},
{
"epoch": 244.33,
"learning_rate": 5.057691026331792e-05,
"loss": 0.0023,
"step": 733
},
{
"epoch": 244.67,
"learning_rate": 5.02881804274002e-05,
"loss": 0.0022,
"step": 734
},
{
"epoch": 245.0,
"learning_rate": 5.000000000000002e-05,
"loss": 0.0023,
"step": 735
},
{
"epoch": 245.33,
"learning_rate": 4.971237216604967e-05,
"loss": 0.0022,
"step": 736
},
{
"epoch": 245.67,
"learning_rate": 4.942530010437435e-05,
"loss": 0.0023,
"step": 737
},
{
"epoch": 246.0,
"learning_rate": 4.913878698765686e-05,
"loss": 0.0022,
"step": 738
},
{
"epoch": 246.33,
"learning_rate": 4.885283598240259e-05,
"loss": 0.0023,
"step": 739
},
{
"epoch": 246.67,
"learning_rate": 4.856745024890466e-05,
"loss": 0.0023,
"step": 740
},
{
"epoch": 247.0,
"learning_rate": 4.8282632941208725e-05,
"loss": 0.0022,
"step": 741
},
{
"epoch": 247.33,
"learning_rate": 4.799838720707846e-05,
"loss": 0.0021,
"step": 742
},
{
"epoch": 247.67,
"learning_rate": 4.771471618796043e-05,
"loss": 0.0024,
"step": 743
},
{
"epoch": 248.0,
"learning_rate": 4.743162301894952e-05,
"loss": 0.0023,
"step": 744
},
{
"epoch": 248.33,
"learning_rate": 4.7149110828754464e-05,
"loss": 0.0021,
"step": 745
},
{
"epoch": 248.67,
"learning_rate": 4.686718273966291e-05,
"loss": 0.0023,
"step": 746
},
{
"epoch": 249.0,
"learning_rate": 4.658584186750713e-05,
"loss": 0.0023,
"step": 747
},
{
"epoch": 249.33,
"learning_rate": 4.6305091321629666e-05,
"loss": 0.0021,
"step": 748
},
{
"epoch": 249.67,
"learning_rate": 4.6024934204848745e-05,
"loss": 0.0024,
"step": 749
},
{
"epoch": 250.0,
"learning_rate": 4.574537361342407e-05,
"loss": 0.0022,
"step": 750
},
{
"epoch": 250.33,
"learning_rate": 4.5466412637022704e-05,
"loss": 0.0022,
"step": 751
},
{
"epoch": 250.67,
"learning_rate": 4.518805435868492e-05,
"loss": 0.0022,
"step": 752
},
{
"epoch": 251.0,
"learning_rate": 4.491030185478976e-05,
"loss": 0.0025,
"step": 753
},
{
"epoch": 251.33,
"learning_rate": 4.4633158195021594e-05,
"loss": 0.0025,
"step": 754
},
{
"epoch": 251.67,
"learning_rate": 4.435662644233594e-05,
"loss": 0.0021,
"step": 755
},
{
"epoch": 252.0,
"learning_rate": 4.4080709652925336e-05,
"loss": 0.0021,
"step": 756
},
{
"epoch": 252.33,
"learning_rate": 4.380541087618606e-05,
"loss": 0.0022,
"step": 757
},
{
"epoch": 252.67,
"learning_rate": 4.3530733154684164e-05,
"loss": 0.0024,
"step": 758
},
{
"epoch": 253.0,
"learning_rate": 4.3256679524121834e-05,
"loss": 0.0021,
"step": 759
},
{
"epoch": 253.33,
"learning_rate": 4.298325301330383e-05,
"loss": 0.0024,
"step": 760
},
{
"epoch": 253.67,
"learning_rate": 4.27104566441042e-05,
"loss": 0.0021,
"step": 761
},
{
"epoch": 254.0,
"learning_rate": 4.2438293431432665e-05,
"loss": 0.0022,
"step": 762
},
{
"epoch": 254.33,
"learning_rate": 4.216676638320135e-05,
"loss": 0.0022,
"step": 763
},
{
"epoch": 254.67,
"learning_rate": 4.189587850029169e-05,
"loss": 0.0023,
"step": 764
},
{
"epoch": 255.0,
"learning_rate": 4.1625632776521037e-05,
"loss": 0.0023,
"step": 765
},
{
"epoch": 255.33,
"learning_rate": 4.1356032198609706e-05,
"loss": 0.0023,
"step": 766
},
{
"epoch": 255.67,
"learning_rate": 4.108707974614804e-05,
"loss": 0.0022,
"step": 767
},
{
"epoch": 256.0,
"learning_rate": 4.081877839156325e-05,
"loss": 0.0021,
"step": 768
},
{
"epoch": 256.33,
"learning_rate": 4.0551131100086745e-05,
"loss": 0.0021,
"step": 769
},
{
"epoch": 256.67,
"learning_rate": 4.028414082972141e-05,
"loss": 0.0023,
"step": 770
},
{
"epoch": 257.0,
"learning_rate": 4.001781053120863e-05,
"loss": 0.0021,
"step": 771
},
{
"epoch": 257.33,
"learning_rate": 3.975214314799607e-05,
"loss": 0.0021,
"step": 772
},
{
"epoch": 257.67,
"learning_rate": 3.94871416162048e-05,
"loss": 0.0024,
"step": 773
},
{
"epoch": 258.0,
"learning_rate": 3.9222808864597004e-05,
"loss": 0.002,
"step": 774
},
{
"epoch": 258.33,
"learning_rate": 3.89591478145437e-05,
"loss": 0.002,
"step": 775
},
{
"epoch": 258.67,
"learning_rate": 3.8696161379992225e-05,
"loss": 0.0022,
"step": 776
},
{
"epoch": 259.0,
"learning_rate": 3.843385246743417e-05,
"loss": 0.0025,
"step": 777
},
{
"epoch": 259.33,
"learning_rate": 3.817222397587336e-05,
"loss": 0.0022,
"step": 778
},
{
"epoch": 259.67,
"learning_rate": 3.7911278796793516e-05,
"loss": 0.0022,
"step": 779
},
{
"epoch": 260.0,
"learning_rate": 3.7651019814126654e-05,
"loss": 0.0023,
"step": 780
},
{
"epoch": 260.33,
"learning_rate": 3.739144990422089e-05,
"loss": 0.0021,
"step": 781
},
{
"epoch": 260.67,
"learning_rate": 3.7132571935808924e-05,
"loss": 0.0024,
"step": 782
},
{
"epoch": 261.0,
"learning_rate": 3.687438876997612e-05,
"loss": 0.0022,
"step": 783
},
{
"epoch": 261.33,
"learning_rate": 3.661690326012897e-05,
"loss": 0.0021,
"step": 784
},
{
"epoch": 261.67,
"learning_rate": 3.6360118251963645e-05,
"loss": 0.0022,
"step": 785
},
{
"epoch": 262.0,
"learning_rate": 3.610403658343443e-05,
"loss": 0.0024,
"step": 786
},
{
"epoch": 262.33,
"learning_rate": 3.58486610847223e-05,
"loss": 0.002,
"step": 787
},
{
"epoch": 262.67,
"learning_rate": 3.5593994578203896e-05,
"loss": 0.0023,
"step": 788
},
{
"epoch": 263.0,
"learning_rate": 3.534003987842005e-05,
"loss": 0.0024,
"step": 789
},
{
"epoch": 263.33,
"learning_rate": 3.508679979204481e-05,
"loss": 0.0023,
"step": 790
},
{
"epoch": 263.67,
"learning_rate": 3.483427711785449e-05,
"loss": 0.002,
"step": 791
},
{
"epoch": 264.0,
"learning_rate": 3.458247464669657e-05,
"loss": 0.0027,
"step": 792
},
{
"epoch": 264.33,
"learning_rate": 3.4331395161458955e-05,
"loss": 0.0023,
"step": 793
},
{
"epoch": 264.67,
"learning_rate": 3.408104143703929e-05,
"loss": 0.0021,
"step": 794
},
{
"epoch": 265.0,
"learning_rate": 3.383141624031408e-05,
"loss": 0.0022,
"step": 795
},
{
"epoch": 265.33,
"learning_rate": 3.35825223301083e-05,
"loss": 0.0023,
"step": 796
},
{
"epoch": 265.67,
"learning_rate": 3.333436245716488e-05,
"loss": 0.0023,
"step": 797
},
{
"epoch": 266.0,
"learning_rate": 3.308693936411421e-05,
"loss": 0.0021,
"step": 798
},
{
"epoch": 266.33,
"learning_rate": 3.2840255785443855e-05,
"loss": 0.0023,
"step": 799
},
{
"epoch": 266.67,
"learning_rate": 3.259431444746846e-05,
"loss": 0.0022,
"step": 800
},
{
"epoch": 267.0,
"learning_rate": 3.234911806829948e-05,
"loss": 0.0022,
"step": 801
},
{
"epoch": 267.33,
"learning_rate": 3.210466935781516e-05,
"loss": 0.0024,
"step": 802
},
{
"epoch": 267.67,
"learning_rate": 3.1860971017630604e-05,
"loss": 0.0023,
"step": 803
},
{
"epoch": 268.0,
"learning_rate": 3.161802574106799e-05,
"loss": 0.002,
"step": 804
},
{
"epoch": 268.33,
"learning_rate": 3.137583621312665e-05,
"loss": 0.0023,
"step": 805
},
{
"epoch": 268.67,
"learning_rate": 3.1134405110453515e-05,
"loss": 0.0022,
"step": 806
},
{
"epoch": 269.0,
"learning_rate": 3.089373510131354e-05,
"loss": 0.0021,
"step": 807
},
{
"epoch": 269.33,
"learning_rate": 3.065382884556012e-05,
"loss": 0.0024,
"step": 808
},
{
"epoch": 269.67,
"learning_rate": 3.0414688994605723e-05,
"loss": 0.0023,
"step": 809
},
{
"epoch": 270.0,
"learning_rate": 3.0176318191392726e-05,
"loss": 0.0019,
"step": 810
},
{
"epoch": 270.33,
"learning_rate": 2.9938719070363952e-05,
"loss": 0.0024,
"step": 811
},
{
"epoch": 270.67,
"learning_rate": 2.9701894257433826e-05,
"loss": 0.0022,
"step": 812
},
{
"epoch": 271.0,
"learning_rate": 2.9465846369959127e-05,
"loss": 0.0022,
"step": 813
},
{
"epoch": 271.33,
"learning_rate": 2.923057801671015e-05,
"loss": 0.0022,
"step": 814
},
{
"epoch": 271.67,
"learning_rate": 2.8996091797841973e-05,
"loss": 0.0021,
"step": 815
},
{
"epoch": 272.0,
"learning_rate": 2.876239030486554e-05,
"loss": 0.0023,
"step": 816
},
{
"epoch": 272.33,
"learning_rate": 2.8529476120619104e-05,
"loss": 0.0023,
"step": 817
},
{
"epoch": 272.67,
"learning_rate": 2.829735181923978e-05,
"loss": 0.0022,
"step": 818
},
{
"epoch": 273.0,
"learning_rate": 2.8066019966134904e-05,
"loss": 0.0022,
"step": 819
},
{
"epoch": 273.33,
"learning_rate": 2.7835483117953788e-05,
"loss": 0.0019,
"step": 820
},
{
"epoch": 273.67,
"learning_rate": 2.7605743822559506e-05,
"loss": 0.0024,
"step": 821
},
{
"epoch": 274.0,
"learning_rate": 2.7376804619000707e-05,
"loss": 0.0024,
"step": 822
},
{
"epoch": 274.33,
"learning_rate": 2.7148668037483372e-05,
"loss": 0.0021,
"step": 823
},
{
"epoch": 274.67,
"learning_rate": 2.692133659934315e-05,
"loss": 0.0024,
"step": 824
},
{
"epoch": 275.0,
"learning_rate": 2.669481281701739e-05,
"loss": 0.0023,
"step": 825
},
{
"epoch": 275.33,
"learning_rate": 2.6469099194017143e-05,
"loss": 0.0021,
"step": 826
},
{
"epoch": 275.67,
"learning_rate": 2.624419822489985e-05,
"loss": 0.0022,
"step": 827
},
{
"epoch": 276.0,
"learning_rate": 2.6020112395241624e-05,
"loss": 0.0023,
"step": 828
},
{
"epoch": 276.33,
"learning_rate": 2.579684418160958e-05,
"loss": 0.0022,
"step": 829
},
{
"epoch": 276.67,
"learning_rate": 2.5574396051534832e-05,
"loss": 0.0023,
"step": 830
},
{
"epoch": 277.0,
"learning_rate": 2.5352770463484987e-05,
"loss": 0.0022,
"step": 831
},
{
"epoch": 277.33,
"learning_rate": 2.5131969866836992e-05,
"loss": 0.0022,
"step": 832
},
{
"epoch": 277.67,
"learning_rate": 2.491199670185008e-05,
"loss": 0.0024,
"step": 833
},
{
"epoch": 278.0,
"learning_rate": 2.4692853399638917e-05,
"loss": 0.0021,
"step": 834
},
{
"epoch": 278.33,
"learning_rate": 2.4474542382146537e-05,
"loss": 0.0021,
"step": 835
},
{
"epoch": 278.67,
"learning_rate": 2.425706606211767e-05,
"loss": 0.0024,
"step": 836
},
{
"epoch": 279.0,
"learning_rate": 2.4040426843072206e-05,
"loss": 0.0024,
"step": 837
},
{
"epoch": 279.33,
"learning_rate": 2.3824627119278342e-05,
"loss": 0.0022,
"step": 838
},
{
"epoch": 279.67,
"learning_rate": 2.3609669275726355e-05,
"loss": 0.0021,
"step": 839
},
{
"epoch": 280.0,
"learning_rate": 2.339555568810221e-05,
"loss": 0.0024,
"step": 840
},
{
"epoch": 280.33,
"learning_rate": 2.318228872276118e-05,
"loss": 0.0021,
"step": 841
},
{
"epoch": 280.67,
"learning_rate": 2.2969870736701895e-05,
"loss": 0.0021,
"step": 842
},
{
"epoch": 281.0,
"learning_rate": 2.275830407754006e-05,
"loss": 0.0023,
"step": 843
},
{
"epoch": 281.33,
"learning_rate": 2.2547591083482665e-05,
"loss": 0.0023,
"step": 844
},
{
"epoch": 281.67,
"learning_rate": 2.2337734083302164e-05,
"loss": 0.0023,
"step": 845
},
{
"epoch": 282.0,
"learning_rate": 2.212873539631061e-05,
"loss": 0.002,
"step": 846
},
{
"epoch": 282.33,
"learning_rate": 2.192059733233408e-05,
"loss": 0.0024,
"step": 847
},
{
"epoch": 282.67,
"learning_rate": 2.1713322191687237e-05,
"loss": 0.0023,
"step": 848
},
{
"epoch": 283.0,
"learning_rate": 2.1506912265147772e-05,
"loss": 0.0019,
"step": 849
},
{
"epoch": 283.33,
"learning_rate": 2.1301369833931117e-05,
"loss": 0.0024,
"step": 850
},
{
"epoch": 283.67,
"learning_rate": 2.1096697169665313e-05,
"loss": 0.0022,
"step": 851
},
{
"epoch": 284.0,
"learning_rate": 2.0892896534365904e-05,
"loss": 0.0019,
"step": 852
},
{
"epoch": 284.33,
"learning_rate": 2.068997018041069e-05,
"loss": 0.0022,
"step": 853
},
{
"epoch": 284.67,
"learning_rate": 2.0487920350515212e-05,
"loss": 0.0021,
"step": 854
},
{
"epoch": 285.0,
"learning_rate": 2.0286749277707782e-05,
"loss": 0.0024,
"step": 855
},
{
"epoch": 285.33,
"learning_rate": 2.0086459185304618e-05,
"loss": 0.0021,
"step": 856
},
{
"epoch": 285.67,
"learning_rate": 1.9887052286885655e-05,
"loss": 0.0022,
"step": 857
},
{
"epoch": 286.0,
"learning_rate": 1.9688530786269855e-05,
"loss": 0.0023,
"step": 858
},
{
"epoch": 286.33,
"learning_rate": 1.9490896877490716e-05,
"loss": 0.0022,
"step": 859
},
{
"epoch": 286.67,
"learning_rate": 1.929415274477239e-05,
"loss": 0.0024,
"step": 860
},
{
"epoch": 287.0,
"learning_rate": 1.9098300562505266e-05,
"loss": 0.0021,
"step": 861
},
{
"epoch": 287.33,
"learning_rate": 1.8903342495221977e-05,
"loss": 0.0022,
"step": 862
},
{
"epoch": 287.67,
"learning_rate": 1.870928069757353e-05,
"loss": 0.0023,
"step": 863
},
{
"epoch": 288.0,
"learning_rate": 1.8516117314305524e-05,
"loss": 0.0021,
"step": 864
},
{
"epoch": 288.33,
"learning_rate": 1.832385448023435e-05,
"loss": 0.0022,
"step": 865
},
{
"epoch": 288.67,
"learning_rate": 1.8132494320223638e-05,
"loss": 0.0021,
"step": 866
},
{
"epoch": 289.0,
"learning_rate": 1.7942038949160854e-05,
"loss": 0.0024,
"step": 867
},
{
"epoch": 289.33,
"learning_rate": 1.775249047193377e-05,
"loss": 0.0023,
"step": 868
},
{
"epoch": 289.67,
"learning_rate": 1.756385098340736e-05,
"loss": 0.002,
"step": 869
},
{
"epoch": 290.0,
"learning_rate": 1.7376122568400532e-05,
"loss": 0.0024,
"step": 870
},
{
"epoch": 290.33,
"learning_rate": 1.7189307301663084e-05,
"loss": 0.0021,
"step": 871
},
{
"epoch": 290.67,
"learning_rate": 1.7003407247852943e-05,
"loss": 0.0022,
"step": 872
},
{
"epoch": 291.0,
"learning_rate": 1.681842446151313e-05,
"loss": 0.0022,
"step": 873
},
{
"epoch": 291.33,
"learning_rate": 1.6634360987049115e-05,
"loss": 0.002,
"step": 874
},
{
"epoch": 291.67,
"learning_rate": 1.6451218858706374e-05,
"loss": 0.0022,
"step": 875
},
{
"epoch": 292.0,
"learning_rate": 1.6269000100547683e-05,
"loss": 0.0024,
"step": 876
},
{
"epoch": 292.33,
"learning_rate": 1.6087706726430873e-05,
"loss": 0.0021,
"step": 877
},
{
"epoch": 292.67,
"learning_rate": 1.5907340739986575e-05,
"loss": 0.0022,
"step": 878
},
{
"epoch": 293.0,
"learning_rate": 1.5727904134596083e-05,
"loss": 0.0024,
"step": 879
},
{
"epoch": 293.33,
"learning_rate": 1.5549398893369216e-05,
"loss": 0.0025,
"step": 880
},
{
"epoch": 293.67,
"learning_rate": 1.5371826989122506e-05,
"loss": 0.002,
"step": 881
},
{
"epoch": 294.0,
"learning_rate": 1.5195190384357404e-05,
"loss": 0.0021,
"step": 882
},
{
"epoch": 294.33,
"learning_rate": 1.501949103123852e-05,
"loss": 0.0021,
"step": 883
},
{
"epoch": 294.67,
"learning_rate": 1.4844730871572043e-05,
"loss": 0.0024,
"step": 884
},
{
"epoch": 295.0,
"learning_rate": 1.467091183678444e-05,
"loss": 0.0023,
"step": 885
},
{
"epoch": 295.33,
"learning_rate": 1.449803584790086e-05,
"loss": 0.0022,
"step": 886
},
{
"epoch": 295.67,
"learning_rate": 1.4326104815524088e-05,
"loss": 0.0022,
"step": 887
},
{
"epoch": 296.0,
"learning_rate": 1.415512063981339e-05,
"loss": 0.0022,
"step": 888
},
{
"epoch": 296.33,
"learning_rate": 1.3985085210463477e-05,
"loss": 0.0023,
"step": 889
},
{
"epoch": 296.67,
"learning_rate": 1.3816000406683604e-05,
"loss": 0.0023,
"step": 890
},
{
"epoch": 297.0,
"learning_rate": 1.364786809717692e-05,
"loss": 0.0019,
"step": 891
},
{
"epoch": 297.33,
"learning_rate": 1.3480690140119657e-05,
"loss": 0.0022,
"step": 892
},
{
"epoch": 297.67,
"learning_rate": 1.3314468383140688e-05,
"loss": 0.0023,
"step": 893
},
{
"epoch": 298.0,
"learning_rate": 1.3149204663301118e-05,
"loss": 0.0021,
"step": 894
},
{
"epoch": 298.33,
"learning_rate": 1.2984900807073919e-05,
"loss": 0.002,
"step": 895
},
{
"epoch": 298.67,
"learning_rate": 1.2821558630323772e-05,
"loss": 0.0021,
"step": 896
},
{
"epoch": 299.0,
"learning_rate": 1.2659179938287035e-05,
"loss": 0.0026,
"step": 897
},
{
"epoch": 299.33,
"learning_rate": 1.2497766525551724e-05,
"loss": 0.0023,
"step": 898
},
{
"epoch": 299.67,
"learning_rate": 1.2337320176037759e-05,
"loss": 0.0023,
"step": 899
},
{
"epoch": 300.0,
"learning_rate": 1.2177842662977135e-05,
"loss": 0.002,
"step": 900
},
{
"epoch": 300.33,
"learning_rate": 1.201933574889449e-05,
"loss": 0.0022,
"step": 901
},
{
"epoch": 300.67,
"learning_rate": 1.186180118558743e-05,
"loss": 0.0024,
"step": 902
},
{
"epoch": 301.0,
"learning_rate": 1.1705240714107302e-05,
"loss": 0.0019,
"step": 903
},
{
"epoch": 301.33,
"learning_rate": 1.1549656064739967e-05,
"loss": 0.0024,
"step": 904
},
{
"epoch": 301.67,
"learning_rate": 1.1395048956986575e-05,
"loss": 0.0019,
"step": 905
},
{
"epoch": 302.0,
"learning_rate": 1.124142109954459e-05,
"loss": 0.0025,
"step": 906
},
{
"epoch": 302.33,
"learning_rate": 1.108877419028902e-05,
"loss": 0.0024,
"step": 907
},
{
"epoch": 302.67,
"learning_rate": 1.0937109916253474e-05,
"loss": 0.0021,
"step": 908
},
{
"epoch": 303.0,
"learning_rate": 1.0786429953611666e-05,
"loss": 0.0024,
"step": 909
},
{
"epoch": 303.33,
"learning_rate": 1.0636735967658784e-05,
"loss": 0.0021,
"step": 910
},
{
"epoch": 303.67,
"learning_rate": 1.0488029612793138e-05,
"loss": 0.0022,
"step": 911
},
{
"epoch": 304.0,
"learning_rate": 1.034031253249792e-05,
"loss": 0.0023,
"step": 912
},
{
"epoch": 304.33,
"learning_rate": 1.0193586359322927e-05,
"loss": 0.002,
"step": 913
},
{
"epoch": 304.67,
"learning_rate": 1.004785271486659e-05,
"loss": 0.0025,
"step": 914
},
{
"epoch": 305.0,
"learning_rate": 9.903113209758096e-06,
"loss": 0.0021,
"step": 915
},
{
"epoch": 305.33,
"learning_rate": 9.759369443639454e-06,
"loss": 0.0024,
"step": 916
},
{
"epoch": 305.67,
"learning_rate": 9.616623005147951e-06,
"loss": 0.0021,
"step": 917
},
{
"epoch": 306.0,
"learning_rate": 9.474875471898526e-06,
"loss": 0.0021,
"step": 918
},
{
"epoch": 306.33,
"learning_rate": 9.334128410466358e-06,
"loss": 0.0022,
"step": 919
},
{
"epoch": 306.67,
"learning_rate": 9.194383376369508e-06,
"loss": 0.0022,
"step": 920
},
{
"epoch": 307.0,
"learning_rate": 9.055641914051782e-06,
"loss": 0.002,
"step": 921
},
{
"epoch": 307.33,
"learning_rate": 8.917905556865713e-06,
"loss": 0.0022,
"step": 922
},
{
"epoch": 307.67,
"learning_rate": 8.781175827055389e-06,
"loss": 0.0023,
"step": 923
},
{
"epoch": 308.0,
"learning_rate": 8.645454235739903e-06,
"loss": 0.0022,
"step": 924
},
{
"epoch": 308.33,
"learning_rate": 8.510742282896544e-06,
"loss": 0.0021,
"step": 925
},
{
"epoch": 308.67,
"learning_rate": 8.377041457344103e-06,
"loss": 0.0023,
"step": 926
},
{
"epoch": 309.0,
"learning_rate": 8.24435323672661e-06,
"loss": 0.0025,
"step": 927
},
{
"epoch": 309.33,
"learning_rate": 8.112679087496933e-06,
"loss": 0.0021,
"step": 928
},
{
"epoch": 309.67,
"learning_rate": 7.982020464900486e-06,
"loss": 0.0022,
"step": 929
},
{
"epoch": 310.0,
"learning_rate": 7.852378812959227e-06,
"loss": 0.0025,
"step": 930
},
{
"epoch": 310.33,
"learning_rate": 7.72375556445577e-06,
"loss": 0.0021,
"step": 931
},
{
"epoch": 310.67,
"learning_rate": 7.596152140917368e-06,
"loss": 0.0023,
"step": 932
},
{
"epoch": 311.0,
"learning_rate": 7.46956995260033e-06,
"loss": 0.0022,
"step": 933
},
{
"epoch": 311.33,
"learning_rate": 7.344010398474455e-06,
"loss": 0.0021,
"step": 934
},
{
"epoch": 311.67,
"learning_rate": 7.219474866207465e-06,
"loss": 0.0023,
"step": 935
},
{
"epoch": 312.0,
"learning_rate": 7.09596473214974e-06,
"loss": 0.0023,
"step": 936
},
{
"epoch": 312.33,
"learning_rate": 6.973481361319123e-06,
"loss": 0.0023,
"step": 937
},
{
"epoch": 312.67,
"learning_rate": 6.852026107385756e-06,
"loss": 0.0022,
"step": 938
},
{
"epoch": 313.0,
"learning_rate": 6.731600312657238e-06,
"loss": 0.0021,
"step": 939
},
{
"epoch": 313.33,
"learning_rate": 6.612205308063646e-06,
"loss": 0.0024,
"step": 940
},
{
"epoch": 313.67,
"learning_rate": 6.493842413142914e-06,
"loss": 0.0021,
"step": 941
},
{
"epoch": 314.0,
"learning_rate": 6.37651293602628e-06,
"loss": 0.0021,
"step": 942
},
{
"epoch": 314.33,
"learning_rate": 6.260218173423749e-06,
"loss": 0.0021,
"step": 943
},
{
"epoch": 314.67,
"learning_rate": 6.144959410609785e-06,
"loss": 0.0021,
"step": 944
},
{
"epoch": 315.0,
"learning_rate": 6.030737921409169e-06,
"loss": 0.0025,
"step": 945
},
{
"epoch": 315.33,
"learning_rate": 5.917554968182803e-06,
"loss": 0.0022,
"step": 946
},
{
"epoch": 315.67,
"learning_rate": 5.805411801813865e-06,
"loss": 0.0022,
"step": 947
},
{
"epoch": 316.0,
"learning_rate": 5.694309661693942e-06,
"loss": 0.0024,
"step": 948
},
{
"epoch": 316.33,
"learning_rate": 5.584249775709371e-06,
"loss": 0.0022,
"step": 949
},
{
"epoch": 316.67,
"learning_rate": 5.475233360227516e-06,
"loss": 0.0022,
"step": 950
},
{
"epoch": 317.0,
"learning_rate": 5.367261620083575e-06,
"loss": 0.0021,
"step": 951
},
{
"epoch": 317.33,
"learning_rate": 5.26033574856708e-06,
"loss": 0.0022,
"step": 952
},
{
"epoch": 317.67,
"learning_rate": 5.1544569274087125e-06,
"loss": 0.0024,
"step": 953
},
{
"epoch": 318.0,
"learning_rate": 5.049626326767365e-06,
"loss": 0.002,
"step": 954
},
{
"epoch": 318.33,
"learning_rate": 4.945845105217117e-06,
"loss": 0.0023,
"step": 955
},
{
"epoch": 318.67,
"learning_rate": 4.843114409734384e-06,
"loss": 0.0021,
"step": 956
},
{
"epoch": 319.0,
"learning_rate": 4.741435375685377e-06,
"loss": 0.0024,
"step": 957
},
{
"epoch": 319.33,
"learning_rate": 4.640809126813484e-06,
"loss": 0.0022,
"step": 958
},
{
"epoch": 319.67,
"learning_rate": 4.541236775226809e-06,
"loss": 0.0025,
"step": 959
},
{
"epoch": 320.0,
"learning_rate": 4.442719421385922e-06,
"loss": 0.002,
"step": 960
},
{
"epoch": 320.33,
"learning_rate": 4.3452581540917465e-06,
"loss": 0.0021,
"step": 961
},
{
"epoch": 320.67,
"learning_rate": 4.248854050473405e-06,
"loss": 0.0021,
"step": 962
},
{
"epoch": 321.0,
"learning_rate": 4.153508175976428e-06,
"loss": 0.0024,
"step": 963
},
{
"epoch": 321.33,
"learning_rate": 4.059221584350958e-06,
"loss": 0.002,
"step": 964
},
{
"epoch": 321.67,
"learning_rate": 3.965995317640025e-06,
"loss": 0.0021,
"step": 965
},
{
"epoch": 322.0,
"learning_rate": 3.873830406168111e-06,
"loss": 0.0025,
"step": 966
},
{
"epoch": 322.33,
"learning_rate": 3.7827278685297785e-06,
"loss": 0.0021,
"step": 967
},
{
"epoch": 322.67,
"learning_rate": 3.692688711578296e-06,
"loss": 0.0025,
"step": 968
},
{
"epoch": 323.0,
"learning_rate": 3.6037139304146762e-06,
"loss": 0.0019,
"step": 969
},
{
"epoch": 323.33,
"learning_rate": 3.515804508376508e-06,
"loss": 0.0024,
"step": 970
},
{
"epoch": 323.67,
"learning_rate": 3.428961417027221e-06,
"loss": 0.0021,
"step": 971
},
{
"epoch": 324.0,
"learning_rate": 3.3431856161452835e-06,
"loss": 0.0022,
"step": 972
},
{
"epoch": 324.33,
"learning_rate": 3.2584780537136207e-06,
"loss": 0.0021,
"step": 973
},
{
"epoch": 324.67,
"learning_rate": 3.1748396659090797e-06,
"loss": 0.0023,
"step": 974
},
{
"epoch": 325.0,
"learning_rate": 3.092271377092215e-06,
"loss": 0.0022,
"step": 975
},
{
"epoch": 325.33,
"learning_rate": 3.010774099796898e-06,
"loss": 0.0022,
"step": 976
},
{
"epoch": 325.67,
"learning_rate": 2.9303487347203783e-06,
"loss": 0.0022,
"step": 977
},
{
"epoch": 326.0,
"learning_rate": 2.8509961707132494e-06,
"loss": 0.0022,
"step": 978
},
{
"epoch": 326.33,
"learning_rate": 2.772717284769677e-06,
"loss": 0.0023,
"step": 979
},
{
"epoch": 326.67,
"learning_rate": 2.6955129420176196e-06,
"loss": 0.0022,
"step": 980
},
{
"epoch": 327.0,
"learning_rate": 2.619383995709368e-06,
"loss": 0.0022,
"step": 981
},
{
"epoch": 327.33,
"learning_rate": 2.5443312872120763e-06,
"loss": 0.0025,
"step": 982
},
{
"epoch": 327.67,
"learning_rate": 2.4703556459984456e-06,
"loss": 0.002,
"step": 983
},
{
"epoch": 328.0,
"learning_rate": 2.3974578896375553e-06,
"loss": 0.0021,
"step": 984
},
{
"epoch": 328.33,
"learning_rate": 2.3256388237858807e-06,
"loss": 0.0021,
"step": 985
},
{
"epoch": 328.67,
"learning_rate": 2.25489924217831e-06,
"loss": 0.0024,
"step": 986
},
{
"epoch": 329.0,
"learning_rate": 2.1852399266194314e-06,
"loss": 0.0022,
"step": 987
},
{
"epoch": 329.33,
"learning_rate": 2.1166616469749044e-06,
"loss": 0.002,
"step": 988
},
{
"epoch": 329.67,
"learning_rate": 2.049165161162858e-06,
"loss": 0.0022,
"step": 989
},
{
"epoch": 330.0,
"learning_rate": 1.9827512151456173e-06,
"loss": 0.0025,
"step": 990
},
{
"epoch": 330.33,
"learning_rate": 1.917420542921433e-06,
"loss": 0.0021,
"step": 991
},
{
"epoch": 330.67,
"learning_rate": 1.8531738665163112e-06,
"loss": 0.0024,
"step": 992
},
{
"epoch": 331.0,
"learning_rate": 1.790011895976118e-06,
"loss": 0.002,
"step": 993
},
{
"epoch": 331.33,
"learning_rate": 1.7279353293586765e-06,
"loss": 0.0025,
"step": 994
},
{
"epoch": 331.67,
"learning_rate": 1.66694485272606e-06,
"loss": 0.0018,
"step": 995
},
{
"epoch": 332.0,
"learning_rate": 1.6070411401370334e-06,
"loss": 0.0025,
"step": 996
},
{
"epoch": 332.33,
"learning_rate": 1.5482248536395905e-06,
"loss": 0.0024,
"step": 997
},
{
"epoch": 332.67,
"learning_rate": 1.4904966432635947e-06,
"loss": 0.0023,
"step": 998
},
{
"epoch": 333.0,
"learning_rate": 1.4338571470137063e-06,
"loss": 0.0018,
"step": 999
},
{
"epoch": 333.33,
"learning_rate": 1.378306990862177e-06,
"loss": 0.0023,
"step": 1000
},
{
"epoch": 333.67,
"learning_rate": 1.323846788742078e-06,
"loss": 0.0022,
"step": 1001
},
{
"epoch": 334.0,
"learning_rate": 1.2704771425404382e-06,
"loss": 0.0021,
"step": 1002
},
{
"epoch": 334.33,
"learning_rate": 1.2181986420915615e-06,
"loss": 0.0021,
"step": 1003
},
{
"epoch": 334.67,
"learning_rate": 1.1670118651706197e-06,
"loss": 0.0024,
"step": 1004
},
{
"epoch": 335.0,
"learning_rate": 1.1169173774871478e-06,
"loss": 0.0024,
"step": 1005
},
{
"epoch": 335.33,
"learning_rate": 1.067915732678859e-06,
"loss": 0.0021,
"step": 1006
},
{
"epoch": 335.67,
"learning_rate": 1.0200074723055398e-06,
"loss": 0.0024,
"step": 1007
},
{
"epoch": 336.0,
"learning_rate": 9.731931258429638e-07,
"loss": 0.0022,
"step": 1008
},
{
"epoch": 336.33,
"learning_rate": 9.274732106771988e-07,
"loss": 0.0021,
"step": 1009
},
{
"epoch": 336.67,
"learning_rate": 8.828482320987319e-07,
"loss": 0.0023,
"step": 1010
},
{
"epoch": 337.0,
"learning_rate": 8.393186832969746e-07,
"loss": 0.0021,
"step": 1011
},
{
"epoch": 337.33,
"learning_rate": 7.968850453548226e-07,
"loss": 0.0022,
"step": 1012
},
{
"epoch": 337.67,
"learning_rate": 7.555477872432715e-07,
"loss": 0.002,
"step": 1013
},
{
"epoch": 338.0,
"learning_rate": 7.153073658162646e-07,
"loss": 0.0024,
"step": 1014
},
{
"epoch": 338.33,
"learning_rate": 6.761642258056978e-07,
"loss": 0.002,
"step": 1015
},
{
"epoch": 338.67,
"learning_rate": 6.381187998164229e-07,
"loss": 0.0022,
"step": 1016
},
{
"epoch": 339.0,
"learning_rate": 6.011715083214741e-07,
"loss": 0.0024,
"step": 1017
},
{
"epoch": 339.33,
"learning_rate": 5.653227596575161e-07,
"loss": 0.002,
"step": 1018
},
{
"epoch": 339.67,
"learning_rate": 5.305729500201917e-07,
"loss": 0.0019,
"step": 1019
},
{
"epoch": 340.0,
"learning_rate": 4.969224634598591e-07,
"loss": 0.0028,
"step": 1020
},
{
"epoch": 340.33,
"learning_rate": 4.6437167187728393e-07,
"loss": 0.0023,
"step": 1021
},
{
"epoch": 340.67,
"learning_rate": 4.329209350195651e-07,
"loss": 0.0022,
"step": 1022
},
{
"epoch": 341.0,
"learning_rate": 4.025706004760932e-07,
"loss": 0.0021,
"step": 1023
},
{
"epoch": 341.33,
"learning_rate": 3.7332100367482024e-07,
"loss": 0.0022,
"step": 1024
},
{
"epoch": 341.67,
"learning_rate": 3.451724678784518e-07,
"loss": 0.0023,
"step": 1025
},
{
"epoch": 342.0,
"learning_rate": 3.1812530418090513e-07,
"loss": 0.0021,
"step": 1026
},
{
"epoch": 342.33,
"learning_rate": 2.921798115039009e-07,
"loss": 0.0018,
"step": 1027
},
{
"epoch": 342.67,
"learning_rate": 2.673362765936327e-07,
"loss": 0.0023,
"step": 1028
},
{
"epoch": 343.0,
"learning_rate": 2.4359497401758024e-07,
"loss": 0.0025,
"step": 1029
},
{
"epoch": 343.33,
"learning_rate": 2.2095616616150115e-07,
"loss": 0.0021,
"step": 1030
},
{
"epoch": 343.67,
"learning_rate": 1.9942010322655524e-07,
"loss": 0.0022,
"step": 1031
},
{
"epoch": 344.0,
"learning_rate": 1.7898702322648453e-07,
"loss": 0.0024,
"step": 1032
},
{
"epoch": 344.33,
"learning_rate": 1.596571519850043e-07,
"loss": 0.0023,
"step": 1033
},
{
"epoch": 344.67,
"learning_rate": 1.414307031333273e-07,
"loss": 0.0023,
"step": 1034
},
{
"epoch": 345.0,
"learning_rate": 1.2430787810776555e-07,
"loss": 0.0021,
"step": 1035
},
{
"epoch": 345.33,
"learning_rate": 1.0828886614754341e-07,
"loss": 0.0024,
"step": 1036
},
{
"epoch": 345.67,
"learning_rate": 9.337384429269901e-08,
"loss": 0.0022,
"step": 1037
},
{
"epoch": 346.0,
"learning_rate": 7.956297738207497e-08,
"loss": 0.0019,
"step": 1038
},
{
"epoch": 346.33,
"learning_rate": 6.685641805158627e-08,
"loss": 0.0023,
"step": 1039
},
{
"epoch": 346.67,
"learning_rate": 5.5254306732444025e-08,
"loss": 0.0022,
"step": 1040
},
{
"epoch": 347.0,
"learning_rate": 4.475677164966774e-08,
"loss": 0.0022,
"step": 1041
},
{
"epoch": 347.33,
"learning_rate": 3.536392882064199e-08,
"loss": 0.0022,
"step": 1042
},
{
"epoch": 347.67,
"learning_rate": 2.7075882053828605e-08,
"loss": 0.0022,
"step": 1043
},
{
"epoch": 348.0,
"learning_rate": 1.9892722947645326e-08,
"loss": 0.0022,
"step": 1044
},
{
"epoch": 348.33,
"learning_rate": 1.3814530889433296e-08,
"loss": 0.0021,
"step": 1045
},
{
"epoch": 348.67,
"learning_rate": 8.841373054546686e-09,
"loss": 0.0023,
"step": 1046
},
{
"epoch": 349.0,
"learning_rate": 4.973304405697654e-09,
"loss": 0.0022,
"step": 1047
},
{
"epoch": 349.33,
"learning_rate": 2.2103676922680117e-09,
"loss": 0.0024,
"step": 1048
},
{
"epoch": 349.67,
"learning_rate": 5.525934498651352e-10,
"loss": 0.0021,
"step": 1049
},
{
"epoch": 350.0,
"learning_rate": 0.0,
"loss": 0.0021,
"step": 1050
},
{
"epoch": 350.0,
"eval_loss": 1.3223165273666382,
"eval_runtime": 3.5062,
"eval_samples_per_second": 5.989,
"eval_steps_per_second": 0.856,
"step": 1050
}
],
"logging_steps": 1,
"max_steps": 1050,
"num_input_tokens_seen": 0,
"num_train_epochs": 350,
"save_steps": 350,
"total_flos": 1.277394092556288e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}