text-normalization-ru-new / trainer_state.json
alexue4's picture
End of training
8168262
raw
history blame
29.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 500,
"global_step": 343740,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.909175539652063e-09,
"loss": 0.0085,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 5.000872752661896e-06,
"loss": 0.0066,
"step": 1719
},
{
"epoch": 0.15,
"learning_rate": 1.0001745505323792e-05,
"loss": 0.0061,
"step": 3438
},
{
"epoch": 0.23,
"learning_rate": 1.5002618257985687e-05,
"loss": 0.0059,
"step": 5157
},
{
"epoch": 0.3,
"learning_rate": 2.0003491010647585e-05,
"loss": 0.0056,
"step": 6876
},
{
"epoch": 0.38,
"learning_rate": 2.500436376330948e-05,
"loss": 0.0052,
"step": 8595
},
{
"epoch": 0.45,
"learning_rate": 3.0005236515971374e-05,
"loss": 0.0052,
"step": 10314
},
{
"epoch": 0.53,
"learning_rate": 3.500610926863327e-05,
"loss": 0.0049,
"step": 12033
},
{
"epoch": 0.6,
"learning_rate": 4.000698202129517e-05,
"loss": 0.0049,
"step": 13752
},
{
"epoch": 0.68,
"learning_rate": 4.5007854773957064e-05,
"loss": 0.0052,
"step": 15471
},
{
"epoch": 0.75,
"learning_rate": 5.000872752661896e-05,
"loss": 0.0051,
"step": 17190
},
{
"epoch": 0.83,
"learning_rate": 5.500960027928086e-05,
"loss": 0.0052,
"step": 18909
},
{
"epoch": 0.9,
"learning_rate": 6.001047303194275e-05,
"loss": 0.0048,
"step": 20628
},
{
"epoch": 0.98,
"learning_rate": 6.501134578460465e-05,
"loss": 0.0052,
"step": 22347
},
{
"epoch": 1.0,
"eval_loss": 0.027106985449790955,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 14.4629,
"eval_samples_per_second": 17.355,
"eval_steps_per_second": 1.175,
"step": 22916
},
{
"epoch": 1.05,
"learning_rate": 7.001221853726654e-05,
"loss": 0.0045,
"step": 24066
},
{
"epoch": 1.13,
"learning_rate": 7.501309128992844e-05,
"loss": 0.0044,
"step": 25785
},
{
"epoch": 1.2,
"learning_rate": 8.001396404259034e-05,
"loss": 0.0048,
"step": 27504
},
{
"epoch": 1.28,
"learning_rate": 8.501483679525223e-05,
"loss": 0.0045,
"step": 29223
},
{
"epoch": 1.35,
"learning_rate": 9.001570954791413e-05,
"loss": 0.0044,
"step": 30942
},
{
"epoch": 1.43,
"learning_rate": 9.501658230057602e-05,
"loss": 0.0047,
"step": 32661
},
{
"epoch": 1.5,
"learning_rate": 9.999806054964024e-05,
"loss": 0.005,
"step": 34380
},
{
"epoch": 1.58,
"learning_rate": 9.944240802156669e-05,
"loss": 0.0049,
"step": 36099
},
{
"epoch": 1.65,
"learning_rate": 9.888675549349314e-05,
"loss": 0.0053,
"step": 37818
},
{
"epoch": 1.73,
"learning_rate": 9.83311029654196e-05,
"loss": 0.0047,
"step": 39537
},
{
"epoch": 1.8,
"learning_rate": 9.777545043734606e-05,
"loss": 0.005,
"step": 41256
},
{
"epoch": 1.88,
"learning_rate": 9.721979790927251e-05,
"loss": 0.0048,
"step": 42975
},
{
"epoch": 1.95,
"learning_rate": 9.666414538119898e-05,
"loss": 0.0051,
"step": 44694
},
{
"epoch": 2.0,
"eval_loss": 0.026126669719815254,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.5883,
"eval_samples_per_second": 18.472,
"eval_steps_per_second": 1.251,
"step": 45832
},
{
"epoch": 2.03,
"learning_rate": 9.610849285312543e-05,
"loss": 0.0047,
"step": 46413
},
{
"epoch": 2.1,
"learning_rate": 9.555284032505189e-05,
"loss": 0.004,
"step": 48132
},
{
"epoch": 2.18,
"learning_rate": 9.499718779697834e-05,
"loss": 0.004,
"step": 49851
},
{
"epoch": 2.25,
"learning_rate": 9.44415352689048e-05,
"loss": 0.0042,
"step": 51570
},
{
"epoch": 2.33,
"learning_rate": 9.388588274083125e-05,
"loss": 0.004,
"step": 53289
},
{
"epoch": 2.4,
"learning_rate": 9.333023021275771e-05,
"loss": 0.0043,
"step": 55008
},
{
"epoch": 2.48,
"learning_rate": 9.277457768468416e-05,
"loss": 0.0042,
"step": 56727
},
{
"epoch": 2.55,
"learning_rate": 9.221892515661063e-05,
"loss": 0.004,
"step": 58446
},
{
"epoch": 2.63,
"learning_rate": 9.166327262853708e-05,
"loss": 0.0045,
"step": 60165
},
{
"epoch": 2.7,
"learning_rate": 9.110762010046352e-05,
"loss": 0.0044,
"step": 61884
},
{
"epoch": 2.78,
"learning_rate": 9.055196757238999e-05,
"loss": 0.0044,
"step": 63603
},
{
"epoch": 2.85,
"learning_rate": 8.999631504431645e-05,
"loss": 0.0044,
"step": 65322
},
{
"epoch": 2.93,
"learning_rate": 8.94406625162429e-05,
"loss": 0.0043,
"step": 67041
},
{
"epoch": 3.0,
"eval_loss": 0.03130079433321953,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4308,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 1.266,
"step": 68748
},
{
"epoch": 3.0,
"learning_rate": 8.888500998816935e-05,
"loss": 0.0046,
"step": 68760
},
{
"epoch": 3.08,
"learning_rate": 8.832935746009581e-05,
"loss": 0.0036,
"step": 70479
},
{
"epoch": 3.15,
"learning_rate": 8.777370493202228e-05,
"loss": 0.0036,
"step": 72198
},
{
"epoch": 3.23,
"learning_rate": 8.721805240394872e-05,
"loss": 0.0038,
"step": 73917
},
{
"epoch": 3.3,
"learning_rate": 8.666239987587517e-05,
"loss": 0.0036,
"step": 75636
},
{
"epoch": 3.38,
"learning_rate": 8.610674734780164e-05,
"loss": 0.0038,
"step": 77355
},
{
"epoch": 3.45,
"learning_rate": 8.55510948197281e-05,
"loss": 0.0038,
"step": 79074
},
{
"epoch": 3.53,
"learning_rate": 8.499544229165455e-05,
"loss": 0.0038,
"step": 80793
},
{
"epoch": 3.6,
"learning_rate": 8.4439789763581e-05,
"loss": 0.004,
"step": 82512
},
{
"epoch": 3.68,
"learning_rate": 8.388413723550746e-05,
"loss": 0.0037,
"step": 84231
},
{
"epoch": 3.75,
"learning_rate": 8.332848470743392e-05,
"loss": 0.0038,
"step": 85950
},
{
"epoch": 3.83,
"learning_rate": 8.277283217936037e-05,
"loss": 0.0039,
"step": 87669
},
{
"epoch": 3.9,
"learning_rate": 8.221717965128682e-05,
"loss": 0.0039,
"step": 89388
},
{
"epoch": 3.98,
"learning_rate": 8.166152712321329e-05,
"loss": 0.0041,
"step": 91107
},
{
"epoch": 4.0,
"eval_loss": 0.02780107595026493,
"eval_max_distance": 10,
"eval_mean_distance": 0,
"eval_runtime": 13.351,
"eval_samples_per_second": 18.8,
"eval_steps_per_second": 1.273,
"step": 91664
},
{
"epoch": 4.05,
"learning_rate": 8.110587459513974e-05,
"loss": 0.0037,
"step": 92826
},
{
"epoch": 4.13,
"learning_rate": 8.05502220670662e-05,
"loss": 0.0032,
"step": 94545
},
{
"epoch": 4.2,
"learning_rate": 7.999456953899266e-05,
"loss": 0.0034,
"step": 96264
},
{
"epoch": 4.28,
"learning_rate": 7.943891701091911e-05,
"loss": 0.0034,
"step": 97983
},
{
"epoch": 4.35,
"learning_rate": 7.888326448284556e-05,
"loss": 0.0035,
"step": 99702
},
{
"epoch": 4.43,
"learning_rate": 7.832761195477202e-05,
"loss": 0.0034,
"step": 101421
},
{
"epoch": 4.5,
"learning_rate": 7.777195942669849e-05,
"loss": 0.0036,
"step": 103140
},
{
"epoch": 4.58,
"learning_rate": 7.721630689862494e-05,
"loss": 0.0035,
"step": 104859
},
{
"epoch": 4.65,
"learning_rate": 7.666065437055139e-05,
"loss": 0.0034,
"step": 106578
},
{
"epoch": 4.73,
"learning_rate": 7.610500184247783e-05,
"loss": 0.0034,
"step": 108297
},
{
"epoch": 4.8,
"learning_rate": 7.554934931440431e-05,
"loss": 0.0036,
"step": 110016
},
{
"epoch": 4.88,
"learning_rate": 7.499369678633076e-05,
"loss": 0.0034,
"step": 111735
},
{
"epoch": 4.95,
"learning_rate": 7.443804425825721e-05,
"loss": 0.0037,
"step": 113454
},
{
"epoch": 5.0,
"eval_loss": 0.028013188391923904,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4584,
"eval_samples_per_second": 18.65,
"eval_steps_per_second": 1.263,
"step": 114580
},
{
"epoch": 5.03,
"learning_rate": 7.388239173018366e-05,
"loss": 0.0033,
"step": 115173
},
{
"epoch": 5.1,
"learning_rate": 7.332673920211012e-05,
"loss": 0.0031,
"step": 116892
},
{
"epoch": 5.18,
"learning_rate": 7.277108667403659e-05,
"loss": 0.0031,
"step": 118611
},
{
"epoch": 5.25,
"learning_rate": 7.221543414596303e-05,
"loss": 0.0032,
"step": 120330
},
{
"epoch": 5.33,
"learning_rate": 7.16597816178895e-05,
"loss": 0.0031,
"step": 122049
},
{
"epoch": 5.4,
"learning_rate": 7.110412908981595e-05,
"loss": 0.0032,
"step": 123768
},
{
"epoch": 5.48,
"learning_rate": 7.054847656174241e-05,
"loss": 0.0031,
"step": 125487
},
{
"epoch": 5.55,
"learning_rate": 6.999282403366886e-05,
"loss": 0.0032,
"step": 127206
},
{
"epoch": 5.63,
"learning_rate": 6.943717150559532e-05,
"loss": 0.0032,
"step": 128925
},
{
"epoch": 5.7,
"learning_rate": 6.888151897752177e-05,
"loss": 0.0032,
"step": 130644
},
{
"epoch": 5.78,
"learning_rate": 6.832586644944823e-05,
"loss": 0.0031,
"step": 132363
},
{
"epoch": 5.85,
"learning_rate": 6.777021392137468e-05,
"loss": 0.0031,
"step": 134082
},
{
"epoch": 5.93,
"learning_rate": 6.721456139330115e-05,
"loss": 0.0032,
"step": 135801
},
{
"epoch": 6.0,
"eval_loss": 0.028835317119956017,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4137,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 1.267,
"step": 137496
},
{
"epoch": 6.0,
"learning_rate": 6.66589088652276e-05,
"loss": 0.0033,
"step": 137520
},
{
"epoch": 6.08,
"learning_rate": 6.610325633715405e-05,
"loss": 0.0028,
"step": 139239
},
{
"epoch": 6.15,
"learning_rate": 6.554760380908051e-05,
"loss": 0.0026,
"step": 140958
},
{
"epoch": 6.23,
"learning_rate": 6.499195128100697e-05,
"loss": 0.0027,
"step": 142677
},
{
"epoch": 6.3,
"learning_rate": 6.443629875293342e-05,
"loss": 0.0029,
"step": 144396
},
{
"epoch": 6.38,
"learning_rate": 6.388064622485987e-05,
"loss": 0.0029,
"step": 146115
},
{
"epoch": 6.45,
"learning_rate": 6.332499369678633e-05,
"loss": 0.0029,
"step": 147834
},
{
"epoch": 6.53,
"learning_rate": 6.27693411687128e-05,
"loss": 0.0028,
"step": 149553
},
{
"epoch": 6.6,
"learning_rate": 6.221368864063925e-05,
"loss": 0.0029,
"step": 151272
},
{
"epoch": 6.68,
"learning_rate": 6.16580361125657e-05,
"loss": 0.0029,
"step": 152991
},
{
"epoch": 6.75,
"learning_rate": 6.110238358449216e-05,
"loss": 0.0029,
"step": 154710
},
{
"epoch": 6.83,
"learning_rate": 6.0546731056418614e-05,
"loss": 0.0028,
"step": 156429
},
{
"epoch": 6.9,
"learning_rate": 5.999107852834507e-05,
"loss": 0.0029,
"step": 158148
},
{
"epoch": 6.98,
"learning_rate": 5.943542600027152e-05,
"loss": 0.003,
"step": 159867
},
{
"epoch": 7.0,
"eval_loss": 0.030847659334540367,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4895,
"eval_samples_per_second": 18.607,
"eval_steps_per_second": 1.26,
"step": 160412
},
{
"epoch": 7.05,
"learning_rate": 5.887977347219798e-05,
"loss": 0.0027,
"step": 161586
},
{
"epoch": 7.13,
"learning_rate": 5.832412094412444e-05,
"loss": 0.0025,
"step": 163305
},
{
"epoch": 7.2,
"learning_rate": 5.7768468416050895e-05,
"loss": 0.0026,
"step": 165024
},
{
"epoch": 7.28,
"learning_rate": 5.7212815887977344e-05,
"loss": 0.0027,
"step": 166743
},
{
"epoch": 7.35,
"learning_rate": 5.665716335990381e-05,
"loss": 0.0025,
"step": 168462
},
{
"epoch": 7.43,
"learning_rate": 5.610151083183026e-05,
"loss": 0.0026,
"step": 170181
},
{
"epoch": 7.5,
"learning_rate": 5.554585830375671e-05,
"loss": 0.0026,
"step": 171900
},
{
"epoch": 7.58,
"learning_rate": 5.499020577568318e-05,
"loss": 0.0026,
"step": 173619
},
{
"epoch": 7.65,
"learning_rate": 5.443455324760963e-05,
"loss": 0.0026,
"step": 175338
},
{
"epoch": 7.73,
"learning_rate": 5.387890071953609e-05,
"loss": 0.0025,
"step": 177057
},
{
"epoch": 7.8,
"learning_rate": 5.332324819146254e-05,
"loss": 0.0027,
"step": 178776
},
{
"epoch": 7.88,
"learning_rate": 5.2767595663389e-05,
"loss": 0.0028,
"step": 180495
},
{
"epoch": 7.95,
"learning_rate": 5.2211943135315456e-05,
"loss": 0.0025,
"step": 182214
},
{
"epoch": 8.0,
"eval_loss": 0.03048335202038288,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.5077,
"eval_samples_per_second": 18.582,
"eval_steps_per_second": 1.259,
"step": 183328
},
{
"epoch": 8.03,
"learning_rate": 5.1656290607241906e-05,
"loss": 0.0026,
"step": 183933
},
{
"epoch": 8.1,
"learning_rate": 5.110063807916836e-05,
"loss": 0.0023,
"step": 185652
},
{
"epoch": 8.18,
"learning_rate": 5.0544985551094825e-05,
"loss": 0.0024,
"step": 187371
},
{
"epoch": 8.25,
"learning_rate": 4.998933302302128e-05,
"loss": 0.0023,
"step": 189090
},
{
"epoch": 8.33,
"learning_rate": 4.943368049494773e-05,
"loss": 0.0024,
"step": 190809
},
{
"epoch": 8.4,
"learning_rate": 4.887802796687419e-05,
"loss": 0.0024,
"step": 192528
},
{
"epoch": 8.48,
"learning_rate": 4.832237543880065e-05,
"loss": 0.0025,
"step": 194247
},
{
"epoch": 8.55,
"learning_rate": 4.77667229107271e-05,
"loss": 0.0024,
"step": 195966
},
{
"epoch": 8.63,
"learning_rate": 4.721107038265356e-05,
"loss": 0.0026,
"step": 197685
},
{
"epoch": 8.7,
"learning_rate": 4.665541785458001e-05,
"loss": 0.0023,
"step": 199404
},
{
"epoch": 8.78,
"learning_rate": 4.6099765326506474e-05,
"loss": 0.0024,
"step": 201123
},
{
"epoch": 8.85,
"learning_rate": 4.5544112798432924e-05,
"loss": 0.0024,
"step": 202842
},
{
"epoch": 8.93,
"learning_rate": 4.4988460270359386e-05,
"loss": 0.0025,
"step": 204561
},
{
"epoch": 9.0,
"eval_loss": 0.030335595831274986,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4109,
"eval_samples_per_second": 18.716,
"eval_steps_per_second": 1.268,
"step": 206244
},
{
"epoch": 9.0,
"learning_rate": 4.4432807742285836e-05,
"loss": 0.0026,
"step": 206280
},
{
"epoch": 9.08,
"learning_rate": 4.38771552142123e-05,
"loss": 0.0021,
"step": 207999
},
{
"epoch": 9.15,
"learning_rate": 4.332150268613875e-05,
"loss": 0.0022,
"step": 209718
},
{
"epoch": 9.23,
"learning_rate": 4.2765850158065204e-05,
"loss": 0.0022,
"step": 211437
},
{
"epoch": 9.3,
"learning_rate": 4.221019762999166e-05,
"loss": 0.0023,
"step": 213156
},
{
"epoch": 9.38,
"learning_rate": 4.165454510191812e-05,
"loss": 0.0023,
"step": 214875
},
{
"epoch": 9.45,
"learning_rate": 4.109889257384457e-05,
"loss": 0.0023,
"step": 216594
},
{
"epoch": 9.53,
"learning_rate": 4.054324004577103e-05,
"loss": 0.0023,
"step": 218313
},
{
"epoch": 9.6,
"learning_rate": 3.998758751769749e-05,
"loss": 0.0024,
"step": 220032
},
{
"epoch": 9.68,
"learning_rate": 3.943193498962394e-05,
"loss": 0.0024,
"step": 221751
},
{
"epoch": 9.75,
"learning_rate": 3.88762824615504e-05,
"loss": 0.0022,
"step": 223470
},
{
"epoch": 9.83,
"learning_rate": 3.8320629933476854e-05,
"loss": 0.0023,
"step": 225189
},
{
"epoch": 9.9,
"learning_rate": 3.776497740540331e-05,
"loss": 0.0024,
"step": 226908
},
{
"epoch": 9.98,
"learning_rate": 3.7209324877329766e-05,
"loss": 0.0023,
"step": 228627
},
{
"epoch": 10.0,
"eval_loss": 0.034065987914800644,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4726,
"eval_samples_per_second": 18.63,
"eval_steps_per_second": 1.262,
"step": 229160
},
{
"epoch": 10.05,
"learning_rate": 3.665367234925622e-05,
"loss": 0.0021,
"step": 230346
},
{
"epoch": 10.13,
"learning_rate": 3.609801982118268e-05,
"loss": 0.0021,
"step": 232065
},
{
"epoch": 10.2,
"learning_rate": 3.5542367293109135e-05,
"loss": 0.0021,
"step": 233784
},
{
"epoch": 10.28,
"learning_rate": 3.498671476503559e-05,
"loss": 0.0022,
"step": 235503
},
{
"epoch": 10.35,
"learning_rate": 3.443106223696205e-05,
"loss": 0.0021,
"step": 237222
},
{
"epoch": 10.43,
"learning_rate": 3.38754097088885e-05,
"loss": 0.0022,
"step": 238941
},
{
"epoch": 10.5,
"learning_rate": 3.331975718081496e-05,
"loss": 0.0021,
"step": 240660
},
{
"epoch": 10.58,
"learning_rate": 3.276410465274141e-05,
"loss": 0.0021,
"step": 242379
},
{
"epoch": 10.65,
"learning_rate": 3.220845212466787e-05,
"loss": 0.0021,
"step": 244098
},
{
"epoch": 10.73,
"learning_rate": 3.165279959659433e-05,
"loss": 0.0022,
"step": 245817
},
{
"epoch": 10.8,
"learning_rate": 3.1097147068520784e-05,
"loss": 0.0022,
"step": 247536
},
{
"epoch": 10.88,
"learning_rate": 3.054149454044724e-05,
"loss": 0.002,
"step": 249255
},
{
"epoch": 10.95,
"learning_rate": 2.9985842012373693e-05,
"loss": 0.0022,
"step": 250974
},
{
"epoch": 11.0,
"eval_loss": 0.03288768604397774,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.3832,
"eval_samples_per_second": 18.755,
"eval_steps_per_second": 1.27,
"step": 252076
},
{
"epoch": 11.03,
"learning_rate": 2.9430189484300152e-05,
"loss": 0.0022,
"step": 252693
},
{
"epoch": 11.1,
"learning_rate": 2.8874536956226605e-05,
"loss": 0.002,
"step": 254412
},
{
"epoch": 11.18,
"learning_rate": 2.8318884428153065e-05,
"loss": 0.002,
"step": 256131
},
{
"epoch": 11.25,
"learning_rate": 2.7763231900079517e-05,
"loss": 0.002,
"step": 257850
},
{
"epoch": 11.33,
"learning_rate": 2.7207579372005977e-05,
"loss": 0.0021,
"step": 259569
},
{
"epoch": 11.4,
"learning_rate": 2.665192684393243e-05,
"loss": 0.0019,
"step": 261288
},
{
"epoch": 11.48,
"learning_rate": 2.6096274315858886e-05,
"loss": 0.002,
"step": 263007
},
{
"epoch": 11.55,
"learning_rate": 2.5540621787785342e-05,
"loss": 0.002,
"step": 264726
},
{
"epoch": 11.63,
"learning_rate": 2.4984969259711798e-05,
"loss": 0.002,
"step": 266445
},
{
"epoch": 11.7,
"learning_rate": 2.4429316731638254e-05,
"loss": 0.0019,
"step": 268164
},
{
"epoch": 11.78,
"learning_rate": 2.387366420356471e-05,
"loss": 0.0021,
"step": 269883
},
{
"epoch": 11.85,
"learning_rate": 2.3318011675491167e-05,
"loss": 0.0021,
"step": 271602
},
{
"epoch": 11.93,
"learning_rate": 2.2762359147417623e-05,
"loss": 0.0019,
"step": 273321
},
{
"epoch": 12.0,
"eval_loss": 0.03355114161968231,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4567,
"eval_samples_per_second": 18.652,
"eval_steps_per_second": 1.263,
"step": 274992
},
{
"epoch": 12.0,
"learning_rate": 2.220670661934408e-05,
"loss": 0.0021,
"step": 275040
},
{
"epoch": 12.08,
"learning_rate": 2.1651054091270535e-05,
"loss": 0.002,
"step": 276759
},
{
"epoch": 12.15,
"learning_rate": 2.109540156319699e-05,
"loss": 0.002,
"step": 278478
},
{
"epoch": 12.23,
"learning_rate": 2.0539749035123444e-05,
"loss": 0.0018,
"step": 280197
},
{
"epoch": 12.3,
"learning_rate": 1.9984096507049904e-05,
"loss": 0.002,
"step": 281916
},
{
"epoch": 12.38,
"learning_rate": 1.942844397897636e-05,
"loss": 0.0019,
"step": 283635
},
{
"epoch": 12.45,
"learning_rate": 1.8872791450902816e-05,
"loss": 0.0018,
"step": 285354
},
{
"epoch": 12.53,
"learning_rate": 1.8317138922829272e-05,
"loss": 0.0019,
"step": 287073
},
{
"epoch": 12.6,
"learning_rate": 1.776148639475573e-05,
"loss": 0.0018,
"step": 288792
},
{
"epoch": 12.68,
"learning_rate": 1.7205833866682185e-05,
"loss": 0.0019,
"step": 290511
},
{
"epoch": 12.75,
"learning_rate": 1.665018133860864e-05,
"loss": 0.0019,
"step": 292230
},
{
"epoch": 12.83,
"learning_rate": 1.6094528810535094e-05,
"loss": 0.0021,
"step": 293949
},
{
"epoch": 12.9,
"learning_rate": 1.553887628246155e-05,
"loss": 0.0019,
"step": 295668
},
{
"epoch": 12.98,
"learning_rate": 1.4983223754388006e-05,
"loss": 0.002,
"step": 297387
},
{
"epoch": 13.0,
"eval_loss": 0.035788267850875854,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4958,
"eval_samples_per_second": 18.598,
"eval_steps_per_second": 1.26,
"step": 297908
},
{
"epoch": 13.05,
"learning_rate": 1.4427571226314462e-05,
"loss": 0.0019,
"step": 299106
},
{
"epoch": 13.13,
"learning_rate": 1.3871918698240918e-05,
"loss": 0.0018,
"step": 300825
},
{
"epoch": 13.2,
"learning_rate": 1.3316266170167374e-05,
"loss": 0.0018,
"step": 302544
},
{
"epoch": 13.28,
"learning_rate": 1.276061364209383e-05,
"loss": 0.0018,
"step": 304263
},
{
"epoch": 13.35,
"learning_rate": 1.2204961114020287e-05,
"loss": 0.0018,
"step": 305982
},
{
"epoch": 13.43,
"learning_rate": 1.1649308585946743e-05,
"loss": 0.0018,
"step": 307701
},
{
"epoch": 13.5,
"learning_rate": 1.1093656057873199e-05,
"loss": 0.0019,
"step": 309420
},
{
"epoch": 13.58,
"learning_rate": 1.0538003529799655e-05,
"loss": 0.0018,
"step": 311139
},
{
"epoch": 13.65,
"learning_rate": 9.982351001726111e-06,
"loss": 0.0017,
"step": 312858
},
{
"epoch": 13.73,
"learning_rate": 9.426698473652567e-06,
"loss": 0.0018,
"step": 314577
},
{
"epoch": 13.8,
"learning_rate": 8.871045945579024e-06,
"loss": 0.002,
"step": 316296
},
{
"epoch": 13.88,
"learning_rate": 8.31539341750548e-06,
"loss": 0.0018,
"step": 318015
},
{
"epoch": 13.95,
"learning_rate": 7.759740889431934e-06,
"loss": 0.0018,
"step": 319734
},
{
"epoch": 14.0,
"eval_loss": 0.03550655022263527,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.4713,
"eval_samples_per_second": 18.632,
"eval_steps_per_second": 1.262,
"step": 320824
},
{
"epoch": 14.03,
"learning_rate": 7.204088361358391e-06,
"loss": 0.0017,
"step": 321453
},
{
"epoch": 14.1,
"learning_rate": 6.648435833284847e-06,
"loss": 0.0018,
"step": 323172
},
{
"epoch": 14.18,
"learning_rate": 6.092783305211304e-06,
"loss": 0.0018,
"step": 324891
},
{
"epoch": 14.25,
"learning_rate": 5.537130777137759e-06,
"loss": 0.0017,
"step": 326610
},
{
"epoch": 14.33,
"learning_rate": 4.981478249064216e-06,
"loss": 0.0018,
"step": 328329
},
{
"epoch": 14.4,
"learning_rate": 4.425825720990671e-06,
"loss": 0.0018,
"step": 330048
},
{
"epoch": 14.48,
"learning_rate": 3.8701731929171274e-06,
"loss": 0.0018,
"step": 331767
},
{
"epoch": 14.55,
"learning_rate": 3.3145206648435836e-06,
"loss": 0.0019,
"step": 333486
},
{
"epoch": 14.63,
"learning_rate": 2.7588681367700398e-06,
"loss": 0.0017,
"step": 335205
},
{
"epoch": 14.7,
"learning_rate": 2.2032156086964955e-06,
"loss": 0.0018,
"step": 336924
},
{
"epoch": 14.78,
"learning_rate": 1.6475630806229517e-06,
"loss": 0.0017,
"step": 338643
},
{
"epoch": 14.85,
"learning_rate": 1.0919105525494076e-06,
"loss": 0.0018,
"step": 340362
},
{
"epoch": 14.93,
"learning_rate": 5.362580244758636e-07,
"loss": 0.0019,
"step": 342081
},
{
"epoch": 15.0,
"eval_loss": 0.03661360964179039,
"eval_max_distance": 8,
"eval_mean_distance": 0,
"eval_runtime": 13.3536,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 1.273,
"step": 343740
},
{
"epoch": 15.0,
"step": 343740,
"total_flos": 8.727792619277722e+16,
"train_loss": 0.0029792904397642345,
"train_runtime": 24306.5697,
"train_samples_per_second": 212.119,
"train_steps_per_second": 14.142
}
],
"logging_steps": 1719,
"max_steps": 343740,
"num_train_epochs": 15,
"save_steps": 3438,
"total_flos": 8.727792619277722e+16,
"trial_name": null,
"trial_params": null
}