mt_coref_en_ru_coref / trainer_state.json
cattana's picture
add missing files
7acb32a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 232668,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.989255075902144e-05,
"loss": 1.8237,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.978510151804288e-05,
"loss": 1.8141,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.967765227706432e-05,
"loss": 1.7858,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 4.9570203036085755e-05,
"loss": 1.7785,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 4.946275379510719e-05,
"loss": 1.7828,
"step": 2500
},
{
"epoch": 0.04,
"learning_rate": 4.935530455412863e-05,
"loss": 1.7669,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 4.924785531315007e-05,
"loss": 1.7623,
"step": 3500
},
{
"epoch": 0.05,
"learning_rate": 4.914040607217151e-05,
"loss": 1.7643,
"step": 4000
},
{
"epoch": 0.06,
"learning_rate": 4.9032956831192946e-05,
"loss": 1.7587,
"step": 4500
},
{
"epoch": 0.06,
"learning_rate": 4.8925507590214384e-05,
"loss": 1.7611,
"step": 5000
},
{
"epoch": 0.07,
"learning_rate": 4.881805834923582e-05,
"loss": 1.7565,
"step": 5500
},
{
"epoch": 0.08,
"learning_rate": 4.871060910825726e-05,
"loss": 1.745,
"step": 6000
},
{
"epoch": 0.08,
"learning_rate": 4.8603159867278705e-05,
"loss": 1.7496,
"step": 6500
},
{
"epoch": 0.09,
"learning_rate": 4.849571062630014e-05,
"loss": 1.7557,
"step": 7000
},
{
"epoch": 0.1,
"learning_rate": 4.8388261385321575e-05,
"loss": 1.7467,
"step": 7500
},
{
"epoch": 0.1,
"learning_rate": 4.828081214434301e-05,
"loss": 1.739,
"step": 8000
},
{
"epoch": 0.11,
"learning_rate": 4.817336290336446e-05,
"loss": 1.7374,
"step": 8500
},
{
"epoch": 0.12,
"learning_rate": 4.806591366238589e-05,
"loss": 1.7322,
"step": 9000
},
{
"epoch": 0.12,
"learning_rate": 4.795846442140733e-05,
"loss": 1.736,
"step": 9500
},
{
"epoch": 0.13,
"learning_rate": 4.7851015180428766e-05,
"loss": 1.7292,
"step": 10000
},
{
"epoch": 0.14,
"learning_rate": 4.774356593945021e-05,
"loss": 1.7209,
"step": 10500
},
{
"epoch": 0.14,
"learning_rate": 4.763611669847164e-05,
"loss": 1.7185,
"step": 11000
},
{
"epoch": 0.15,
"learning_rate": 4.752866745749308e-05,
"loss": 1.7245,
"step": 11500
},
{
"epoch": 0.15,
"learning_rate": 4.7421218216514525e-05,
"loss": 1.7284,
"step": 12000
},
{
"epoch": 0.16,
"learning_rate": 4.731376897553596e-05,
"loss": 1.7205,
"step": 12500
},
{
"epoch": 0.17,
"learning_rate": 4.7206319734557395e-05,
"loss": 1.7117,
"step": 13000
},
{
"epoch": 0.17,
"learning_rate": 4.709887049357883e-05,
"loss": 1.7213,
"step": 13500
},
{
"epoch": 0.18,
"learning_rate": 4.699142125260028e-05,
"loss": 1.711,
"step": 14000
},
{
"epoch": 0.19,
"learning_rate": 4.6883972011621716e-05,
"loss": 1.7087,
"step": 14500
},
{
"epoch": 0.19,
"learning_rate": 4.677652277064315e-05,
"loss": 1.7137,
"step": 15000
},
{
"epoch": 0.2,
"learning_rate": 4.6669073529664586e-05,
"loss": 1.7007,
"step": 15500
},
{
"epoch": 0.21,
"learning_rate": 4.656162428868603e-05,
"loss": 1.7068,
"step": 16000
},
{
"epoch": 0.21,
"learning_rate": 4.645417504770747e-05,
"loss": 1.6998,
"step": 16500
},
{
"epoch": 0.22,
"learning_rate": 4.63467258067289e-05,
"loss": 1.6966,
"step": 17000
},
{
"epoch": 0.23,
"learning_rate": 4.623927656575034e-05,
"loss": 1.6987,
"step": 17500
},
{
"epoch": 0.23,
"learning_rate": 4.613182732477178e-05,
"loss": 1.7104,
"step": 18000
},
{
"epoch": 0.24,
"learning_rate": 4.602437808379322e-05,
"loss": 1.7054,
"step": 18500
},
{
"epoch": 0.24,
"learning_rate": 4.591692884281465e-05,
"loss": 1.7,
"step": 19000
},
{
"epoch": 0.25,
"learning_rate": 4.58094796018361e-05,
"loss": 1.7117,
"step": 19500
},
{
"epoch": 0.26,
"learning_rate": 4.5702030360857536e-05,
"loss": 1.6933,
"step": 20000
},
{
"epoch": 0.26,
"learning_rate": 4.5594581119878974e-05,
"loss": 1.6761,
"step": 20500
},
{
"epoch": 0.27,
"learning_rate": 4.5487131878900405e-05,
"loss": 1.691,
"step": 21000
},
{
"epoch": 0.28,
"learning_rate": 4.537968263792185e-05,
"loss": 1.6856,
"step": 21500
},
{
"epoch": 0.28,
"learning_rate": 4.527223339694329e-05,
"loss": 1.7046,
"step": 22000
},
{
"epoch": 0.29,
"learning_rate": 4.516478415596473e-05,
"loss": 1.6825,
"step": 22500
},
{
"epoch": 0.3,
"learning_rate": 4.505733491498616e-05,
"loss": 1.6776,
"step": 23000
},
{
"epoch": 0.3,
"learning_rate": 4.49498856740076e-05,
"loss": 1.6698,
"step": 23500
},
{
"epoch": 0.31,
"learning_rate": 4.484243643302904e-05,
"loss": 1.6715,
"step": 24000
},
{
"epoch": 0.32,
"learning_rate": 4.473498719205047e-05,
"loss": 1.6865,
"step": 24500
},
{
"epoch": 0.32,
"learning_rate": 4.462753795107192e-05,
"loss": 1.6776,
"step": 25000
},
{
"epoch": 0.33,
"learning_rate": 4.4520088710093356e-05,
"loss": 1.6831,
"step": 25500
},
{
"epoch": 0.34,
"learning_rate": 4.4412639469114794e-05,
"loss": 1.6748,
"step": 26000
},
{
"epoch": 0.34,
"learning_rate": 4.4305190228136225e-05,
"loss": 1.7095,
"step": 26500
},
{
"epoch": 0.35,
"learning_rate": 4.419774098715767e-05,
"loss": 1.6873,
"step": 27000
},
{
"epoch": 0.35,
"learning_rate": 4.409029174617911e-05,
"loss": 1.6689,
"step": 27500
},
{
"epoch": 0.36,
"learning_rate": 4.3982842505200547e-05,
"loss": 1.6597,
"step": 28000
},
{
"epoch": 0.37,
"learning_rate": 4.387539326422198e-05,
"loss": 1.6801,
"step": 28500
},
{
"epoch": 0.37,
"learning_rate": 4.376794402324342e-05,
"loss": 1.6833,
"step": 29000
},
{
"epoch": 0.38,
"learning_rate": 4.366049478226486e-05,
"loss": 1.6795,
"step": 29500
},
{
"epoch": 0.39,
"learning_rate": 4.35530455412863e-05,
"loss": 1.6787,
"step": 30000
},
{
"epoch": 0.39,
"learning_rate": 4.344559630030774e-05,
"loss": 1.6615,
"step": 30500
},
{
"epoch": 0.4,
"learning_rate": 4.3338147059329176e-05,
"loss": 1.6579,
"step": 31000
},
{
"epoch": 0.41,
"learning_rate": 4.3230697818350614e-05,
"loss": 1.6736,
"step": 31500
},
{
"epoch": 0.41,
"learning_rate": 4.312324857737205e-05,
"loss": 1.6589,
"step": 32000
},
{
"epoch": 0.42,
"learning_rate": 4.301579933639349e-05,
"loss": 1.6583,
"step": 32500
},
{
"epoch": 0.43,
"learning_rate": 4.290835009541493e-05,
"loss": 1.6816,
"step": 33000
},
{
"epoch": 0.43,
"learning_rate": 4.2800900854436366e-05,
"loss": 1.66,
"step": 33500
},
{
"epoch": 0.44,
"learning_rate": 4.2693451613457805e-05,
"loss": 1.6679,
"step": 34000
},
{
"epoch": 0.44,
"learning_rate": 4.258600237247924e-05,
"loss": 1.6608,
"step": 34500
},
{
"epoch": 0.45,
"learning_rate": 4.247855313150068e-05,
"loss": 1.6665,
"step": 35000
},
{
"epoch": 0.46,
"learning_rate": 4.237110389052212e-05,
"loss": 1.651,
"step": 35500
},
{
"epoch": 0.46,
"learning_rate": 4.226365464954356e-05,
"loss": 1.6525,
"step": 36000
},
{
"epoch": 0.47,
"learning_rate": 4.2156205408564995e-05,
"loss": 1.6436,
"step": 36500
},
{
"epoch": 0.48,
"learning_rate": 4.2048756167586434e-05,
"loss": 1.656,
"step": 37000
},
{
"epoch": 0.48,
"learning_rate": 4.194130692660787e-05,
"loss": 1.6529,
"step": 37500
},
{
"epoch": 0.49,
"learning_rate": 4.183385768562931e-05,
"loss": 1.6774,
"step": 38000
},
{
"epoch": 0.5,
"learning_rate": 4.172640844465075e-05,
"loss": 1.6629,
"step": 38500
},
{
"epoch": 0.5,
"learning_rate": 4.1618959203672186e-05,
"loss": 1.6407,
"step": 39000
},
{
"epoch": 0.51,
"learning_rate": 4.1511509962693624e-05,
"loss": 1.6512,
"step": 39500
},
{
"epoch": 0.52,
"learning_rate": 4.140406072171506e-05,
"loss": 1.625,
"step": 40000
},
{
"epoch": 0.52,
"learning_rate": 4.12966114807365e-05,
"loss": 1.6338,
"step": 40500
},
{
"epoch": 0.53,
"learning_rate": 4.118916223975794e-05,
"loss": 1.6484,
"step": 41000
},
{
"epoch": 0.54,
"learning_rate": 4.108171299877938e-05,
"loss": 1.6364,
"step": 41500
},
{
"epoch": 0.54,
"learning_rate": 4.0974263757800815e-05,
"loss": 1.6493,
"step": 42000
},
{
"epoch": 0.55,
"learning_rate": 4.086681451682225e-05,
"loss": 1.6353,
"step": 42500
},
{
"epoch": 0.55,
"learning_rate": 4.075936527584369e-05,
"loss": 1.6503,
"step": 43000
},
{
"epoch": 0.56,
"learning_rate": 4.0651916034865136e-05,
"loss": 1.6469,
"step": 43500
},
{
"epoch": 0.57,
"learning_rate": 4.054446679388657e-05,
"loss": 1.642,
"step": 44000
},
{
"epoch": 0.57,
"learning_rate": 4.0437017552908006e-05,
"loss": 1.6494,
"step": 44500
},
{
"epoch": 0.58,
"learning_rate": 4.0329568311929444e-05,
"loss": 1.6473,
"step": 45000
},
{
"epoch": 0.59,
"learning_rate": 4.022211907095089e-05,
"loss": 1.6371,
"step": 45500
},
{
"epoch": 0.59,
"learning_rate": 4.011466982997232e-05,
"loss": 1.6472,
"step": 46000
},
{
"epoch": 0.6,
"learning_rate": 4.000722058899376e-05,
"loss": 1.6131,
"step": 46500
},
{
"epoch": 0.61,
"learning_rate": 3.98997713480152e-05,
"loss": 1.6361,
"step": 47000
},
{
"epoch": 0.61,
"learning_rate": 3.979232210703664e-05,
"loss": 1.6342,
"step": 47500
},
{
"epoch": 0.62,
"learning_rate": 3.968487286605807e-05,
"loss": 1.6432,
"step": 48000
},
{
"epoch": 0.63,
"learning_rate": 3.957742362507951e-05,
"loss": 1.6355,
"step": 48500
},
{
"epoch": 0.63,
"learning_rate": 3.9469974384100956e-05,
"loss": 1.6419,
"step": 49000
},
{
"epoch": 0.64,
"learning_rate": 3.9362525143122394e-05,
"loss": 1.6376,
"step": 49500
},
{
"epoch": 0.64,
"learning_rate": 3.9255075902143826e-05,
"loss": 1.6357,
"step": 50000
},
{
"epoch": 0.65,
"learning_rate": 3.9147626661165264e-05,
"loss": 1.6393,
"step": 50500
},
{
"epoch": 0.66,
"learning_rate": 3.904017742018671e-05,
"loss": 1.6097,
"step": 51000
},
{
"epoch": 0.66,
"learning_rate": 3.893272817920815e-05,
"loss": 1.6309,
"step": 51500
},
{
"epoch": 0.67,
"learning_rate": 3.882527893822958e-05,
"loss": 1.6229,
"step": 52000
},
{
"epoch": 0.68,
"learning_rate": 3.871782969725102e-05,
"loss": 1.6481,
"step": 52500
},
{
"epoch": 0.68,
"learning_rate": 3.861038045627246e-05,
"loss": 1.6373,
"step": 53000
},
{
"epoch": 0.69,
"learning_rate": 3.85029312152939e-05,
"loss": 1.6124,
"step": 53500
},
{
"epoch": 0.7,
"learning_rate": 3.839548197431533e-05,
"loss": 1.6233,
"step": 54000
},
{
"epoch": 0.7,
"learning_rate": 3.8288032733336776e-05,
"loss": 1.6425,
"step": 54500
},
{
"epoch": 0.71,
"learning_rate": 3.8180583492358214e-05,
"loss": 1.6305,
"step": 55000
},
{
"epoch": 0.72,
"learning_rate": 3.807313425137965e-05,
"loss": 1.6294,
"step": 55500
},
{
"epoch": 0.72,
"learning_rate": 3.7965685010401084e-05,
"loss": 1.6034,
"step": 56000
},
{
"epoch": 0.73,
"learning_rate": 3.785823576942253e-05,
"loss": 1.6365,
"step": 56500
},
{
"epoch": 0.73,
"learning_rate": 3.775078652844397e-05,
"loss": 1.6147,
"step": 57000
},
{
"epoch": 0.74,
"learning_rate": 3.7643337287465405e-05,
"loss": 1.6186,
"step": 57500
},
{
"epoch": 0.75,
"learning_rate": 3.7535888046486837e-05,
"loss": 1.618,
"step": 58000
},
{
"epoch": 0.75,
"learning_rate": 3.742843880550828e-05,
"loss": 1.6273,
"step": 58500
},
{
"epoch": 0.76,
"learning_rate": 3.732098956452972e-05,
"loss": 1.6266,
"step": 59000
},
{
"epoch": 0.77,
"learning_rate": 3.721354032355116e-05,
"loss": 1.6176,
"step": 59500
},
{
"epoch": 0.77,
"learning_rate": 3.7106091082572596e-05,
"loss": 1.597,
"step": 60000
},
{
"epoch": 0.78,
"learning_rate": 3.6998641841594034e-05,
"loss": 1.6108,
"step": 60500
},
{
"epoch": 0.79,
"learning_rate": 3.689119260061547e-05,
"loss": 1.5991,
"step": 61000
},
{
"epoch": 0.79,
"learning_rate": 3.678374335963691e-05,
"loss": 1.6153,
"step": 61500
},
{
"epoch": 0.8,
"learning_rate": 3.667629411865835e-05,
"loss": 1.6101,
"step": 62000
},
{
"epoch": 0.81,
"learning_rate": 3.656884487767979e-05,
"loss": 1.6122,
"step": 62500
},
{
"epoch": 0.81,
"learning_rate": 3.6461395636701225e-05,
"loss": 1.6068,
"step": 63000
},
{
"epoch": 0.82,
"learning_rate": 3.635394639572266e-05,
"loss": 1.6066,
"step": 63500
},
{
"epoch": 0.83,
"learning_rate": 3.62464971547441e-05,
"loss": 1.6106,
"step": 64000
},
{
"epoch": 0.83,
"learning_rate": 3.613904791376554e-05,
"loss": 1.6088,
"step": 64500
},
{
"epoch": 0.84,
"learning_rate": 3.603159867278698e-05,
"loss": 1.6154,
"step": 65000
},
{
"epoch": 0.84,
"learning_rate": 3.5924149431808416e-05,
"loss": 1.6271,
"step": 65500
},
{
"epoch": 0.85,
"learning_rate": 3.5816700190829854e-05,
"loss": 1.5984,
"step": 66000
},
{
"epoch": 0.86,
"learning_rate": 3.570925094985129e-05,
"loss": 1.599,
"step": 66500
},
{
"epoch": 0.86,
"learning_rate": 3.560180170887273e-05,
"loss": 1.6013,
"step": 67000
},
{
"epoch": 0.87,
"learning_rate": 3.549435246789417e-05,
"loss": 1.6089,
"step": 67500
},
{
"epoch": 0.88,
"learning_rate": 3.538690322691561e-05,
"loss": 1.6076,
"step": 68000
},
{
"epoch": 0.88,
"learning_rate": 3.5279453985937045e-05,
"loss": 1.6111,
"step": 68500
},
{
"epoch": 0.89,
"learning_rate": 3.517200474495848e-05,
"loss": 1.5966,
"step": 69000
},
{
"epoch": 0.9,
"learning_rate": 3.506455550397992e-05,
"loss": 1.6066,
"step": 69500
},
{
"epoch": 0.9,
"learning_rate": 3.495710626300136e-05,
"loss": 1.6117,
"step": 70000
},
{
"epoch": 0.91,
"learning_rate": 3.48496570220228e-05,
"loss": 1.6075,
"step": 70500
},
{
"epoch": 0.92,
"learning_rate": 3.4742207781044236e-05,
"loss": 1.6064,
"step": 71000
},
{
"epoch": 0.92,
"learning_rate": 3.4634758540065674e-05,
"loss": 1.6041,
"step": 71500
},
{
"epoch": 0.93,
"learning_rate": 3.452730929908711e-05,
"loss": 1.6069,
"step": 72000
},
{
"epoch": 0.93,
"learning_rate": 3.441986005810855e-05,
"loss": 1.5887,
"step": 72500
},
{
"epoch": 0.94,
"learning_rate": 3.4312410817129995e-05,
"loss": 1.6004,
"step": 73000
},
{
"epoch": 0.95,
"learning_rate": 3.4204961576151426e-05,
"loss": 1.6039,
"step": 73500
},
{
"epoch": 0.95,
"learning_rate": 3.4097512335172865e-05,
"loss": 1.5959,
"step": 74000
},
{
"epoch": 0.96,
"learning_rate": 3.39900630941943e-05,
"loss": 1.5957,
"step": 74500
},
{
"epoch": 0.97,
"learning_rate": 3.388261385321575e-05,
"loss": 1.6003,
"step": 75000
},
{
"epoch": 0.97,
"learning_rate": 3.377516461223718e-05,
"loss": 1.5945,
"step": 75500
},
{
"epoch": 0.98,
"learning_rate": 3.366771537125862e-05,
"loss": 1.5928,
"step": 76000
},
{
"epoch": 0.99,
"learning_rate": 3.3560266130280055e-05,
"loss": 1.5914,
"step": 76500
},
{
"epoch": 0.99,
"learning_rate": 3.34528168893015e-05,
"loss": 1.5959,
"step": 77000
},
{
"epoch": 1.0,
"learning_rate": 3.334536764832293e-05,
"loss": 1.5916,
"step": 77500
},
{
"epoch": 1.01,
"learning_rate": 3.323791840734437e-05,
"loss": 1.5079,
"step": 78000
},
{
"epoch": 1.01,
"learning_rate": 3.3130469166365815e-05,
"loss": 1.4935,
"step": 78500
},
{
"epoch": 1.02,
"learning_rate": 3.3023019925387246e-05,
"loss": 1.4985,
"step": 79000
},
{
"epoch": 1.03,
"learning_rate": 3.2915570684408684e-05,
"loss": 1.4883,
"step": 79500
},
{
"epoch": 1.03,
"learning_rate": 3.280812144343012e-05,
"loss": 1.4964,
"step": 80000
},
{
"epoch": 1.04,
"learning_rate": 3.270067220245157e-05,
"loss": 1.4902,
"step": 80500
},
{
"epoch": 1.04,
"learning_rate": 3.2593222961473e-05,
"loss": 1.4874,
"step": 81000
},
{
"epoch": 1.05,
"learning_rate": 3.248577372049444e-05,
"loss": 1.495,
"step": 81500
},
{
"epoch": 1.06,
"learning_rate": 3.2378324479515875e-05,
"loss": 1.4983,
"step": 82000
},
{
"epoch": 1.06,
"learning_rate": 3.227087523853732e-05,
"loss": 1.4897,
"step": 82500
},
{
"epoch": 1.07,
"learning_rate": 3.216342599755875e-05,
"loss": 1.4987,
"step": 83000
},
{
"epoch": 1.08,
"learning_rate": 3.205597675658019e-05,
"loss": 1.4935,
"step": 83500
},
{
"epoch": 1.08,
"learning_rate": 3.1948527515601635e-05,
"loss": 1.4903,
"step": 84000
},
{
"epoch": 1.09,
"learning_rate": 3.184107827462307e-05,
"loss": 1.5145,
"step": 84500
},
{
"epoch": 1.1,
"learning_rate": 3.1733629033644504e-05,
"loss": 1.5038,
"step": 85000
},
{
"epoch": 1.1,
"learning_rate": 3.162617979266594e-05,
"loss": 1.4882,
"step": 85500
},
{
"epoch": 1.11,
"learning_rate": 3.151873055168739e-05,
"loss": 1.5193,
"step": 86000
},
{
"epoch": 1.12,
"learning_rate": 3.1411281310708826e-05,
"loss": 1.5029,
"step": 86500
},
{
"epoch": 1.12,
"learning_rate": 3.130383206973026e-05,
"loss": 1.4921,
"step": 87000
},
{
"epoch": 1.13,
"learning_rate": 3.1196382828751695e-05,
"loss": 1.4987,
"step": 87500
},
{
"epoch": 1.13,
"learning_rate": 3.108893358777314e-05,
"loss": 1.4979,
"step": 88000
},
{
"epoch": 1.14,
"learning_rate": 3.098148434679458e-05,
"loss": 1.499,
"step": 88500
},
{
"epoch": 1.15,
"learning_rate": 3.087403510581601e-05,
"loss": 1.4909,
"step": 89000
},
{
"epoch": 1.15,
"learning_rate": 3.0766585864837455e-05,
"loss": 1.4813,
"step": 89500
},
{
"epoch": 1.16,
"learning_rate": 3.065913662385889e-05,
"loss": 1.5043,
"step": 90000
},
{
"epoch": 1.17,
"learning_rate": 3.055168738288033e-05,
"loss": 1.4965,
"step": 90500
},
{
"epoch": 1.17,
"learning_rate": 3.0444238141901766e-05,
"loss": 1.496,
"step": 91000
},
{
"epoch": 1.18,
"learning_rate": 3.0336788900923207e-05,
"loss": 1.5061,
"step": 91500
},
{
"epoch": 1.19,
"learning_rate": 3.0229339659944645e-05,
"loss": 1.4969,
"step": 92000
},
{
"epoch": 1.19,
"learning_rate": 3.012189041896608e-05,
"loss": 1.4818,
"step": 92500
},
{
"epoch": 1.2,
"learning_rate": 3.001444117798752e-05,
"loss": 1.5003,
"step": 93000
},
{
"epoch": 1.21,
"learning_rate": 2.990699193700896e-05,
"loss": 1.5095,
"step": 93500
},
{
"epoch": 1.21,
"learning_rate": 2.9799542696030398e-05,
"loss": 1.4885,
"step": 94000
},
{
"epoch": 1.22,
"learning_rate": 2.9692093455051833e-05,
"loss": 1.5077,
"step": 94500
},
{
"epoch": 1.22,
"learning_rate": 2.958464421407327e-05,
"loss": 1.4981,
"step": 95000
},
{
"epoch": 1.23,
"learning_rate": 2.9477194973094713e-05,
"loss": 1.4877,
"step": 95500
},
{
"epoch": 1.24,
"learning_rate": 2.936974573211615e-05,
"loss": 1.5061,
"step": 96000
},
{
"epoch": 1.24,
"learning_rate": 2.9262296491137586e-05,
"loss": 1.4959,
"step": 96500
},
{
"epoch": 1.25,
"learning_rate": 2.9154847250159027e-05,
"loss": 1.4918,
"step": 97000
},
{
"epoch": 1.26,
"learning_rate": 2.9047398009180465e-05,
"loss": 1.5064,
"step": 97500
},
{
"epoch": 1.26,
"learning_rate": 2.8939948768201903e-05,
"loss": 1.5053,
"step": 98000
},
{
"epoch": 1.27,
"learning_rate": 2.8832499527223338e-05,
"loss": 1.4978,
"step": 98500
},
{
"epoch": 1.28,
"learning_rate": 2.872505028624478e-05,
"loss": 1.5012,
"step": 99000
},
{
"epoch": 1.28,
"learning_rate": 2.8617601045266218e-05,
"loss": 1.4973,
"step": 99500
},
{
"epoch": 1.29,
"learning_rate": 2.8510151804287656e-05,
"loss": 1.4979,
"step": 100000
},
{
"epoch": 1.3,
"learning_rate": 2.840270256330909e-05,
"loss": 1.5095,
"step": 100500
},
{
"epoch": 1.3,
"learning_rate": 2.8295253322330532e-05,
"loss": 1.4894,
"step": 101000
},
{
"epoch": 1.31,
"learning_rate": 2.818780408135197e-05,
"loss": 1.507,
"step": 101500
},
{
"epoch": 1.32,
"learning_rate": 2.808035484037341e-05,
"loss": 1.5054,
"step": 102000
},
{
"epoch": 1.32,
"learning_rate": 2.797290559939485e-05,
"loss": 1.4919,
"step": 102500
},
{
"epoch": 1.33,
"learning_rate": 2.7865456358416285e-05,
"loss": 1.4976,
"step": 103000
},
{
"epoch": 1.33,
"learning_rate": 2.7758007117437723e-05,
"loss": 1.4988,
"step": 103500
},
{
"epoch": 1.34,
"learning_rate": 2.765055787645916e-05,
"loss": 1.5031,
"step": 104000
},
{
"epoch": 1.35,
"learning_rate": 2.7543108635480603e-05,
"loss": 1.5058,
"step": 104500
},
{
"epoch": 1.35,
"learning_rate": 2.7435659394502038e-05,
"loss": 1.5091,
"step": 105000
},
{
"epoch": 1.36,
"learning_rate": 2.7328210153523476e-05,
"loss": 1.4957,
"step": 105500
},
{
"epoch": 1.37,
"learning_rate": 2.7220760912544914e-05,
"loss": 1.5023,
"step": 106000
},
{
"epoch": 1.37,
"learning_rate": 2.7113311671566356e-05,
"loss": 1.4898,
"step": 106500
},
{
"epoch": 1.38,
"learning_rate": 2.700586243058779e-05,
"loss": 1.5055,
"step": 107000
},
{
"epoch": 1.39,
"learning_rate": 2.689841318960923e-05,
"loss": 1.4897,
"step": 107500
},
{
"epoch": 1.39,
"learning_rate": 2.679096394863067e-05,
"loss": 1.5013,
"step": 108000
},
{
"epoch": 1.4,
"learning_rate": 2.6683514707652108e-05,
"loss": 1.4905,
"step": 108500
},
{
"epoch": 1.41,
"learning_rate": 2.6576065466673543e-05,
"loss": 1.4862,
"step": 109000
},
{
"epoch": 1.41,
"learning_rate": 2.646861622569498e-05,
"loss": 1.4913,
"step": 109500
},
{
"epoch": 1.42,
"learning_rate": 2.6361166984716423e-05,
"loss": 1.4927,
"step": 110000
},
{
"epoch": 1.42,
"learning_rate": 2.625371774373786e-05,
"loss": 1.484,
"step": 110500
},
{
"epoch": 1.43,
"learning_rate": 2.6146268502759296e-05,
"loss": 1.4896,
"step": 111000
},
{
"epoch": 1.44,
"learning_rate": 2.6038819261780734e-05,
"loss": 1.4967,
"step": 111500
},
{
"epoch": 1.44,
"learning_rate": 2.5931370020802175e-05,
"loss": 1.5012,
"step": 112000
},
{
"epoch": 1.45,
"learning_rate": 2.5823920779823614e-05,
"loss": 1.5021,
"step": 112500
},
{
"epoch": 1.46,
"learning_rate": 2.571647153884505e-05,
"loss": 1.5086,
"step": 113000
},
{
"epoch": 1.46,
"learning_rate": 2.5609022297866493e-05,
"loss": 1.4857,
"step": 113500
},
{
"epoch": 1.47,
"learning_rate": 2.5501573056887928e-05,
"loss": 1.4873,
"step": 114000
},
{
"epoch": 1.48,
"learning_rate": 2.5394123815909366e-05,
"loss": 1.485,
"step": 114500
},
{
"epoch": 1.48,
"learning_rate": 2.52866745749308e-05,
"loss": 1.4755,
"step": 115000
},
{
"epoch": 1.49,
"learning_rate": 2.5179225333952246e-05,
"loss": 1.4852,
"step": 115500
},
{
"epoch": 1.5,
"learning_rate": 2.507177609297368e-05,
"loss": 1.4809,
"step": 116000
},
{
"epoch": 1.5,
"learning_rate": 2.496432685199512e-05,
"loss": 1.4845,
"step": 116500
},
{
"epoch": 1.51,
"learning_rate": 2.4856877611016557e-05,
"loss": 1.487,
"step": 117000
},
{
"epoch": 1.52,
"learning_rate": 2.4749428370037995e-05,
"loss": 1.474,
"step": 117500
},
{
"epoch": 1.52,
"learning_rate": 2.4641979129059433e-05,
"loss": 1.5064,
"step": 118000
},
{
"epoch": 1.53,
"learning_rate": 2.453452988808087e-05,
"loss": 1.491,
"step": 118500
},
{
"epoch": 1.53,
"learning_rate": 2.442708064710231e-05,
"loss": 1.4957,
"step": 119000
},
{
"epoch": 1.54,
"learning_rate": 2.4319631406123748e-05,
"loss": 1.4837,
"step": 119500
},
{
"epoch": 1.55,
"learning_rate": 2.4212182165145186e-05,
"loss": 1.4846,
"step": 120000
},
{
"epoch": 1.55,
"learning_rate": 2.4104732924166624e-05,
"loss": 1.4952,
"step": 120500
},
{
"epoch": 1.56,
"learning_rate": 2.3997283683188062e-05,
"loss": 1.4772,
"step": 121000
},
{
"epoch": 1.57,
"learning_rate": 2.38898344422095e-05,
"loss": 1.4906,
"step": 121500
},
{
"epoch": 1.57,
"learning_rate": 2.378238520123094e-05,
"loss": 1.4916,
"step": 122000
},
{
"epoch": 1.58,
"learning_rate": 2.3674935960252377e-05,
"loss": 1.4888,
"step": 122500
},
{
"epoch": 1.59,
"learning_rate": 2.3567486719273815e-05,
"loss": 1.4971,
"step": 123000
},
{
"epoch": 1.59,
"learning_rate": 2.3460037478295253e-05,
"loss": 1.4929,
"step": 123500
},
{
"epoch": 1.6,
"learning_rate": 2.335258823731669e-05,
"loss": 1.4798,
"step": 124000
},
{
"epoch": 1.61,
"learning_rate": 2.324513899633813e-05,
"loss": 1.4834,
"step": 124500
},
{
"epoch": 1.61,
"learning_rate": 2.313768975535957e-05,
"loss": 1.4751,
"step": 125000
},
{
"epoch": 1.62,
"learning_rate": 2.3030240514381006e-05,
"loss": 1.4937,
"step": 125500
},
{
"epoch": 1.62,
"learning_rate": 2.2922791273402448e-05,
"loss": 1.4854,
"step": 126000
},
{
"epoch": 1.63,
"learning_rate": 2.2815342032423882e-05,
"loss": 1.4865,
"step": 126500
},
{
"epoch": 1.64,
"learning_rate": 2.2707892791445324e-05,
"loss": 1.4818,
"step": 127000
},
{
"epoch": 1.64,
"learning_rate": 2.260044355046676e-05,
"loss": 1.4887,
"step": 127500
},
{
"epoch": 1.65,
"learning_rate": 2.24929943094882e-05,
"loss": 1.4839,
"step": 128000
},
{
"epoch": 1.66,
"learning_rate": 2.2385545068509635e-05,
"loss": 1.4854,
"step": 128500
},
{
"epoch": 1.66,
"learning_rate": 2.2278095827531077e-05,
"loss": 1.4915,
"step": 129000
},
{
"epoch": 1.67,
"learning_rate": 2.217064658655251e-05,
"loss": 1.4768,
"step": 129500
},
{
"epoch": 1.68,
"learning_rate": 2.2063197345573953e-05,
"loss": 1.5006,
"step": 130000
},
{
"epoch": 1.68,
"learning_rate": 2.1955748104595388e-05,
"loss": 1.4758,
"step": 130500
},
{
"epoch": 1.69,
"learning_rate": 2.184829886361683e-05,
"loss": 1.4815,
"step": 131000
},
{
"epoch": 1.7,
"learning_rate": 2.1740849622638267e-05,
"loss": 1.4841,
"step": 131500
},
{
"epoch": 1.7,
"learning_rate": 2.1633400381659706e-05,
"loss": 1.4846,
"step": 132000
},
{
"epoch": 1.71,
"learning_rate": 2.1525951140681144e-05,
"loss": 1.4824,
"step": 132500
},
{
"epoch": 1.71,
"learning_rate": 2.1418501899702582e-05,
"loss": 1.4792,
"step": 133000
},
{
"epoch": 1.72,
"learning_rate": 2.131105265872402e-05,
"loss": 1.4694,
"step": 133500
},
{
"epoch": 1.73,
"learning_rate": 2.1203603417745458e-05,
"loss": 1.4811,
"step": 134000
},
{
"epoch": 1.73,
"learning_rate": 2.1096154176766896e-05,
"loss": 1.4829,
"step": 134500
},
{
"epoch": 1.74,
"learning_rate": 2.0988704935788335e-05,
"loss": 1.4703,
"step": 135000
},
{
"epoch": 1.75,
"learning_rate": 2.0881255694809773e-05,
"loss": 1.4804,
"step": 135500
},
{
"epoch": 1.75,
"learning_rate": 2.077380645383121e-05,
"loss": 1.4774,
"step": 136000
},
{
"epoch": 1.76,
"learning_rate": 2.066635721285265e-05,
"loss": 1.4853,
"step": 136500
},
{
"epoch": 1.77,
"learning_rate": 2.055890797187409e-05,
"loss": 1.4725,
"step": 137000
},
{
"epoch": 1.77,
"learning_rate": 2.0451458730895525e-05,
"loss": 1.4884,
"step": 137500
},
{
"epoch": 1.78,
"learning_rate": 2.0344009489916967e-05,
"loss": 1.4692,
"step": 138000
},
{
"epoch": 1.79,
"learning_rate": 2.02365602489384e-05,
"loss": 1.4681,
"step": 138500
},
{
"epoch": 1.79,
"learning_rate": 2.0129111007959843e-05,
"loss": 1.4807,
"step": 139000
},
{
"epoch": 1.8,
"learning_rate": 2.0021661766981278e-05,
"loss": 1.4707,
"step": 139500
},
{
"epoch": 1.81,
"learning_rate": 1.991421252600272e-05,
"loss": 1.4773,
"step": 140000
},
{
"epoch": 1.81,
"learning_rate": 1.9806763285024154e-05,
"loss": 1.4763,
"step": 140500
},
{
"epoch": 1.82,
"learning_rate": 1.9699314044045593e-05,
"loss": 1.466,
"step": 141000
},
{
"epoch": 1.82,
"learning_rate": 1.959186480306703e-05,
"loss": 1.4824,
"step": 141500
},
{
"epoch": 1.83,
"learning_rate": 1.948441556208847e-05,
"loss": 1.4726,
"step": 142000
},
{
"epoch": 1.84,
"learning_rate": 1.9376966321109907e-05,
"loss": 1.4636,
"step": 142500
},
{
"epoch": 1.84,
"learning_rate": 1.9269517080131345e-05,
"loss": 1.4791,
"step": 143000
},
{
"epoch": 1.85,
"learning_rate": 1.9162067839152787e-05,
"loss": 1.4701,
"step": 143500
},
{
"epoch": 1.86,
"learning_rate": 1.905461859817422e-05,
"loss": 1.4937,
"step": 144000
},
{
"epoch": 1.86,
"learning_rate": 1.8947169357195663e-05,
"loss": 1.493,
"step": 144500
},
{
"epoch": 1.87,
"learning_rate": 1.8839720116217098e-05,
"loss": 1.479,
"step": 145000
},
{
"epoch": 1.88,
"learning_rate": 1.873227087523854e-05,
"loss": 1.4797,
"step": 145500
},
{
"epoch": 1.88,
"learning_rate": 1.8624821634259974e-05,
"loss": 1.4672,
"step": 146000
},
{
"epoch": 1.89,
"learning_rate": 1.8517372393281416e-05,
"loss": 1.4731,
"step": 146500
},
{
"epoch": 1.9,
"learning_rate": 1.840992315230285e-05,
"loss": 1.4639,
"step": 147000
},
{
"epoch": 1.9,
"learning_rate": 1.8302473911324292e-05,
"loss": 1.4727,
"step": 147500
},
{
"epoch": 1.91,
"learning_rate": 1.8195024670345727e-05,
"loss": 1.4823,
"step": 148000
},
{
"epoch": 1.91,
"learning_rate": 1.808757542936717e-05,
"loss": 1.473,
"step": 148500
},
{
"epoch": 1.92,
"learning_rate": 1.7980126188388607e-05,
"loss": 1.4702,
"step": 149000
},
{
"epoch": 1.93,
"learning_rate": 1.7872676947410045e-05,
"loss": 1.4748,
"step": 149500
},
{
"epoch": 1.93,
"learning_rate": 1.7765227706431483e-05,
"loss": 1.4712,
"step": 150000
},
{
"epoch": 1.94,
"learning_rate": 1.765777846545292e-05,
"loss": 1.4874,
"step": 150500
},
{
"epoch": 1.95,
"learning_rate": 1.755032922447436e-05,
"loss": 1.4633,
"step": 151000
},
{
"epoch": 1.95,
"learning_rate": 1.7442879983495797e-05,
"loss": 1.4532,
"step": 151500
},
{
"epoch": 1.96,
"learning_rate": 1.7335430742517236e-05,
"loss": 1.4656,
"step": 152000
},
{
"epoch": 1.97,
"learning_rate": 1.7227981501538674e-05,
"loss": 1.4622,
"step": 152500
},
{
"epoch": 1.97,
"learning_rate": 1.7120532260560112e-05,
"loss": 1.4802,
"step": 153000
},
{
"epoch": 1.98,
"learning_rate": 1.701308301958155e-05,
"loss": 1.4652,
"step": 153500
},
{
"epoch": 1.99,
"learning_rate": 1.6905633778602988e-05,
"loss": 1.476,
"step": 154000
},
{
"epoch": 1.99,
"learning_rate": 1.6798184537624426e-05,
"loss": 1.4731,
"step": 154500
},
{
"epoch": 2.0,
"learning_rate": 1.6690735296645865e-05,
"loss": 1.4571,
"step": 155000
},
{
"epoch": 2.01,
"learning_rate": 1.6583286055667306e-05,
"loss": 1.3858,
"step": 155500
},
{
"epoch": 2.01,
"learning_rate": 1.647583681468874e-05,
"loss": 1.39,
"step": 156000
},
{
"epoch": 2.02,
"learning_rate": 1.6368387573710182e-05,
"loss": 1.3756,
"step": 156500
},
{
"epoch": 2.02,
"learning_rate": 1.6260938332731617e-05,
"loss": 1.3864,
"step": 157000
},
{
"epoch": 2.03,
"learning_rate": 1.615348909175306e-05,
"loss": 1.3862,
"step": 157500
},
{
"epoch": 2.04,
"learning_rate": 1.6046039850774494e-05,
"loss": 1.3787,
"step": 158000
},
{
"epoch": 2.04,
"learning_rate": 1.5938590609795935e-05,
"loss": 1.3957,
"step": 158500
},
{
"epoch": 2.05,
"learning_rate": 1.583114136881737e-05,
"loss": 1.3872,
"step": 159000
},
{
"epoch": 2.06,
"learning_rate": 1.572369212783881e-05,
"loss": 1.3876,
"step": 159500
},
{
"epoch": 2.06,
"learning_rate": 1.5616242886860246e-05,
"loss": 1.3809,
"step": 160000
},
{
"epoch": 2.07,
"learning_rate": 1.5508793645881688e-05,
"loss": 1.3876,
"step": 160500
},
{
"epoch": 2.08,
"learning_rate": 1.5401344404903126e-05,
"loss": 1.3877,
"step": 161000
},
{
"epoch": 2.08,
"learning_rate": 1.5293895163924564e-05,
"loss": 1.3849,
"step": 161500
},
{
"epoch": 2.09,
"learning_rate": 1.5186445922946002e-05,
"loss": 1.3926,
"step": 162000
},
{
"epoch": 2.1,
"learning_rate": 1.5078996681967439e-05,
"loss": 1.3987,
"step": 162500
},
{
"epoch": 2.1,
"learning_rate": 1.4971547440988879e-05,
"loss": 1.3969,
"step": 163000
},
{
"epoch": 2.11,
"learning_rate": 1.4864098200010315e-05,
"loss": 1.3839,
"step": 163500
},
{
"epoch": 2.11,
"learning_rate": 1.4756648959031755e-05,
"loss": 1.3856,
"step": 164000
},
{
"epoch": 2.12,
"learning_rate": 1.4649199718053191e-05,
"loss": 1.3782,
"step": 164500
},
{
"epoch": 2.13,
"learning_rate": 1.4541750477074631e-05,
"loss": 1.3977,
"step": 165000
},
{
"epoch": 2.13,
"learning_rate": 1.4434301236096068e-05,
"loss": 1.3821,
"step": 165500
},
{
"epoch": 2.14,
"learning_rate": 1.4326851995117508e-05,
"loss": 1.3767,
"step": 166000
},
{
"epoch": 2.15,
"learning_rate": 1.4219402754138944e-05,
"loss": 1.3843,
"step": 166500
},
{
"epoch": 2.15,
"learning_rate": 1.4111953513160384e-05,
"loss": 1.4,
"step": 167000
},
{
"epoch": 2.16,
"learning_rate": 1.4004504272181824e-05,
"loss": 1.3805,
"step": 167500
},
{
"epoch": 2.17,
"learning_rate": 1.389705503120326e-05,
"loss": 1.3773,
"step": 168000
},
{
"epoch": 2.17,
"learning_rate": 1.37896057902247e-05,
"loss": 1.3999,
"step": 168500
},
{
"epoch": 2.18,
"learning_rate": 1.3682156549246137e-05,
"loss": 1.4037,
"step": 169000
},
{
"epoch": 2.19,
"learning_rate": 1.3574707308267577e-05,
"loss": 1.3802,
"step": 169500
},
{
"epoch": 2.19,
"learning_rate": 1.3467258067289013e-05,
"loss": 1.3954,
"step": 170000
},
{
"epoch": 2.2,
"learning_rate": 1.3359808826310453e-05,
"loss": 1.3859,
"step": 170500
},
{
"epoch": 2.2,
"learning_rate": 1.325235958533189e-05,
"loss": 1.3907,
"step": 171000
},
{
"epoch": 2.21,
"learning_rate": 1.314491034435333e-05,
"loss": 1.3807,
"step": 171500
},
{
"epoch": 2.22,
"learning_rate": 1.3037461103374766e-05,
"loss": 1.386,
"step": 172000
},
{
"epoch": 2.22,
"learning_rate": 1.2930011862396206e-05,
"loss": 1.3791,
"step": 172500
},
{
"epoch": 2.23,
"learning_rate": 1.2822562621417644e-05,
"loss": 1.3836,
"step": 173000
},
{
"epoch": 2.24,
"learning_rate": 1.271511338043908e-05,
"loss": 1.3988,
"step": 173500
},
{
"epoch": 2.24,
"learning_rate": 1.260766413946052e-05,
"loss": 1.3981,
"step": 174000
},
{
"epoch": 2.25,
"learning_rate": 1.2500214898481956e-05,
"loss": 1.384,
"step": 174500
},
{
"epoch": 2.26,
"learning_rate": 1.2392765657503395e-05,
"loss": 1.3789,
"step": 175000
},
{
"epoch": 2.26,
"learning_rate": 1.2285316416524835e-05,
"loss": 1.3788,
"step": 175500
},
{
"epoch": 2.27,
"learning_rate": 1.2177867175546273e-05,
"loss": 1.3833,
"step": 176000
},
{
"epoch": 2.28,
"learning_rate": 1.207041793456771e-05,
"loss": 1.395,
"step": 176500
},
{
"epoch": 2.28,
"learning_rate": 1.1962968693589149e-05,
"loss": 1.3814,
"step": 177000
},
{
"epoch": 2.29,
"learning_rate": 1.1855519452610587e-05,
"loss": 1.3891,
"step": 177500
},
{
"epoch": 2.3,
"learning_rate": 1.1748070211632025e-05,
"loss": 1.3862,
"step": 178000
},
{
"epoch": 2.3,
"learning_rate": 1.1640620970653464e-05,
"loss": 1.3825,
"step": 178500
},
{
"epoch": 2.31,
"learning_rate": 1.1533171729674902e-05,
"loss": 1.3829,
"step": 179000
},
{
"epoch": 2.31,
"learning_rate": 1.142572248869634e-05,
"loss": 1.3902,
"step": 179500
},
{
"epoch": 2.32,
"learning_rate": 1.1318273247717778e-05,
"loss": 1.3772,
"step": 180000
},
{
"epoch": 2.33,
"learning_rate": 1.1210824006739216e-05,
"loss": 1.3812,
"step": 180500
},
{
"epoch": 2.33,
"learning_rate": 1.1103374765760654e-05,
"loss": 1.3847,
"step": 181000
},
{
"epoch": 2.34,
"learning_rate": 1.0995925524782094e-05,
"loss": 1.3717,
"step": 181500
},
{
"epoch": 2.35,
"learning_rate": 1.0888476283803532e-05,
"loss": 1.3807,
"step": 182000
},
{
"epoch": 2.35,
"learning_rate": 1.078102704282497e-05,
"loss": 1.386,
"step": 182500
},
{
"epoch": 2.36,
"learning_rate": 1.0673577801846409e-05,
"loss": 1.3868,
"step": 183000
},
{
"epoch": 2.37,
"learning_rate": 1.0566128560867847e-05,
"loss": 1.381,
"step": 183500
},
{
"epoch": 2.37,
"learning_rate": 1.0458679319889285e-05,
"loss": 1.3821,
"step": 184000
},
{
"epoch": 2.38,
"learning_rate": 1.0351230078910723e-05,
"loss": 1.3698,
"step": 184500
},
{
"epoch": 2.39,
"learning_rate": 1.0243780837932161e-05,
"loss": 1.3901,
"step": 185000
},
{
"epoch": 2.39,
"learning_rate": 1.01363315969536e-05,
"loss": 1.3899,
"step": 185500
},
{
"epoch": 2.4,
"learning_rate": 1.0028882355975038e-05,
"loss": 1.4006,
"step": 186000
},
{
"epoch": 2.4,
"learning_rate": 9.921433114996476e-06,
"loss": 1.3882,
"step": 186500
},
{
"epoch": 2.41,
"learning_rate": 9.813983874017914e-06,
"loss": 1.3883,
"step": 187000
},
{
"epoch": 2.42,
"learning_rate": 9.706534633039354e-06,
"loss": 1.3821,
"step": 187500
},
{
"epoch": 2.42,
"learning_rate": 9.599085392060792e-06,
"loss": 1.3829,
"step": 188000
},
{
"epoch": 2.43,
"learning_rate": 9.49163615108223e-06,
"loss": 1.3901,
"step": 188500
},
{
"epoch": 2.44,
"learning_rate": 9.384186910103668e-06,
"loss": 1.3812,
"step": 189000
},
{
"epoch": 2.44,
"learning_rate": 9.276737669125107e-06,
"loss": 1.3726,
"step": 189500
},
{
"epoch": 2.45,
"learning_rate": 9.169288428146545e-06,
"loss": 1.3926,
"step": 190000
},
{
"epoch": 2.46,
"learning_rate": 9.061839187167983e-06,
"loss": 1.3834,
"step": 190500
},
{
"epoch": 2.46,
"learning_rate": 8.954389946189421e-06,
"loss": 1.3774,
"step": 191000
},
{
"epoch": 2.47,
"learning_rate": 8.84694070521086e-06,
"loss": 1.3773,
"step": 191500
},
{
"epoch": 2.48,
"learning_rate": 8.739491464232297e-06,
"loss": 1.3942,
"step": 192000
},
{
"epoch": 2.48,
"learning_rate": 8.632042223253736e-06,
"loss": 1.385,
"step": 192500
},
{
"epoch": 2.49,
"learning_rate": 8.524592982275174e-06,
"loss": 1.3853,
"step": 193000
},
{
"epoch": 2.49,
"learning_rate": 8.417143741296612e-06,
"loss": 1.3675,
"step": 193500
},
{
"epoch": 2.5,
"learning_rate": 8.30969450031805e-06,
"loss": 1.3836,
"step": 194000
},
{
"epoch": 2.51,
"learning_rate": 8.202245259339488e-06,
"loss": 1.3815,
"step": 194500
},
{
"epoch": 2.51,
"learning_rate": 8.094796018360926e-06,
"loss": 1.3839,
"step": 195000
},
{
"epoch": 2.52,
"learning_rate": 7.987346777382365e-06,
"loss": 1.376,
"step": 195500
},
{
"epoch": 2.53,
"learning_rate": 7.879897536403803e-06,
"loss": 1.3788,
"step": 196000
},
{
"epoch": 2.53,
"learning_rate": 7.772448295425241e-06,
"loss": 1.3846,
"step": 196500
},
{
"epoch": 2.54,
"learning_rate": 7.664999054446679e-06,
"loss": 1.3923,
"step": 197000
},
{
"epoch": 2.55,
"learning_rate": 7.557549813468118e-06,
"loss": 1.3731,
"step": 197500
},
{
"epoch": 2.55,
"learning_rate": 7.450100572489556e-06,
"loss": 1.3886,
"step": 198000
},
{
"epoch": 2.56,
"learning_rate": 7.342651331510994e-06,
"loss": 1.3817,
"step": 198500
},
{
"epoch": 2.57,
"learning_rate": 7.2352020905324326e-06,
"loss": 1.3872,
"step": 199000
},
{
"epoch": 2.57,
"learning_rate": 7.127752849553872e-06,
"loss": 1.3788,
"step": 199500
},
{
"epoch": 2.58,
"learning_rate": 7.02030360857531e-06,
"loss": 1.3854,
"step": 200000
},
{
"epoch": 2.59,
"learning_rate": 6.912854367596748e-06,
"loss": 1.3679,
"step": 200500
},
{
"epoch": 2.59,
"learning_rate": 6.805405126618186e-06,
"loss": 1.381,
"step": 201000
},
{
"epoch": 2.6,
"learning_rate": 6.697955885639624e-06,
"loss": 1.383,
"step": 201500
},
{
"epoch": 2.6,
"learning_rate": 6.5905066446610624e-06,
"loss": 1.3757,
"step": 202000
},
{
"epoch": 2.61,
"learning_rate": 6.483057403682501e-06,
"loss": 1.3687,
"step": 202500
},
{
"epoch": 2.62,
"learning_rate": 6.375608162703939e-06,
"loss": 1.3739,
"step": 203000
},
{
"epoch": 2.62,
"learning_rate": 6.268158921725377e-06,
"loss": 1.3737,
"step": 203500
},
{
"epoch": 2.63,
"learning_rate": 6.160709680746816e-06,
"loss": 1.3787,
"step": 204000
},
{
"epoch": 2.64,
"learning_rate": 6.053260439768254e-06,
"loss": 1.3832,
"step": 204500
},
{
"epoch": 2.64,
"learning_rate": 5.945811198789692e-06,
"loss": 1.3734,
"step": 205000
},
{
"epoch": 2.65,
"learning_rate": 5.8383619578111304e-06,
"loss": 1.3847,
"step": 205500
},
{
"epoch": 2.66,
"learning_rate": 5.730912716832569e-06,
"loss": 1.3635,
"step": 206000
},
{
"epoch": 2.66,
"learning_rate": 5.623463475854007e-06,
"loss": 1.3768,
"step": 206500
},
{
"epoch": 2.67,
"learning_rate": 5.516014234875446e-06,
"loss": 1.3644,
"step": 207000
},
{
"epoch": 2.68,
"learning_rate": 5.408564993896884e-06,
"loss": 1.3766,
"step": 207500
},
{
"epoch": 2.68,
"learning_rate": 5.301115752918321e-06,
"loss": 1.3745,
"step": 208000
},
{
"epoch": 2.69,
"learning_rate": 5.1936665119397594e-06,
"loss": 1.3779,
"step": 208500
},
{
"epoch": 2.69,
"learning_rate": 5.086217270961198e-06,
"loss": 1.3693,
"step": 209000
},
{
"epoch": 2.7,
"learning_rate": 4.978768029982636e-06,
"loss": 1.366,
"step": 209500
},
{
"epoch": 2.71,
"learning_rate": 4.871318789004075e-06,
"loss": 1.3817,
"step": 210000
},
{
"epoch": 2.71,
"learning_rate": 4.763869548025513e-06,
"loss": 1.384,
"step": 210500
},
{
"epoch": 2.72,
"learning_rate": 4.656420307046951e-06,
"loss": 1.3692,
"step": 211000
},
{
"epoch": 2.73,
"learning_rate": 4.548971066068389e-06,
"loss": 1.3751,
"step": 211500
},
{
"epoch": 2.73,
"learning_rate": 4.4415218250898275e-06,
"loss": 1.3787,
"step": 212000
},
{
"epoch": 2.74,
"learning_rate": 4.334072584111266e-06,
"loss": 1.3943,
"step": 212500
},
{
"epoch": 2.75,
"learning_rate": 4.226623343132705e-06,
"loss": 1.3679,
"step": 213000
},
{
"epoch": 2.75,
"learning_rate": 4.119174102154143e-06,
"loss": 1.3642,
"step": 213500
},
{
"epoch": 2.76,
"learning_rate": 4.011724861175581e-06,
"loss": 1.3677,
"step": 214000
},
{
"epoch": 2.77,
"learning_rate": 3.904275620197019e-06,
"loss": 1.3725,
"step": 214500
},
{
"epoch": 2.77,
"learning_rate": 3.7968263792184573e-06,
"loss": 1.3699,
"step": 215000
},
{
"epoch": 2.78,
"learning_rate": 3.6893771382398955e-06,
"loss": 1.3676,
"step": 215500
},
{
"epoch": 2.79,
"learning_rate": 3.581927897261334e-06,
"loss": 1.3639,
"step": 216000
},
{
"epoch": 2.79,
"learning_rate": 3.4744786562827722e-06,
"loss": 1.3669,
"step": 216500
},
{
"epoch": 2.8,
"learning_rate": 3.3670294153042104e-06,
"loss": 1.3725,
"step": 217000
},
{
"epoch": 2.8,
"learning_rate": 3.2595801743256486e-06,
"loss": 1.3801,
"step": 217500
},
{
"epoch": 2.81,
"learning_rate": 3.1521309333470867e-06,
"loss": 1.3683,
"step": 218000
},
{
"epoch": 2.82,
"learning_rate": 3.0446816923685253e-06,
"loss": 1.3764,
"step": 218500
},
{
"epoch": 2.82,
"learning_rate": 2.9372324513899635e-06,
"loss": 1.3641,
"step": 219000
},
{
"epoch": 2.83,
"learning_rate": 2.829783210411402e-06,
"loss": 1.3707,
"step": 219500
},
{
"epoch": 2.84,
"learning_rate": 2.7223339694328402e-06,
"loss": 1.3741,
"step": 220000
},
{
"epoch": 2.84,
"learning_rate": 2.6148847284542784e-06,
"loss": 1.3637,
"step": 220500
},
{
"epoch": 2.85,
"learning_rate": 2.5074354874757166e-06,
"loss": 1.363,
"step": 221000
},
{
"epoch": 2.86,
"learning_rate": 2.3999862464971547e-06,
"loss": 1.3676,
"step": 221500
},
{
"epoch": 2.86,
"learning_rate": 2.292537005518593e-06,
"loss": 1.3681,
"step": 222000
},
{
"epoch": 2.87,
"learning_rate": 2.1850877645400315e-06,
"loss": 1.3713,
"step": 222500
},
{
"epoch": 2.88,
"learning_rate": 2.0776385235614697e-06,
"loss": 1.3704,
"step": 223000
},
{
"epoch": 2.88,
"learning_rate": 1.970189282582908e-06,
"loss": 1.3762,
"step": 223500
},
{
"epoch": 2.89,
"learning_rate": 1.8627400416043464e-06,
"loss": 1.3644,
"step": 224000
},
{
"epoch": 2.89,
"learning_rate": 1.7552908006257844e-06,
"loss": 1.3815,
"step": 224500
},
{
"epoch": 2.9,
"learning_rate": 1.6478415596472225e-06,
"loss": 1.3706,
"step": 225000
},
{
"epoch": 2.91,
"learning_rate": 1.540392318668661e-06,
"loss": 1.3705,
"step": 225500
},
{
"epoch": 2.91,
"learning_rate": 1.4329430776900993e-06,
"loss": 1.376,
"step": 226000
},
{
"epoch": 2.92,
"learning_rate": 1.3254938367115377e-06,
"loss": 1.3704,
"step": 226500
},
{
"epoch": 2.93,
"learning_rate": 1.2180445957329758e-06,
"loss": 1.3691,
"step": 227000
},
{
"epoch": 2.93,
"learning_rate": 1.110595354754414e-06,
"loss": 1.3753,
"step": 227500
},
{
"epoch": 2.94,
"learning_rate": 1.0031461137758524e-06,
"loss": 1.3553,
"step": 228000
},
{
"epoch": 2.95,
"learning_rate": 8.956968727972906e-07,
"loss": 1.3726,
"step": 228500
},
{
"epoch": 2.95,
"learning_rate": 7.882476318187288e-07,
"loss": 1.3704,
"step": 229000
},
{
"epoch": 2.96,
"learning_rate": 6.807983908401671e-07,
"loss": 1.3674,
"step": 229500
},
{
"epoch": 2.97,
"learning_rate": 5.733491498616055e-07,
"loss": 1.3578,
"step": 230000
},
{
"epoch": 2.97,
"learning_rate": 4.658999088830437e-07,
"loss": 1.3761,
"step": 230500
},
{
"epoch": 2.98,
"learning_rate": 3.5845066790448197e-07,
"loss": 1.3656,
"step": 231000
},
{
"epoch": 2.98,
"learning_rate": 2.510014269259202e-07,
"loss": 1.3663,
"step": 231500
},
{
"epoch": 2.99,
"learning_rate": 1.4355218594735846e-07,
"loss": 1.3605,
"step": 232000
},
{
"epoch": 3.0,
"learning_rate": 3.610294496879674e-08,
"loss": 1.367,
"step": 232500
},
{
"epoch": 3.0,
"step": 232668,
"total_flos": 6.971496629325005e+16,
"train_loss": 1.5098053402305296,
"train_runtime": 78093.1182,
"train_samples_per_second": 47.669,
"train_steps_per_second": 2.979
}
],
"max_steps": 232668,
"num_train_epochs": 3,
"total_flos": 6.971496629325005e+16,
"trial_name": null,
"trial_params": null
}