|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9998452192082965, |
|
"eval_steps": 12921, |
|
"global_step": 103368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003869519792593739, |
|
"grad_norm": 773.3179931640625, |
|
"learning_rate": 1.9992260960414815e-05, |
|
"loss": 17.8357, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007739039585187478, |
|
"grad_norm": 327.2199401855469, |
|
"learning_rate": 1.9984521920829628e-05, |
|
"loss": 11.1504, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.011608559377781217, |
|
"grad_norm": 78.84376525878906, |
|
"learning_rate": 1.9976782881244438e-05, |
|
"loss": 8.0846, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.015478079170374956, |
|
"grad_norm": 78.74186706542969, |
|
"learning_rate": 1.9969043841659254e-05, |
|
"loss": 6.1805, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.019347598962968695, |
|
"grad_norm": 10.000066757202148, |
|
"learning_rate": 1.9961304802074064e-05, |
|
"loss": 4.9342, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.023217118755562434, |
|
"grad_norm": 11.822905540466309, |
|
"learning_rate": 1.9953565762488877e-05, |
|
"loss": 4.1502, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.027086638548156173, |
|
"grad_norm": 7.014156818389893, |
|
"learning_rate": 1.994582672290369e-05, |
|
"loss": 3.7055, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.030956158340749912, |
|
"grad_norm": 9.273381233215332, |
|
"learning_rate": 1.99380876833185e-05, |
|
"loss": 3.3966, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.034825678133343654, |
|
"grad_norm": 8.681159973144531, |
|
"learning_rate": 1.9930348643733316e-05, |
|
"loss": 3.1075, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03869519792593739, |
|
"grad_norm": 5.212049961090088, |
|
"learning_rate": 1.9922609604148126e-05, |
|
"loss": 2.9555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04256471771853113, |
|
"grad_norm": 5.840594291687012, |
|
"learning_rate": 1.991487056456294e-05, |
|
"loss": 2.8528, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04643423751112487, |
|
"grad_norm": 5.89320707321167, |
|
"learning_rate": 1.9907131524977752e-05, |
|
"loss": 2.6982, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.05030375730371861, |
|
"grad_norm": 9.517736434936523, |
|
"learning_rate": 1.9899392485392565e-05, |
|
"loss": 2.5849, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.054173277096312346, |
|
"grad_norm": 10.960714340209961, |
|
"learning_rate": 1.989165344580738e-05, |
|
"loss": 2.5084, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05804279688890609, |
|
"grad_norm": 5.337602615356445, |
|
"learning_rate": 1.9883914406222188e-05, |
|
"loss": 2.4456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.061912316681499824, |
|
"grad_norm": 7.735531806945801, |
|
"learning_rate": 1.9876175366637e-05, |
|
"loss": 2.33, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.06578183647409357, |
|
"grad_norm": 7.02548885345459, |
|
"learning_rate": 1.9868436327051814e-05, |
|
"loss": 2.3016, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.06965135626668731, |
|
"grad_norm": 4.705838680267334, |
|
"learning_rate": 1.9860697287466627e-05, |
|
"loss": 2.2375, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07352087605928104, |
|
"grad_norm": 4.2120256423950195, |
|
"learning_rate": 1.985295824788144e-05, |
|
"loss": 2.2007, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.07739039585187478, |
|
"grad_norm": 6.315709114074707, |
|
"learning_rate": 1.984521920829625e-05, |
|
"loss": 2.161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08125991564446852, |
|
"grad_norm": 5.227051734924316, |
|
"learning_rate": 1.9837480168711067e-05, |
|
"loss": 2.0787, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.08512943543706226, |
|
"grad_norm": 7.011941432952881, |
|
"learning_rate": 1.9829741129125876e-05, |
|
"loss": 2.0563, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.088998955229656, |
|
"grad_norm": 6.2574143409729, |
|
"learning_rate": 1.982200208954069e-05, |
|
"loss": 1.9629, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.09286847502224974, |
|
"grad_norm": 6.2574381828308105, |
|
"learning_rate": 1.9814263049955503e-05, |
|
"loss": 1.9871, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.09673799481484348, |
|
"grad_norm": 6.882974147796631, |
|
"learning_rate": 1.9806524010370312e-05, |
|
"loss": 1.9569, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10060751460743722, |
|
"grad_norm": 7.7587199211120605, |
|
"learning_rate": 1.979878497078513e-05, |
|
"loss": 1.8799, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.10447703440003096, |
|
"grad_norm": 4.561262607574463, |
|
"learning_rate": 1.979104593119994e-05, |
|
"loss": 1.8605, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.10834655419262469, |
|
"grad_norm": 7.243245601654053, |
|
"learning_rate": 1.978330689161475e-05, |
|
"loss": 1.8433, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.11221607398521843, |
|
"grad_norm": 5.04033088684082, |
|
"learning_rate": 1.9775567852029565e-05, |
|
"loss": 1.8112, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.11608559377781218, |
|
"grad_norm": 4.307803630828857, |
|
"learning_rate": 1.9767828812444378e-05, |
|
"loss": 1.7924, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11995511357040592, |
|
"grad_norm": 5.578695774078369, |
|
"learning_rate": 1.976008977285919e-05, |
|
"loss": 1.8002, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.12382463336299965, |
|
"grad_norm": 5.035937309265137, |
|
"learning_rate": 1.9752350733274e-05, |
|
"loss": 1.7425, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.1276941531555934, |
|
"grad_norm": 5.792506694793701, |
|
"learning_rate": 1.9744611693688817e-05, |
|
"loss": 1.6971, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.13156367294818713, |
|
"grad_norm": 3.9911370277404785, |
|
"learning_rate": 1.9736872654103627e-05, |
|
"loss": 1.673, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.13543319274078086, |
|
"grad_norm": 6.748035907745361, |
|
"learning_rate": 1.972913361451844e-05, |
|
"loss": 1.673, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.13930271253337462, |
|
"grad_norm": 6.547277450561523, |
|
"learning_rate": 1.9721394574933253e-05, |
|
"loss": 1.6237, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.14317223232596835, |
|
"grad_norm": 4.524691581726074, |
|
"learning_rate": 1.9713655535348063e-05, |
|
"loss": 1.6557, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.14704175211856207, |
|
"grad_norm": 4.387489318847656, |
|
"learning_rate": 1.970591649576288e-05, |
|
"loss": 1.611, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.15091127191115583, |
|
"grad_norm": 4.25224494934082, |
|
"learning_rate": 1.969817745617769e-05, |
|
"loss": 1.5658, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.15478079170374956, |
|
"grad_norm": 3.9016988277435303, |
|
"learning_rate": 1.9690438416592502e-05, |
|
"loss": 1.5737, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15865031149634332, |
|
"grad_norm": 4.547232151031494, |
|
"learning_rate": 1.9682699377007315e-05, |
|
"loss": 1.5481, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.16251983128893704, |
|
"grad_norm": 5.446467399597168, |
|
"learning_rate": 1.9674960337422125e-05, |
|
"loss": 1.5275, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.16638935108153077, |
|
"grad_norm": 5.614628791809082, |
|
"learning_rate": 1.966722129783694e-05, |
|
"loss": 1.519, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.17025887087412453, |
|
"grad_norm": 9.042234420776367, |
|
"learning_rate": 1.965948225825175e-05, |
|
"loss": 1.4762, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.17412839066671826, |
|
"grad_norm": 3.2087180614471436, |
|
"learning_rate": 1.9651743218666564e-05, |
|
"loss": 1.5133, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.177997910459312, |
|
"grad_norm": 4.113743305206299, |
|
"learning_rate": 1.9644004179081377e-05, |
|
"loss": 1.4572, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.18186743025190574, |
|
"grad_norm": 3.658928155899048, |
|
"learning_rate": 1.963626513949619e-05, |
|
"loss": 1.4591, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.18573695004449947, |
|
"grad_norm": 3.779625177383423, |
|
"learning_rate": 1.9628526099911004e-05, |
|
"loss": 1.4307, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.18960646983709323, |
|
"grad_norm": 3.7647228240966797, |
|
"learning_rate": 1.9620787060325813e-05, |
|
"loss": 1.4438, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.19347598962968696, |
|
"grad_norm": 3.0192203521728516, |
|
"learning_rate": 1.961304802074063e-05, |
|
"loss": 1.53, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.19734550942228068, |
|
"grad_norm": 4.725248336791992, |
|
"learning_rate": 1.960530898115544e-05, |
|
"loss": 1.3875, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.20121502921487444, |
|
"grad_norm": 4.87757682800293, |
|
"learning_rate": 1.9597569941570253e-05, |
|
"loss": 1.3919, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.20508454900746817, |
|
"grad_norm": 6.212441921234131, |
|
"learning_rate": 1.9589830901985066e-05, |
|
"loss": 1.3626, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.20895406880006193, |
|
"grad_norm": 6.060988903045654, |
|
"learning_rate": 1.9582091862399875e-05, |
|
"loss": 1.3816, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.21282358859265565, |
|
"grad_norm": 3.0998196601867676, |
|
"learning_rate": 1.9574352822814692e-05, |
|
"loss": 1.2983, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.21669310838524938, |
|
"grad_norm": 9.51986026763916, |
|
"learning_rate": 1.95666137832295e-05, |
|
"loss": 1.306, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.22056262817784314, |
|
"grad_norm": 3.9112932682037354, |
|
"learning_rate": 1.9558874743644315e-05, |
|
"loss": 1.2809, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.22443214797043687, |
|
"grad_norm": 3.84000563621521, |
|
"learning_rate": 1.9551135704059128e-05, |
|
"loss": 1.3083, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.2283016677630306, |
|
"grad_norm": 4.876347541809082, |
|
"learning_rate": 1.954339666447394e-05, |
|
"loss": 1.2847, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.23217118755562435, |
|
"grad_norm": 4.156209468841553, |
|
"learning_rate": 1.9535657624888754e-05, |
|
"loss": 1.2499, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.23604070734821808, |
|
"grad_norm": 4.460844039916992, |
|
"learning_rate": 1.9527918585303564e-05, |
|
"loss": 1.2547, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.23991022714081184, |
|
"grad_norm": 3.557577133178711, |
|
"learning_rate": 1.9520179545718377e-05, |
|
"loss": 1.2362, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.24377974693340557, |
|
"grad_norm": 4.56212854385376, |
|
"learning_rate": 1.951244050613319e-05, |
|
"loss": 1.2362, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.2476492667259993, |
|
"grad_norm": 2.713700294494629, |
|
"learning_rate": 1.9504701466548003e-05, |
|
"loss": 1.2355, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.25151878651859305, |
|
"grad_norm": 3.288674831390381, |
|
"learning_rate": 1.9496962426962816e-05, |
|
"loss": 1.2082, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2553883063111868, |
|
"grad_norm": 3.3763837814331055, |
|
"learning_rate": 1.9489223387377626e-05, |
|
"loss": 1.2144, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.2592578261037805, |
|
"grad_norm": 4.2473297119140625, |
|
"learning_rate": 1.9481484347792442e-05, |
|
"loss": 1.2034, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.26312734589637427, |
|
"grad_norm": 3.2804677486419678, |
|
"learning_rate": 1.9473745308207252e-05, |
|
"loss": 1.1913, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.266996865688968, |
|
"grad_norm": 4.3531060218811035, |
|
"learning_rate": 1.9466006268622065e-05, |
|
"loss": 1.1794, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.2708663854815617, |
|
"grad_norm": 4.2846503257751465, |
|
"learning_rate": 1.9458267229036878e-05, |
|
"loss": 1.2, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2747359052741555, |
|
"grad_norm": 4.860626220703125, |
|
"learning_rate": 1.945052818945169e-05, |
|
"loss": 1.1758, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.27860542506674923, |
|
"grad_norm": 5.262426376342773, |
|
"learning_rate": 1.9442789149866504e-05, |
|
"loss": 1.1542, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.28247494485934294, |
|
"grad_norm": 2.705568552017212, |
|
"learning_rate": 1.9435050110281314e-05, |
|
"loss": 1.1487, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.2863444646519367, |
|
"grad_norm": 2.9500765800476074, |
|
"learning_rate": 1.9427311070696127e-05, |
|
"loss": 1.15, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.29021398444453045, |
|
"grad_norm": 3.6077990531921387, |
|
"learning_rate": 1.941957203111094e-05, |
|
"loss": 1.1282, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.29408350423712415, |
|
"grad_norm": 4.17680549621582, |
|
"learning_rate": 1.9411832991525753e-05, |
|
"loss": 1.122, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.2979530240297179, |
|
"grad_norm": 4.31356954574585, |
|
"learning_rate": 1.9404093951940567e-05, |
|
"loss": 1.1312, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.30182254382231166, |
|
"grad_norm": 4.263576984405518, |
|
"learning_rate": 1.9396354912355376e-05, |
|
"loss": 1.0749, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.30569206361490536, |
|
"grad_norm": 5.233335971832275, |
|
"learning_rate": 1.9388615872770193e-05, |
|
"loss": 1.1188, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.3095615834074991, |
|
"grad_norm": 4.0638747215271, |
|
"learning_rate": 1.9380876833185003e-05, |
|
"loss": 1.0796, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3134311032000929, |
|
"grad_norm": 3.467556953430176, |
|
"learning_rate": 1.9373137793599816e-05, |
|
"loss": 1.0611, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.31730062299268663, |
|
"grad_norm": 3.4867913722991943, |
|
"learning_rate": 1.936539875401463e-05, |
|
"loss": 1.1273, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.32117014278528033, |
|
"grad_norm": 5.139125347137451, |
|
"learning_rate": 1.935765971442944e-05, |
|
"loss": 1.076, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.3250396625778741, |
|
"grad_norm": 2.705803632736206, |
|
"learning_rate": 1.9349920674844255e-05, |
|
"loss": 1.0684, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.32890918237046785, |
|
"grad_norm": 3.1156651973724365, |
|
"learning_rate": 1.9342181635259065e-05, |
|
"loss": 1.046, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.33277870216306155, |
|
"grad_norm": 2.9857335090637207, |
|
"learning_rate": 1.9334442595673878e-05, |
|
"loss": 1.057, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.3366482219556553, |
|
"grad_norm": 4.149692535400391, |
|
"learning_rate": 1.932670355608869e-05, |
|
"loss": 1.0911, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.34051774174824906, |
|
"grad_norm": 3.3451409339904785, |
|
"learning_rate": 1.9318964516503504e-05, |
|
"loss": 1.0637, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.34438726154084276, |
|
"grad_norm": 4.110273361206055, |
|
"learning_rate": 1.9311225476918317e-05, |
|
"loss": 1.0242, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.3482567813334365, |
|
"grad_norm": 2.951718807220459, |
|
"learning_rate": 1.9303486437333127e-05, |
|
"loss": 1.0712, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35212630112603027, |
|
"grad_norm": 3.8159780502319336, |
|
"learning_rate": 1.929574739774794e-05, |
|
"loss": 1.0474, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.355995820918624, |
|
"grad_norm": 2.9727439880371094, |
|
"learning_rate": 1.9288008358162753e-05, |
|
"loss": 1.0247, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.35986534071121773, |
|
"grad_norm": 3.576489210128784, |
|
"learning_rate": 1.9280269318577566e-05, |
|
"loss": 0.995, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.3637348605038115, |
|
"grad_norm": 2.537593364715576, |
|
"learning_rate": 1.927253027899238e-05, |
|
"loss": 1.0405, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.36760438029640524, |
|
"grad_norm": 5.178198337554932, |
|
"learning_rate": 1.926479123940719e-05, |
|
"loss": 1.0071, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37147390008899894, |
|
"grad_norm": 3.5340261459350586, |
|
"learning_rate": 1.9257052199822005e-05, |
|
"loss": 1.0146, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.3753434198815927, |
|
"grad_norm": 4.672996997833252, |
|
"learning_rate": 1.9249313160236815e-05, |
|
"loss": 1.0172, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.37921293967418646, |
|
"grad_norm": 2.453150510787964, |
|
"learning_rate": 1.9241574120651628e-05, |
|
"loss": 0.9778, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.38308245946678016, |
|
"grad_norm": 2.8062591552734375, |
|
"learning_rate": 1.923383508106644e-05, |
|
"loss": 1.0202, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.3869519792593739, |
|
"grad_norm": 3.405714750289917, |
|
"learning_rate": 1.9226096041481254e-05, |
|
"loss": 0.967, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.39082149905196767, |
|
"grad_norm": 2.818267583847046, |
|
"learning_rate": 1.9218357001896068e-05, |
|
"loss": 0.9691, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.39469101884456137, |
|
"grad_norm": 3.632185935974121, |
|
"learning_rate": 1.9210617962310877e-05, |
|
"loss": 0.9607, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.3985605386371551, |
|
"grad_norm": 4.022298812866211, |
|
"learning_rate": 1.920287892272569e-05, |
|
"loss": 0.9908, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.4024300584297489, |
|
"grad_norm": 3.376326322555542, |
|
"learning_rate": 1.9195139883140503e-05, |
|
"loss": 0.9677, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4062995782223426, |
|
"grad_norm": 4.996184825897217, |
|
"learning_rate": 1.9187400843555317e-05, |
|
"loss": 0.9734, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.41016909801493634, |
|
"grad_norm": 3.2259700298309326, |
|
"learning_rate": 1.917966180397013e-05, |
|
"loss": 0.9614, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.4140386178075301, |
|
"grad_norm": 2.585890531539917, |
|
"learning_rate": 1.917192276438494e-05, |
|
"loss": 0.9764, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.41790813760012385, |
|
"grad_norm": 4.138501167297363, |
|
"learning_rate": 1.9164183724799756e-05, |
|
"loss": 0.9556, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.42177765739271755, |
|
"grad_norm": 3.0705041885375977, |
|
"learning_rate": 1.9156444685214566e-05, |
|
"loss": 0.9427, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.4256471771853113, |
|
"grad_norm": 2.94376802444458, |
|
"learning_rate": 1.914870564562938e-05, |
|
"loss": 0.9299, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.42951669697790507, |
|
"grad_norm": 3.2788572311401367, |
|
"learning_rate": 1.9140966606044192e-05, |
|
"loss": 0.9562, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.43338621677049877, |
|
"grad_norm": 2.8515307903289795, |
|
"learning_rate": 1.9133227566459005e-05, |
|
"loss": 0.9731, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.4372557365630925, |
|
"grad_norm": 4.263568878173828, |
|
"learning_rate": 1.9125488526873818e-05, |
|
"loss": 0.9388, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.4411252563556863, |
|
"grad_norm": 2.5887269973754883, |
|
"learning_rate": 1.9117749487288628e-05, |
|
"loss": 0.943, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.44499477614828, |
|
"grad_norm": 4.111998081207275, |
|
"learning_rate": 1.911001044770344e-05, |
|
"loss": 0.935, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.44886429594087374, |
|
"grad_norm": 3.5757904052734375, |
|
"learning_rate": 1.9102271408118254e-05, |
|
"loss": 0.9166, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.4527338157334675, |
|
"grad_norm": 3.2234575748443604, |
|
"learning_rate": 1.9094532368533067e-05, |
|
"loss": 0.9086, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.4566033355260612, |
|
"grad_norm": 2.5244109630584717, |
|
"learning_rate": 1.908679332894788e-05, |
|
"loss": 0.8732, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.46047285531865495, |
|
"grad_norm": 2.221653699874878, |
|
"learning_rate": 1.907905428936269e-05, |
|
"loss": 0.9143, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.4643423751112487, |
|
"grad_norm": 3.898876428604126, |
|
"learning_rate": 1.9071315249777503e-05, |
|
"loss": 0.9297, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4682118949038424, |
|
"grad_norm": 3.2549328804016113, |
|
"learning_rate": 1.9063576210192316e-05, |
|
"loss": 0.8753, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.47208141469643616, |
|
"grad_norm": 4.246420383453369, |
|
"learning_rate": 1.905583717060713e-05, |
|
"loss": 0.9222, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.4759509344890299, |
|
"grad_norm": 2.8742761611938477, |
|
"learning_rate": 1.9048098131021942e-05, |
|
"loss": 0.9314, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.4798204542816237, |
|
"grad_norm": 3.1842613220214844, |
|
"learning_rate": 1.9040359091436755e-05, |
|
"loss": 0.9167, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.4836899740742174, |
|
"grad_norm": 2.5616085529327393, |
|
"learning_rate": 1.903262005185157e-05, |
|
"loss": 0.9089, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.48755949386681113, |
|
"grad_norm": 2.4772250652313232, |
|
"learning_rate": 1.9024881012266378e-05, |
|
"loss": 0.8641, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.4914290136594049, |
|
"grad_norm": 2.5282464027404785, |
|
"learning_rate": 1.901714197268119e-05, |
|
"loss": 0.8717, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.4952985334519986, |
|
"grad_norm": 3.940554618835449, |
|
"learning_rate": 1.9009402933096004e-05, |
|
"loss": 0.9078, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.49916805324459235, |
|
"grad_norm": 4.964881420135498, |
|
"learning_rate": 1.9001663893510818e-05, |
|
"loss": 0.8716, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.499980652401037, |
|
"eval_loss": 0.560674250125885, |
|
"eval_runtime": 73.0307, |
|
"eval_samples_per_second": 28.604, |
|
"eval_steps_per_second": 3.588, |
|
"step": 12921 |
|
}, |
|
{ |
|
"epoch": 0.5030375730371861, |
|
"grad_norm": 3.7385573387145996, |
|
"learning_rate": 1.899392485392563e-05, |
|
"loss": 0.8925, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5069070928297799, |
|
"grad_norm": 2.8294308185577393, |
|
"learning_rate": 1.898618581434044e-05, |
|
"loss": 0.8573, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.5107766126223736, |
|
"grad_norm": 2.6944656372070312, |
|
"learning_rate": 1.8978446774755253e-05, |
|
"loss": 0.8607, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.5146461324149673, |
|
"grad_norm": 3.5395185947418213, |
|
"learning_rate": 1.8970707735170067e-05, |
|
"loss": 0.8417, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.518515652207561, |
|
"grad_norm": 3.0177690982818604, |
|
"learning_rate": 1.896296869558488e-05, |
|
"loss": 0.8779, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.5223851720001548, |
|
"grad_norm": 2.879098653793335, |
|
"learning_rate": 1.8955229655999693e-05, |
|
"loss": 0.8702, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5262546917927485, |
|
"grad_norm": 3.593395709991455, |
|
"learning_rate": 1.8947490616414506e-05, |
|
"loss": 0.8575, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.5301242115853423, |
|
"grad_norm": 4.0405120849609375, |
|
"learning_rate": 1.8939751576829316e-05, |
|
"loss": 0.8643, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.533993731377936, |
|
"grad_norm": 3.6186561584472656, |
|
"learning_rate": 1.893201253724413e-05, |
|
"loss": 0.8688, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5378632511705297, |
|
"grad_norm": 3.501770496368408, |
|
"learning_rate": 1.8924273497658942e-05, |
|
"loss": 0.8513, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.5417327709631234, |
|
"grad_norm": 3.8322582244873047, |
|
"learning_rate": 1.8916534458073755e-05, |
|
"loss": 0.8346, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5456022907557172, |
|
"grad_norm": 4.2960076332092285, |
|
"learning_rate": 1.8908795418488568e-05, |
|
"loss": 0.8712, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.549471810548311, |
|
"grad_norm": 2.9287805557250977, |
|
"learning_rate": 1.890105637890338e-05, |
|
"loss": 0.8124, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5533413303409047, |
|
"grad_norm": 3.2405707836151123, |
|
"learning_rate": 1.889331733931819e-05, |
|
"loss": 0.8415, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.5572108501334985, |
|
"grad_norm": 3.691862106323242, |
|
"learning_rate": 1.8885578299733004e-05, |
|
"loss": 0.8619, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5610803699260922, |
|
"grad_norm": 2.882659912109375, |
|
"learning_rate": 1.8877839260147817e-05, |
|
"loss": 0.8455, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5649498897186859, |
|
"grad_norm": 2.3164443969726562, |
|
"learning_rate": 1.887010022056263e-05, |
|
"loss": 0.8698, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.5688194095112796, |
|
"grad_norm": 3.544872760772705, |
|
"learning_rate": 1.8862361180977443e-05, |
|
"loss": 0.8286, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.5726889293038734, |
|
"grad_norm": 3.035102605819702, |
|
"learning_rate": 1.8854622141392253e-05, |
|
"loss": 0.8389, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.5765584490964671, |
|
"grad_norm": 2.7800323963165283, |
|
"learning_rate": 1.8846883101807066e-05, |
|
"loss": 0.8363, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.5804279688890609, |
|
"grad_norm": 3.638786554336548, |
|
"learning_rate": 1.883914406222188e-05, |
|
"loss": 0.8323, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5842974886816547, |
|
"grad_norm": 2.910550117492676, |
|
"learning_rate": 1.8831405022636692e-05, |
|
"loss": 0.8237, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.5881670084742483, |
|
"grad_norm": 4.035897254943848, |
|
"learning_rate": 1.8823665983051505e-05, |
|
"loss": 0.8124, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.592036528266842, |
|
"grad_norm": 3.7201926708221436, |
|
"learning_rate": 1.881592694346632e-05, |
|
"loss": 0.8191, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.5959060480594358, |
|
"grad_norm": 2.1671581268310547, |
|
"learning_rate": 1.880818790388113e-05, |
|
"loss": 0.8263, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.5997755678520296, |
|
"grad_norm": 4.754736423492432, |
|
"learning_rate": 1.880044886429594e-05, |
|
"loss": 0.8119, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6036450876446233, |
|
"grad_norm": 2.688183069229126, |
|
"learning_rate": 1.8792709824710754e-05, |
|
"loss": 0.7925, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.6075146074372171, |
|
"grad_norm": 2.091325283050537, |
|
"learning_rate": 1.8784970785125567e-05, |
|
"loss": 0.8011, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.6113841272298107, |
|
"grad_norm": 3.1910102367401123, |
|
"learning_rate": 1.877723174554038e-05, |
|
"loss": 0.7847, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.6152536470224045, |
|
"grad_norm": 2.0929980278015137, |
|
"learning_rate": 1.8769492705955194e-05, |
|
"loss": 0.8159, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.6191231668149982, |
|
"grad_norm": 3.227985382080078, |
|
"learning_rate": 1.8761753666370003e-05, |
|
"loss": 0.7704, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.622992686607592, |
|
"grad_norm": 3.1534597873687744, |
|
"learning_rate": 1.8754014626784817e-05, |
|
"loss": 0.8024, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.6268622064001858, |
|
"grad_norm": 3.8482093811035156, |
|
"learning_rate": 1.874627558719963e-05, |
|
"loss": 0.7787, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.6307317261927795, |
|
"grad_norm": 3.0121939182281494, |
|
"learning_rate": 1.8738536547614443e-05, |
|
"loss": 0.79, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.6346012459853733, |
|
"grad_norm": 2.6242785453796387, |
|
"learning_rate": 1.8730797508029256e-05, |
|
"loss": 0.7814, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.6384707657779669, |
|
"grad_norm": 5.697305679321289, |
|
"learning_rate": 1.872305846844407e-05, |
|
"loss": 0.7978, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6423402855705607, |
|
"grad_norm": 2.5282928943634033, |
|
"learning_rate": 1.871531942885888e-05, |
|
"loss": 0.7724, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6462098053631544, |
|
"grad_norm": 3.4947257041931152, |
|
"learning_rate": 1.8707580389273692e-05, |
|
"loss": 0.7707, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.6500793251557482, |
|
"grad_norm": 3.6135666370391846, |
|
"learning_rate": 1.8699841349688505e-05, |
|
"loss": 0.7615, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6539488449483419, |
|
"grad_norm": 3.657271385192871, |
|
"learning_rate": 1.8692102310103318e-05, |
|
"loss": 0.7916, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.6578183647409357, |
|
"grad_norm": 2.918696403503418, |
|
"learning_rate": 1.868436327051813e-05, |
|
"loss": 0.7914, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6616878845335293, |
|
"grad_norm": 2.597545862197876, |
|
"learning_rate": 1.8676624230932944e-05, |
|
"loss": 0.7772, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.6655574043261231, |
|
"grad_norm": 2.5721652507781982, |
|
"learning_rate": 1.8668885191347754e-05, |
|
"loss": 0.8055, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.6694269241187168, |
|
"grad_norm": 2.5288245677948, |
|
"learning_rate": 1.8661146151762567e-05, |
|
"loss": 0.7746, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.6732964439113106, |
|
"grad_norm": 3.3261797428131104, |
|
"learning_rate": 1.865340711217738e-05, |
|
"loss": 0.7615, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6771659637039044, |
|
"grad_norm": 3.3527183532714844, |
|
"learning_rate": 1.8645668072592193e-05, |
|
"loss": 0.7824, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.6810354834964981, |
|
"grad_norm": 3.0245845317840576, |
|
"learning_rate": 1.8637929033007006e-05, |
|
"loss": 0.7901, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.6849050032890919, |
|
"grad_norm": 4.284315586090088, |
|
"learning_rate": 1.863018999342182e-05, |
|
"loss": 0.7635, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.6887745230816855, |
|
"grad_norm": 2.9628872871398926, |
|
"learning_rate": 1.862245095383663e-05, |
|
"loss": 0.7715, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.6926440428742793, |
|
"grad_norm": 3.209158182144165, |
|
"learning_rate": 1.8614711914251442e-05, |
|
"loss": 0.7549, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.696513562666873, |
|
"grad_norm": 2.8341355323791504, |
|
"learning_rate": 1.8606972874666255e-05, |
|
"loss": 0.745, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7003830824594668, |
|
"grad_norm": 3.9047954082489014, |
|
"learning_rate": 1.859923383508107e-05, |
|
"loss": 0.7398, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.7042526022520605, |
|
"grad_norm": 2.646315097808838, |
|
"learning_rate": 1.859149479549588e-05, |
|
"loss": 0.7381, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.7081221220446543, |
|
"grad_norm": 2.5596678256988525, |
|
"learning_rate": 1.858375575591069e-05, |
|
"loss": 0.746, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.711991641837248, |
|
"grad_norm": 2.578354597091675, |
|
"learning_rate": 1.8576016716325504e-05, |
|
"loss": 0.7123, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.7158611616298417, |
|
"grad_norm": 2.1996634006500244, |
|
"learning_rate": 1.8568277676740317e-05, |
|
"loss": 0.75, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7197306814224355, |
|
"grad_norm": 3.1088805198669434, |
|
"learning_rate": 1.856053863715513e-05, |
|
"loss": 0.7563, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.7236002012150292, |
|
"grad_norm": 2.6386542320251465, |
|
"learning_rate": 1.8552799597569944e-05, |
|
"loss": 0.7485, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.727469721007623, |
|
"grad_norm": 3.407057523727417, |
|
"learning_rate": 1.8545060557984757e-05, |
|
"loss": 0.7177, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.7313392408002167, |
|
"grad_norm": 3.8691439628601074, |
|
"learning_rate": 1.853732151839957e-05, |
|
"loss": 0.7687, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.7352087605928105, |
|
"grad_norm": 1.9367045164108276, |
|
"learning_rate": 1.852958247881438e-05, |
|
"loss": 0.7425, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7390782803854041, |
|
"grad_norm": 6.34794807434082, |
|
"learning_rate": 1.8521843439229193e-05, |
|
"loss": 0.7474, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.7429478001779979, |
|
"grad_norm": 2.6807470321655273, |
|
"learning_rate": 1.8514104399644006e-05, |
|
"loss": 0.7401, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.7468173199705916, |
|
"grad_norm": 1.9673856496810913, |
|
"learning_rate": 1.850636536005882e-05, |
|
"loss": 0.7242, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.7506868397631854, |
|
"grad_norm": 3.6721489429473877, |
|
"learning_rate": 1.8498626320473632e-05, |
|
"loss": 0.7148, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.7545563595557792, |
|
"grad_norm": 2.662858724594116, |
|
"learning_rate": 1.8490887280888442e-05, |
|
"loss": 0.7486, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7584258793483729, |
|
"grad_norm": 2.0240514278411865, |
|
"learning_rate": 1.8483148241303255e-05, |
|
"loss": 0.7493, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.7622953991409666, |
|
"grad_norm": 2.8386764526367188, |
|
"learning_rate": 1.8475409201718068e-05, |
|
"loss": 0.7272, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.7661649189335603, |
|
"grad_norm": 4.325782299041748, |
|
"learning_rate": 1.846767016213288e-05, |
|
"loss": 0.7411, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7700344387261541, |
|
"grad_norm": 2.5033533573150635, |
|
"learning_rate": 1.8459931122547694e-05, |
|
"loss": 0.7289, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.7739039585187478, |
|
"grad_norm": 2.9453632831573486, |
|
"learning_rate": 1.8452192082962507e-05, |
|
"loss": 0.7278, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7777734783113416, |
|
"grad_norm": 2.145141124725342, |
|
"learning_rate": 1.844445304337732e-05, |
|
"loss": 0.7131, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.7816429981039353, |
|
"grad_norm": 4.372563362121582, |
|
"learning_rate": 1.843671400379213e-05, |
|
"loss": 0.6891, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.7855125178965291, |
|
"grad_norm": 2.579414129257202, |
|
"learning_rate": 1.8428974964206943e-05, |
|
"loss": 0.7312, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.7893820376891227, |
|
"grad_norm": 2.5996174812316895, |
|
"learning_rate": 1.8421235924621756e-05, |
|
"loss": 0.7158, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.7932515574817165, |
|
"grad_norm": 2.689061164855957, |
|
"learning_rate": 1.841349688503657e-05, |
|
"loss": 0.7067, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.7971210772743103, |
|
"grad_norm": 3.0453779697418213, |
|
"learning_rate": 1.8405757845451382e-05, |
|
"loss": 0.7022, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.800990597066904, |
|
"grad_norm": 2.452270746231079, |
|
"learning_rate": 1.8398018805866192e-05, |
|
"loss": 0.6891, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.8048601168594978, |
|
"grad_norm": 2.2127304077148438, |
|
"learning_rate": 1.8390279766281005e-05, |
|
"loss": 0.7126, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.8087296366520915, |
|
"grad_norm": 3.050323486328125, |
|
"learning_rate": 1.838254072669582e-05, |
|
"loss": 0.7361, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.8125991564446852, |
|
"grad_norm": 3.5222108364105225, |
|
"learning_rate": 1.837480168711063e-05, |
|
"loss": 0.6958, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8164686762372789, |
|
"grad_norm": 2.7050528526306152, |
|
"learning_rate": 1.8367062647525445e-05, |
|
"loss": 0.6882, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.8203381960298727, |
|
"grad_norm": 2.8139567375183105, |
|
"learning_rate": 1.8359323607940254e-05, |
|
"loss": 0.6871, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.8242077158224664, |
|
"grad_norm": 2.5572404861450195, |
|
"learning_rate": 1.835158456835507e-05, |
|
"loss": 0.7388, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.8280772356150602, |
|
"grad_norm": 3.583036184310913, |
|
"learning_rate": 1.834384552876988e-05, |
|
"loss": 0.687, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.831946755407654, |
|
"grad_norm": 2.7720303535461426, |
|
"learning_rate": 1.8336106489184694e-05, |
|
"loss": 0.69, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8358162752002477, |
|
"grad_norm": 2.2616991996765137, |
|
"learning_rate": 1.8328367449599507e-05, |
|
"loss": 0.7008, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.8396857949928414, |
|
"grad_norm": 2.4385766983032227, |
|
"learning_rate": 1.832062841001432e-05, |
|
"loss": 0.7132, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.8435553147854351, |
|
"grad_norm": 3.018012046813965, |
|
"learning_rate": 1.8312889370429133e-05, |
|
"loss": 0.6931, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.8474248345780289, |
|
"grad_norm": 5.071914196014404, |
|
"learning_rate": 1.8305150330843943e-05, |
|
"loss": 0.7222, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.8512943543706226, |
|
"grad_norm": 2.837449312210083, |
|
"learning_rate": 1.8297411291258756e-05, |
|
"loss": 0.6806, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8551638741632164, |
|
"grad_norm": 2.692207098007202, |
|
"learning_rate": 1.828967225167357e-05, |
|
"loss": 0.6858, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.8590333939558101, |
|
"grad_norm": 5.077603816986084, |
|
"learning_rate": 1.8281933212088382e-05, |
|
"loss": 0.6851, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.8629029137484038, |
|
"grad_norm": 2.468109607696533, |
|
"learning_rate": 1.8274194172503195e-05, |
|
"loss": 0.7078, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.8667724335409975, |
|
"grad_norm": 2.6579232215881348, |
|
"learning_rate": 1.8266455132918005e-05, |
|
"loss": 0.6966, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.8706419533335913, |
|
"grad_norm": 2.528069496154785, |
|
"learning_rate": 1.8258716093332818e-05, |
|
"loss": 0.6702, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.874511473126185, |
|
"grad_norm": 2.341118812561035, |
|
"learning_rate": 1.825097705374763e-05, |
|
"loss": 0.6622, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.8783809929187788, |
|
"grad_norm": 2.7627336978912354, |
|
"learning_rate": 1.8243238014162444e-05, |
|
"loss": 0.6657, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.8822505127113726, |
|
"grad_norm": 3.725227117538452, |
|
"learning_rate": 1.8235498974577257e-05, |
|
"loss": 0.6722, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.8861200325039662, |
|
"grad_norm": 2.631901741027832, |
|
"learning_rate": 1.822775993499207e-05, |
|
"loss": 0.6866, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.88998955229656, |
|
"grad_norm": 2.7313873767852783, |
|
"learning_rate": 1.8220020895406883e-05, |
|
"loss": 0.6865, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.8938590720891537, |
|
"grad_norm": 2.1421585083007812, |
|
"learning_rate": 1.8212281855821693e-05, |
|
"loss": 0.6835, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.8977285918817475, |
|
"grad_norm": 3.489210605621338, |
|
"learning_rate": 1.8204542816236506e-05, |
|
"loss": 0.6839, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.9015981116743412, |
|
"grad_norm": 2.160252809524536, |
|
"learning_rate": 1.819680377665132e-05, |
|
"loss": 0.6474, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.905467631466935, |
|
"grad_norm": 2.6864516735076904, |
|
"learning_rate": 1.8189064737066132e-05, |
|
"loss": 0.685, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.9093371512595287, |
|
"grad_norm": 1.3721705675125122, |
|
"learning_rate": 1.8181325697480946e-05, |
|
"loss": 0.6555, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9132066710521224, |
|
"grad_norm": 2.154467821121216, |
|
"learning_rate": 1.8173586657895755e-05, |
|
"loss": 0.6683, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.9170761908447161, |
|
"grad_norm": 2.175413131713867, |
|
"learning_rate": 1.816584761831057e-05, |
|
"loss": 0.6489, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.9209457106373099, |
|
"grad_norm": 2.504239082336426, |
|
"learning_rate": 1.815810857872538e-05, |
|
"loss": 0.6364, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.9248152304299037, |
|
"grad_norm": 2.6821277141571045, |
|
"learning_rate": 1.8150369539140195e-05, |
|
"loss": 0.6678, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.9286847502224974, |
|
"grad_norm": 2.505697011947632, |
|
"learning_rate": 1.8142630499555008e-05, |
|
"loss": 0.6755, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9325542700150912, |
|
"grad_norm": 2.164247989654541, |
|
"learning_rate": 1.8134891459969817e-05, |
|
"loss": 0.6612, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.9364237898076848, |
|
"grad_norm": 1.8794877529144287, |
|
"learning_rate": 1.8127152420384634e-05, |
|
"loss": 0.6422, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.9402933096002786, |
|
"grad_norm": 2.1299638748168945, |
|
"learning_rate": 1.8119413380799444e-05, |
|
"loss": 0.655, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.9441628293928723, |
|
"grad_norm": 2.3705379962921143, |
|
"learning_rate": 1.8111674341214257e-05, |
|
"loss": 0.6435, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.9480323491854661, |
|
"grad_norm": 3.0839450359344482, |
|
"learning_rate": 1.810393530162907e-05, |
|
"loss": 0.7019, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9519018689780598, |
|
"grad_norm": 1.8064953088760376, |
|
"learning_rate": 1.8096196262043883e-05, |
|
"loss": 0.648, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.9557713887706536, |
|
"grad_norm": 2.46791934967041, |
|
"learning_rate": 1.8088457222458696e-05, |
|
"loss": 0.6398, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.9596409085632474, |
|
"grad_norm": 3.4168615341186523, |
|
"learning_rate": 1.8080718182873506e-05, |
|
"loss": 0.685, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.963510428355841, |
|
"grad_norm": 2.9070920944213867, |
|
"learning_rate": 1.807297914328832e-05, |
|
"loss": 0.6559, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.9673799481484348, |
|
"grad_norm": 2.403107166290283, |
|
"learning_rate": 1.8065240103703132e-05, |
|
"loss": 0.6845, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9712494679410285, |
|
"grad_norm": 2.1251277923583984, |
|
"learning_rate": 1.8057501064117945e-05, |
|
"loss": 0.6268, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.9751189877336223, |
|
"grad_norm": 3.04496693611145, |
|
"learning_rate": 1.8049762024532758e-05, |
|
"loss": 0.6734, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.978988507526216, |
|
"grad_norm": 4.796090602874756, |
|
"learning_rate": 1.8042022984947568e-05, |
|
"loss": 0.6522, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.9828580273188098, |
|
"grad_norm": 2.2436540126800537, |
|
"learning_rate": 1.8034283945362384e-05, |
|
"loss": 0.6638, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.9867275471114034, |
|
"grad_norm": 2.5630242824554443, |
|
"learning_rate": 1.8026544905777194e-05, |
|
"loss": 0.6387, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.9905970669039972, |
|
"grad_norm": 2.3005053997039795, |
|
"learning_rate": 1.8018805866192007e-05, |
|
"loss": 0.6322, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.9944665866965909, |
|
"grad_norm": 1.7634360790252686, |
|
"learning_rate": 1.801106682660682e-05, |
|
"loss": 0.6459, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.9983361064891847, |
|
"grad_norm": 2.2708933353424072, |
|
"learning_rate": 1.800332778702163e-05, |
|
"loss": 0.6625, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.999961304802074, |
|
"eval_loss": 0.418006956577301, |
|
"eval_runtime": 72.9007, |
|
"eval_samples_per_second": 28.655, |
|
"eval_steps_per_second": 3.594, |
|
"step": 25842 |
|
}, |
|
{ |
|
"epoch": 1.0022056262817784, |
|
"grad_norm": 2.6418962478637695, |
|
"learning_rate": 1.9995511357040595e-05, |
|
"loss": 0.6466, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.0060751460743722, |
|
"grad_norm": 3.1957106590270996, |
|
"learning_rate": 1.9987772317455404e-05, |
|
"loss": 0.6523, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.009944665866966, |
|
"grad_norm": 3.396048069000244, |
|
"learning_rate": 1.9980033277870218e-05, |
|
"loss": 0.6683, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.0138141856595597, |
|
"grad_norm": 2.8312299251556396, |
|
"learning_rate": 1.997229423828503e-05, |
|
"loss": 0.645, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.0176837054521535, |
|
"grad_norm": 3.423259973526001, |
|
"learning_rate": 1.996455519869984e-05, |
|
"loss": 0.6086, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.0215532252447472, |
|
"grad_norm": 2.7330431938171387, |
|
"learning_rate": 1.9956816159114657e-05, |
|
"loss": 0.6612, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.0254227450373408, |
|
"grad_norm": 2.3465893268585205, |
|
"learning_rate": 1.9949077119529467e-05, |
|
"loss": 0.6338, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0292922648299345, |
|
"grad_norm": 2.6004831790924072, |
|
"learning_rate": 1.994133807994428e-05, |
|
"loss": 0.6199, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.0331617846225283, |
|
"grad_norm": 2.838761806488037, |
|
"learning_rate": 1.9933599040359093e-05, |
|
"loss": 0.6242, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.037031304415122, |
|
"grad_norm": 3.6770079135894775, |
|
"learning_rate": 1.9925860000773906e-05, |
|
"loss": 0.6146, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.0409008242077158, |
|
"grad_norm": 2.4731369018554688, |
|
"learning_rate": 1.991812096118872e-05, |
|
"loss": 0.6218, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.0447703440003095, |
|
"grad_norm": 1.8442339897155762, |
|
"learning_rate": 1.991038192160353e-05, |
|
"loss": 0.6436, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0486398637929033, |
|
"grad_norm": 3.833077907562256, |
|
"learning_rate": 1.9902642882018345e-05, |
|
"loss": 0.6166, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.052509383585497, |
|
"grad_norm": 3.100231647491455, |
|
"learning_rate": 1.9894903842433155e-05, |
|
"loss": 0.6176, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.0563789033780908, |
|
"grad_norm": 1.8669284582138062, |
|
"learning_rate": 1.9887164802847968e-05, |
|
"loss": 0.6091, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.0602484231706846, |
|
"grad_norm": 1.897378921508789, |
|
"learning_rate": 1.987942576326278e-05, |
|
"loss": 0.652, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.0641179429632783, |
|
"grad_norm": 1.5283644199371338, |
|
"learning_rate": 1.987168672367759e-05, |
|
"loss": 0.6318, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.067987462755872, |
|
"grad_norm": 2.560490131378174, |
|
"learning_rate": 1.9863947684092407e-05, |
|
"loss": 0.6215, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.0718569825484656, |
|
"grad_norm": 2.0089356899261475, |
|
"learning_rate": 1.9856208644507217e-05, |
|
"loss": 0.6024, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.0757265023410594, |
|
"grad_norm": 3.221628427505493, |
|
"learning_rate": 1.984846960492203e-05, |
|
"loss": 0.6451, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.0795960221336531, |
|
"grad_norm": 2.0573599338531494, |
|
"learning_rate": 1.9840730565336843e-05, |
|
"loss": 0.6564, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.0834655419262469, |
|
"grad_norm": 2.207869052886963, |
|
"learning_rate": 1.9832991525751656e-05, |
|
"loss": 0.5918, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0873350617188406, |
|
"grad_norm": 2.5935580730438232, |
|
"learning_rate": 1.982525248616647e-05, |
|
"loss": 0.6011, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.0912045815114344, |
|
"grad_norm": 3.1886303424835205, |
|
"learning_rate": 1.981751344658128e-05, |
|
"loss": 0.6018, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.0950741013040282, |
|
"grad_norm": 3.0881826877593994, |
|
"learning_rate": 1.9809774406996096e-05, |
|
"loss": 0.6491, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.098943621096622, |
|
"grad_norm": 2.027204751968384, |
|
"learning_rate": 1.9802035367410905e-05, |
|
"loss": 0.584, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.1028131408892157, |
|
"grad_norm": 2.9437708854675293, |
|
"learning_rate": 1.979429632782572e-05, |
|
"loss": 0.599, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1066826606818094, |
|
"grad_norm": 2.136786460876465, |
|
"learning_rate": 1.978655728824053e-05, |
|
"loss": 0.5996, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.1105521804744032, |
|
"grad_norm": 2.343502998352051, |
|
"learning_rate": 1.977881824865534e-05, |
|
"loss": 0.6006, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.114421700266997, |
|
"grad_norm": 2.804229497909546, |
|
"learning_rate": 1.9771079209070158e-05, |
|
"loss": 0.6317, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.1182912200595907, |
|
"grad_norm": 2.8518316745758057, |
|
"learning_rate": 1.9763340169484968e-05, |
|
"loss": 0.6092, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.1221607398521845, |
|
"grad_norm": 2.5474793910980225, |
|
"learning_rate": 1.975560112989978e-05, |
|
"loss": 0.6016, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.126030259644778, |
|
"grad_norm": 3.378596544265747, |
|
"learning_rate": 1.9747862090314594e-05, |
|
"loss": 0.5828, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.1298997794373717, |
|
"grad_norm": 2.2915539741516113, |
|
"learning_rate": 1.9740123050729403e-05, |
|
"loss": 0.5981, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.1337692992299655, |
|
"grad_norm": 4.275195598602295, |
|
"learning_rate": 1.973238401114422e-05, |
|
"loss": 0.5924, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.1376388190225593, |
|
"grad_norm": 3.948652744293213, |
|
"learning_rate": 1.972464497155903e-05, |
|
"loss": 0.605, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.141508338815153, |
|
"grad_norm": 1.9093629121780396, |
|
"learning_rate": 1.9716905931973843e-05, |
|
"loss": 0.6062, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1453778586077468, |
|
"grad_norm": 2.5479652881622314, |
|
"learning_rate": 1.9709166892388656e-05, |
|
"loss": 0.5875, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.1492473784003405, |
|
"grad_norm": 1.8700230121612549, |
|
"learning_rate": 1.970142785280347e-05, |
|
"loss": 0.5877, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.1531168981929343, |
|
"grad_norm": 1.7103559970855713, |
|
"learning_rate": 1.9693688813218282e-05, |
|
"loss": 0.5917, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.156986417985528, |
|
"grad_norm": 2.3256266117095947, |
|
"learning_rate": 1.9685949773633092e-05, |
|
"loss": 0.5866, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.1608559377781218, |
|
"grad_norm": 2.2812767028808594, |
|
"learning_rate": 1.9678210734047908e-05, |
|
"loss": 0.5746, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1647254575707155, |
|
"grad_norm": 1.945586919784546, |
|
"learning_rate": 1.9670471694462718e-05, |
|
"loss": 0.5772, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.1685949773633093, |
|
"grad_norm": 2.1316816806793213, |
|
"learning_rate": 1.966273265487753e-05, |
|
"loss": 0.5692, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.1724644971559028, |
|
"grad_norm": 1.6511510610580444, |
|
"learning_rate": 1.9654993615292344e-05, |
|
"loss": 0.5888, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.1763340169484966, |
|
"grad_norm": 2.1600418090820312, |
|
"learning_rate": 1.9647254575707154e-05, |
|
"loss": 0.5957, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.1802035367410904, |
|
"grad_norm": 2.488708257675171, |
|
"learning_rate": 1.963951553612197e-05, |
|
"loss": 0.5742, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.184073056533684, |
|
"grad_norm": 4.75523042678833, |
|
"learning_rate": 1.963177649653678e-05, |
|
"loss": 0.5512, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.1879425763262779, |
|
"grad_norm": 2.335571050643921, |
|
"learning_rate": 1.9624037456951593e-05, |
|
"loss": 0.5665, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.1918120961188716, |
|
"grad_norm": 2.083780527114868, |
|
"learning_rate": 1.9616298417366406e-05, |
|
"loss": 0.5682, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.1956816159114654, |
|
"grad_norm": 2.2006747722625732, |
|
"learning_rate": 1.960855937778122e-05, |
|
"loss": 0.5837, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.1995511357040591, |
|
"grad_norm": 2.8674488067626953, |
|
"learning_rate": 1.9600820338196033e-05, |
|
"loss": 0.5921, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.203420655496653, |
|
"grad_norm": 1.7168868780136108, |
|
"learning_rate": 1.9593081298610842e-05, |
|
"loss": 0.5833, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.2072901752892466, |
|
"grad_norm": 1.9584147930145264, |
|
"learning_rate": 1.9585342259025655e-05, |
|
"loss": 0.6038, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.2111596950818404, |
|
"grad_norm": 1.6358076333999634, |
|
"learning_rate": 1.957760321944047e-05, |
|
"loss": 0.5733, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.2150292148744342, |
|
"grad_norm": 2.02459454536438, |
|
"learning_rate": 1.956986417985528e-05, |
|
"loss": 0.5727, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.218898734667028, |
|
"grad_norm": 3.0293195247650146, |
|
"learning_rate": 1.9562125140270095e-05, |
|
"loss": 0.5738, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2227682544596217, |
|
"grad_norm": 1.8469349145889282, |
|
"learning_rate": 1.9554386100684904e-05, |
|
"loss": 0.5921, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.2266377742522152, |
|
"grad_norm": 2.891789436340332, |
|
"learning_rate": 1.954664706109972e-05, |
|
"loss": 0.5722, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.230507294044809, |
|
"grad_norm": 1.9393624067306519, |
|
"learning_rate": 1.953890802151453e-05, |
|
"loss": 0.5969, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.2343768138374027, |
|
"grad_norm": 2.441889524459839, |
|
"learning_rate": 1.9531168981929344e-05, |
|
"loss": 0.5962, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.2382463336299965, |
|
"grad_norm": 2.32602596282959, |
|
"learning_rate": 1.9523429942344157e-05, |
|
"loss": 0.5867, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2421158534225902, |
|
"grad_norm": 2.894043445587158, |
|
"learning_rate": 1.951569090275897e-05, |
|
"loss": 0.5841, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.245985373215184, |
|
"grad_norm": 1.8625471591949463, |
|
"learning_rate": 1.9507951863173783e-05, |
|
"loss": 0.5541, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.2498548930077777, |
|
"grad_norm": 2.3896780014038086, |
|
"learning_rate": 1.9500212823588593e-05, |
|
"loss": 0.5876, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.2537244128003715, |
|
"grad_norm": 1.956554651260376, |
|
"learning_rate": 1.9492473784003406e-05, |
|
"loss": 0.569, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.2575939325929653, |
|
"grad_norm": 2.6784517765045166, |
|
"learning_rate": 1.948473474441822e-05, |
|
"loss": 0.5661, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.261463452385559, |
|
"grad_norm": 1.7353438138961792, |
|
"learning_rate": 1.9476995704833032e-05, |
|
"loss": 0.5699, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.2653329721781528, |
|
"grad_norm": 2.6139369010925293, |
|
"learning_rate": 1.9469256665247845e-05, |
|
"loss": 0.5857, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.2692024919707463, |
|
"grad_norm": 1.978928804397583, |
|
"learning_rate": 1.9461517625662655e-05, |
|
"loss": 0.568, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.27307201176334, |
|
"grad_norm": 1.856897234916687, |
|
"learning_rate": 1.945377858607747e-05, |
|
"loss": 0.5629, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.2769415315559338, |
|
"grad_norm": 2.1292757987976074, |
|
"learning_rate": 1.944603954649228e-05, |
|
"loss": 0.5509, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2808110513485276, |
|
"grad_norm": 4.36674690246582, |
|
"learning_rate": 1.9438300506907094e-05, |
|
"loss": 0.5796, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.2846805711411213, |
|
"grad_norm": 2.971160888671875, |
|
"learning_rate": 1.9430561467321907e-05, |
|
"loss": 0.5735, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.288550090933715, |
|
"grad_norm": 2.1826820373535156, |
|
"learning_rate": 1.942282242773672e-05, |
|
"loss": 0.5478, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.2924196107263088, |
|
"grad_norm": 2.4867353439331055, |
|
"learning_rate": 1.9415083388151533e-05, |
|
"loss": 0.5311, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.2962891305189026, |
|
"grad_norm": 3.6826677322387695, |
|
"learning_rate": 1.9407344348566343e-05, |
|
"loss": 0.5687, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3001586503114964, |
|
"grad_norm": 1.6112810373306274, |
|
"learning_rate": 1.9399605308981156e-05, |
|
"loss": 0.5808, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.3040281701040901, |
|
"grad_norm": 1.9377580881118774, |
|
"learning_rate": 1.939186626939597e-05, |
|
"loss": 0.5936, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.3078976898966839, |
|
"grad_norm": 2.1743500232696533, |
|
"learning_rate": 1.9384127229810783e-05, |
|
"loss": 0.5494, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.3117672096892776, |
|
"grad_norm": 2.9613547325134277, |
|
"learning_rate": 1.9376388190225596e-05, |
|
"loss": 0.5454, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.3156367294818714, |
|
"grad_norm": 1.6620279550552368, |
|
"learning_rate": 1.9368649150640405e-05, |
|
"loss": 0.5754, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3195062492744651, |
|
"grad_norm": 1.6309536695480347, |
|
"learning_rate": 1.936091011105522e-05, |
|
"loss": 0.5504, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.323375769067059, |
|
"grad_norm": 2.2999184131622314, |
|
"learning_rate": 1.935317107147003e-05, |
|
"loss": 0.5596, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.3272452888596526, |
|
"grad_norm": 1.74318528175354, |
|
"learning_rate": 1.9345432031884845e-05, |
|
"loss": 0.5507, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.3311148086522462, |
|
"grad_norm": 2.797492265701294, |
|
"learning_rate": 1.9337692992299658e-05, |
|
"loss": 0.5815, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.33498432844484, |
|
"grad_norm": 2.596480369567871, |
|
"learning_rate": 1.9329953952714467e-05, |
|
"loss": 0.5624, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.3388538482374337, |
|
"grad_norm": 2.3296291828155518, |
|
"learning_rate": 1.9322214913129284e-05, |
|
"loss": 0.5508, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.3427233680300275, |
|
"grad_norm": 2.2664413452148438, |
|
"learning_rate": 1.9314475873544094e-05, |
|
"loss": 0.5614, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.3465928878226212, |
|
"grad_norm": 2.1926095485687256, |
|
"learning_rate": 1.9306736833958907e-05, |
|
"loss": 0.528, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.350462407615215, |
|
"grad_norm": 3.2874293327331543, |
|
"learning_rate": 1.929899779437372e-05, |
|
"loss": 0.558, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.3543319274078087, |
|
"grad_norm": 3.320584774017334, |
|
"learning_rate": 1.9291258754788533e-05, |
|
"loss": 0.557, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3582014472004025, |
|
"grad_norm": 1.9394633769989014, |
|
"learning_rate": 1.9283519715203346e-05, |
|
"loss": 0.544, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.3620709669929962, |
|
"grad_norm": 2.928828001022339, |
|
"learning_rate": 1.9275780675618156e-05, |
|
"loss": 0.5797, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.36594048678559, |
|
"grad_norm": 1.899062991142273, |
|
"learning_rate": 1.926804163603297e-05, |
|
"loss": 0.541, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.3698100065781835, |
|
"grad_norm": 1.9945220947265625, |
|
"learning_rate": 1.9260302596447782e-05, |
|
"loss": 0.5584, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.3736795263707773, |
|
"grad_norm": 2.7123208045959473, |
|
"learning_rate": 1.9252563556862595e-05, |
|
"loss": 0.5514, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.377549046163371, |
|
"grad_norm": 1.9963657855987549, |
|
"learning_rate": 1.9244824517277408e-05, |
|
"loss": 0.5278, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.3814185659559648, |
|
"grad_norm": 2.3080806732177734, |
|
"learning_rate": 1.9237085477692218e-05, |
|
"loss": 0.5283, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.3852880857485586, |
|
"grad_norm": 1.7134689092636108, |
|
"learning_rate": 1.922934643810703e-05, |
|
"loss": 0.5618, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.3891576055411523, |
|
"grad_norm": 2.130807876586914, |
|
"learning_rate": 1.9221607398521844e-05, |
|
"loss": 0.5177, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.393027125333746, |
|
"grad_norm": 1.8442771434783936, |
|
"learning_rate": 1.9213868358936657e-05, |
|
"loss": 0.5406, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3968966451263398, |
|
"grad_norm": 2.7372751235961914, |
|
"learning_rate": 1.920612931935147e-05, |
|
"loss": 0.5312, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.4007661649189336, |
|
"grad_norm": 1.7819504737854004, |
|
"learning_rate": 1.9198390279766283e-05, |
|
"loss": 0.5563, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.4046356847115273, |
|
"grad_norm": 2.3207108974456787, |
|
"learning_rate": 1.9190651240181097e-05, |
|
"loss": 0.5494, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.408505204504121, |
|
"grad_norm": 2.2460434436798096, |
|
"learning_rate": 1.9182912200595906e-05, |
|
"loss": 0.5537, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.4123747242967148, |
|
"grad_norm": 2.508028984069824, |
|
"learning_rate": 1.917517316101072e-05, |
|
"loss": 0.5317, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4162442440893086, |
|
"grad_norm": 2.5418314933776855, |
|
"learning_rate": 1.9167434121425532e-05, |
|
"loss": 0.5132, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.4201137638819024, |
|
"grad_norm": 2.6335184574127197, |
|
"learning_rate": 1.9159695081840346e-05, |
|
"loss": 0.5628, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.4239832836744961, |
|
"grad_norm": 2.0406432151794434, |
|
"learning_rate": 1.915195604225516e-05, |
|
"loss": 0.5245, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.4278528034670899, |
|
"grad_norm": 1.7628612518310547, |
|
"learning_rate": 1.914421700266997e-05, |
|
"loss": 0.5331, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.4317223232596834, |
|
"grad_norm": 1.38942551612854, |
|
"learning_rate": 1.913647796308478e-05, |
|
"loss": 0.54, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4355918430522772, |
|
"grad_norm": 2.143207550048828, |
|
"learning_rate": 1.9128738923499595e-05, |
|
"loss": 0.5553, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.439461362844871, |
|
"grad_norm": 3.6318371295928955, |
|
"learning_rate": 1.9120999883914408e-05, |
|
"loss": 0.4981, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.4433308826374647, |
|
"grad_norm": 1.8823308944702148, |
|
"learning_rate": 1.911326084432922e-05, |
|
"loss": 0.5185, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.4472004024300584, |
|
"grad_norm": 2.1262166500091553, |
|
"learning_rate": 1.9105521804744034e-05, |
|
"loss": 0.5126, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.4510699222226522, |
|
"grad_norm": 2.445730686187744, |
|
"learning_rate": 1.9097782765158847e-05, |
|
"loss": 0.5327, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.454939442015246, |
|
"grad_norm": 3.0408647060394287, |
|
"learning_rate": 1.9090043725573657e-05, |
|
"loss": 0.5152, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.4588089618078397, |
|
"grad_norm": 2.2298266887664795, |
|
"learning_rate": 1.908230468598847e-05, |
|
"loss": 0.5477, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.4626784816004335, |
|
"grad_norm": 1.3029440641403198, |
|
"learning_rate": 1.9074565646403283e-05, |
|
"loss": 0.5292, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.466548001393027, |
|
"grad_norm": 2.163135051727295, |
|
"learning_rate": 1.9066826606818096e-05, |
|
"loss": 0.5339, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.4704175211856207, |
|
"grad_norm": 2.660160541534424, |
|
"learning_rate": 1.905908756723291e-05, |
|
"loss": 0.5302, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4742870409782145, |
|
"grad_norm": 2.235914468765259, |
|
"learning_rate": 1.905134852764772e-05, |
|
"loss": 0.5436, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.4781565607708083, |
|
"grad_norm": 2.123150110244751, |
|
"learning_rate": 1.9043609488062532e-05, |
|
"loss": 0.5442, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.482026080563402, |
|
"grad_norm": 2.917724609375, |
|
"learning_rate": 1.9035870448477345e-05, |
|
"loss": 0.5253, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.4858956003559958, |
|
"grad_norm": 1.8136122226715088, |
|
"learning_rate": 1.9028131408892158e-05, |
|
"loss": 0.5259, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.4897651201485895, |
|
"grad_norm": 1.409419298171997, |
|
"learning_rate": 1.902039236930697e-05, |
|
"loss": 0.5412, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.4936346399411833, |
|
"grad_norm": 1.6381382942199707, |
|
"learning_rate": 1.9012653329721784e-05, |
|
"loss": 0.5253, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.497504159733777, |
|
"grad_norm": 1.4886516332626343, |
|
"learning_rate": 1.9004914290136594e-05, |
|
"loss": 0.505, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.4999419572031112, |
|
"eval_loss": 0.3584839403629303, |
|
"eval_runtime": 75.7902, |
|
"eval_samples_per_second": 27.563, |
|
"eval_steps_per_second": 3.457, |
|
"step": 38763 |
|
}, |
|
{ |
|
"epoch": 1.5013736795263708, |
|
"grad_norm": 4.64426851272583, |
|
"learning_rate": 1.8997175250551407e-05, |
|
"loss": 0.533, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.5052431993189646, |
|
"grad_norm": 2.312692403793335, |
|
"learning_rate": 1.898943621096622e-05, |
|
"loss": 0.506, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.5091127191115583, |
|
"grad_norm": 2.6338064670562744, |
|
"learning_rate": 1.8981697171381033e-05, |
|
"loss": 0.5318, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.512982238904152, |
|
"grad_norm": 2.5822842121124268, |
|
"learning_rate": 1.8973958131795847e-05, |
|
"loss": 0.5192, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.5168517586967458, |
|
"grad_norm": 2.04563570022583, |
|
"learning_rate": 1.896621909221066e-05, |
|
"loss": 0.5187, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.5207212784893396, |
|
"grad_norm": 2.4310240745544434, |
|
"learning_rate": 1.895848005262547e-05, |
|
"loss": 0.5087, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.5245907982819333, |
|
"grad_norm": 2.6476080417633057, |
|
"learning_rate": 1.8950741013040282e-05, |
|
"loss": 0.5202, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.528460318074527, |
|
"grad_norm": 2.638237237930298, |
|
"learning_rate": 1.8943001973455096e-05, |
|
"loss": 0.526, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.5323298378671208, |
|
"grad_norm": 1.1704139709472656, |
|
"learning_rate": 1.893526293386991e-05, |
|
"loss": 0.5215, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.5361993576597144, |
|
"grad_norm": 2.730487108230591, |
|
"learning_rate": 1.8927523894284722e-05, |
|
"loss": 0.5539, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.5400688774523081, |
|
"grad_norm": 2.1526124477386475, |
|
"learning_rate": 1.8919784854699535e-05, |
|
"loss": 0.5102, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.543938397244902, |
|
"grad_norm": 1.5470048189163208, |
|
"learning_rate": 1.8912045815114345e-05, |
|
"loss": 0.5037, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.5478079170374957, |
|
"grad_norm": 1.6453921794891357, |
|
"learning_rate": 1.8904306775529158e-05, |
|
"loss": 0.5125, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5516774368300894, |
|
"grad_norm": 1.8560413122177124, |
|
"learning_rate": 1.889656773594397e-05, |
|
"loss": 0.519, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.5555469566226832, |
|
"grad_norm": 1.750705361366272, |
|
"learning_rate": 1.8888828696358784e-05, |
|
"loss": 0.5526, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.5594164764152767, |
|
"grad_norm": 1.9431952238082886, |
|
"learning_rate": 1.8881089656773597e-05, |
|
"loss": 0.4997, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.5632859962078705, |
|
"grad_norm": 3.076261043548584, |
|
"learning_rate": 1.887335061718841e-05, |
|
"loss": 0.517, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.5671555160004642, |
|
"grad_norm": 3.281338930130005, |
|
"learning_rate": 1.886561157760322e-05, |
|
"loss": 0.5084, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.571025035793058, |
|
"grad_norm": 2.3248159885406494, |
|
"learning_rate": 1.8857872538018033e-05, |
|
"loss": 0.5265, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.5748945555856517, |
|
"grad_norm": 1.9274684190750122, |
|
"learning_rate": 1.8850133498432846e-05, |
|
"loss": 0.5286, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.5787640753782455, |
|
"grad_norm": 2.3014674186706543, |
|
"learning_rate": 1.884239445884766e-05, |
|
"loss": 0.5212, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.5826335951708392, |
|
"grad_norm": 1.7328417301177979, |
|
"learning_rate": 1.8834655419262472e-05, |
|
"loss": 0.4932, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.586503114963433, |
|
"grad_norm": 1.4878865480422974, |
|
"learning_rate": 1.8826916379677285e-05, |
|
"loss": 0.5002, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.5903726347560267, |
|
"grad_norm": 2.281660556793213, |
|
"learning_rate": 1.8819177340092095e-05, |
|
"loss": 0.5135, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.5942421545486205, |
|
"grad_norm": 2.6435060501098633, |
|
"learning_rate": 1.8811438300506908e-05, |
|
"loss": 0.5195, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.5981116743412143, |
|
"grad_norm": 2.7956855297088623, |
|
"learning_rate": 1.880369926092172e-05, |
|
"loss": 0.5206, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.601981194133808, |
|
"grad_norm": 2.0349161624908447, |
|
"learning_rate": 1.8795960221336534e-05, |
|
"loss": 0.5372, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.6058507139264018, |
|
"grad_norm": 2.1461424827575684, |
|
"learning_rate": 1.8788221181751347e-05, |
|
"loss": 0.4901, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6097202337189955, |
|
"grad_norm": 1.6793510913848877, |
|
"learning_rate": 1.8780482142166157e-05, |
|
"loss": 0.5153, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.6135897535115893, |
|
"grad_norm": 1.605312466621399, |
|
"learning_rate": 1.877274310258097e-05, |
|
"loss": 0.5136, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.617459273304183, |
|
"grad_norm": 1.3838826417922974, |
|
"learning_rate": 1.8765004062995783e-05, |
|
"loss": 0.5016, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.6213287930967768, |
|
"grad_norm": 1.7461504936218262, |
|
"learning_rate": 1.8757265023410597e-05, |
|
"loss": 0.5174, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.6251983128893706, |
|
"grad_norm": 1.5110478401184082, |
|
"learning_rate": 1.874952598382541e-05, |
|
"loss": 0.4928, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6290678326819643, |
|
"grad_norm": 4.541932106018066, |
|
"learning_rate": 1.8741786944240223e-05, |
|
"loss": 0.502, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.632937352474558, |
|
"grad_norm": 2.1037495136260986, |
|
"learning_rate": 1.8734047904655032e-05, |
|
"loss": 0.5133, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.6368068722671516, |
|
"grad_norm": 1.9825454950332642, |
|
"learning_rate": 1.8726308865069846e-05, |
|
"loss": 0.4841, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.6406763920597454, |
|
"grad_norm": 1.887780785560608, |
|
"learning_rate": 1.871856982548466e-05, |
|
"loss": 0.5447, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.6445459118523391, |
|
"grad_norm": 2.2040176391601562, |
|
"learning_rate": 1.8710830785899472e-05, |
|
"loss": 0.5181, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.6484154316449329, |
|
"grad_norm": 2.889693260192871, |
|
"learning_rate": 1.8703091746314285e-05, |
|
"loss": 0.4811, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.6522849514375266, |
|
"grad_norm": 1.1301201581954956, |
|
"learning_rate": 1.8695352706729098e-05, |
|
"loss": 0.5074, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.6561544712301204, |
|
"grad_norm": 2.004110097885132, |
|
"learning_rate": 1.8687613667143908e-05, |
|
"loss": 0.4964, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.660023991022714, |
|
"grad_norm": 2.496898889541626, |
|
"learning_rate": 1.867987462755872e-05, |
|
"loss": 0.5143, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.6638935108153077, |
|
"grad_norm": 1.8562402725219727, |
|
"learning_rate": 1.8672135587973534e-05, |
|
"loss": 0.4961, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6677630306079014, |
|
"grad_norm": 2.016951084136963, |
|
"learning_rate": 1.8664396548388347e-05, |
|
"loss": 0.4953, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.6716325504004952, |
|
"grad_norm": 1.9758374691009521, |
|
"learning_rate": 1.865665750880316e-05, |
|
"loss": 0.4871, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.675502070193089, |
|
"grad_norm": 1.7189594507217407, |
|
"learning_rate": 1.864891846921797e-05, |
|
"loss": 0.5054, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.6793715899856827, |
|
"grad_norm": 2.1537132263183594, |
|
"learning_rate": 1.8641179429632783e-05, |
|
"loss": 0.5048, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.6832411097782765, |
|
"grad_norm": 1.0246325731277466, |
|
"learning_rate": 1.8633440390047596e-05, |
|
"loss": 0.5053, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.6871106295708702, |
|
"grad_norm": 2.4667491912841797, |
|
"learning_rate": 1.862570135046241e-05, |
|
"loss": 0.4947, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.690980149363464, |
|
"grad_norm": 2.4239518642425537, |
|
"learning_rate": 1.8617962310877222e-05, |
|
"loss": 0.4928, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.6948496691560577, |
|
"grad_norm": 2.4990153312683105, |
|
"learning_rate": 1.8610223271292035e-05, |
|
"loss": 0.492, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.6987191889486515, |
|
"grad_norm": 1.7751882076263428, |
|
"learning_rate": 1.860248423170685e-05, |
|
"loss": 0.5012, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.7025887087412452, |
|
"grad_norm": 1.9940294027328491, |
|
"learning_rate": 1.8594745192121658e-05, |
|
"loss": 0.4956, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.706458228533839, |
|
"grad_norm": 1.631549596786499, |
|
"learning_rate": 1.858700615253647e-05, |
|
"loss": 0.5031, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.7103277483264328, |
|
"grad_norm": 2.021336078643799, |
|
"learning_rate": 1.8579267112951284e-05, |
|
"loss": 0.5237, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.7141972681190265, |
|
"grad_norm": 1.925374150276184, |
|
"learning_rate": 1.8571528073366097e-05, |
|
"loss": 0.5043, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.7180667879116203, |
|
"grad_norm": 2.0028488636016846, |
|
"learning_rate": 1.856378903378091e-05, |
|
"loss": 0.4828, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.721936307704214, |
|
"grad_norm": 2.185070037841797, |
|
"learning_rate": 1.855604999419572e-05, |
|
"loss": 0.4947, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7258058274968078, |
|
"grad_norm": 1.4172900915145874, |
|
"learning_rate": 1.8548310954610533e-05, |
|
"loss": 0.4766, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.7296753472894015, |
|
"grad_norm": 3.3415722846984863, |
|
"learning_rate": 1.8540571915025346e-05, |
|
"loss": 0.4867, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.7335448670819953, |
|
"grad_norm": 3.0978989601135254, |
|
"learning_rate": 1.853283287544016e-05, |
|
"loss": 0.4803, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.7374143868745888, |
|
"grad_norm": 1.9211913347244263, |
|
"learning_rate": 1.8525093835854973e-05, |
|
"loss": 0.4932, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.7412839066671826, |
|
"grad_norm": 2.969252347946167, |
|
"learning_rate": 1.8517354796269786e-05, |
|
"loss": 0.4797, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7451534264597763, |
|
"grad_norm": 2.3203649520874023, |
|
"learning_rate": 1.85096157566846e-05, |
|
"loss": 0.4946, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.74902294625237, |
|
"grad_norm": 3.472598075866699, |
|
"learning_rate": 1.850187671709941e-05, |
|
"loss": 0.4943, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.7528924660449638, |
|
"grad_norm": 1.84355890750885, |
|
"learning_rate": 1.8494137677514222e-05, |
|
"loss": 0.4862, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.7567619858375576, |
|
"grad_norm": 1.6597728729248047, |
|
"learning_rate": 1.8486398637929035e-05, |
|
"loss": 0.4693, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.7606315056301511, |
|
"grad_norm": 2.034503221511841, |
|
"learning_rate": 1.8478659598343848e-05, |
|
"loss": 0.4865, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.764501025422745, |
|
"grad_norm": 3.27135968208313, |
|
"learning_rate": 1.847092055875866e-05, |
|
"loss": 0.4848, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.7683705452153387, |
|
"grad_norm": 1.4343475103378296, |
|
"learning_rate": 1.846318151917347e-05, |
|
"loss": 0.486, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.7722400650079324, |
|
"grad_norm": 1.9744470119476318, |
|
"learning_rate": 1.8455442479588284e-05, |
|
"loss": 0.4903, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.7761095848005262, |
|
"grad_norm": 1.4793157577514648, |
|
"learning_rate": 1.8447703440003097e-05, |
|
"loss": 0.5093, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.77997910459312, |
|
"grad_norm": 2.11100697517395, |
|
"learning_rate": 1.843996440041791e-05, |
|
"loss": 0.4802, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.7838486243857137, |
|
"grad_norm": 3.300123929977417, |
|
"learning_rate": 1.8432225360832723e-05, |
|
"loss": 0.4801, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.7877181441783074, |
|
"grad_norm": 1.9946368932724, |
|
"learning_rate": 1.8424486321247533e-05, |
|
"loss": 0.4729, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.7915876639709012, |
|
"grad_norm": 1.8352553844451904, |
|
"learning_rate": 1.841674728166235e-05, |
|
"loss": 0.4678, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.795457183763495, |
|
"grad_norm": 1.6375458240509033, |
|
"learning_rate": 1.840900824207716e-05, |
|
"loss": 0.4885, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.7993267035560887, |
|
"grad_norm": 1.59699285030365, |
|
"learning_rate": 1.8401269202491972e-05, |
|
"loss": 0.4748, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.8031962233486825, |
|
"grad_norm": 2.2244873046875, |
|
"learning_rate": 1.8393530162906785e-05, |
|
"loss": 0.4846, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.8070657431412762, |
|
"grad_norm": 0.9236389994621277, |
|
"learning_rate": 1.83857911233216e-05, |
|
"loss": 0.4758, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.81093526293387, |
|
"grad_norm": 1.8354111909866333, |
|
"learning_rate": 1.837805208373641e-05, |
|
"loss": 0.4941, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.8148047827264637, |
|
"grad_norm": 3.5753133296966553, |
|
"learning_rate": 1.837031304415122e-05, |
|
"loss": 0.504, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.8186743025190575, |
|
"grad_norm": 3.344614028930664, |
|
"learning_rate": 1.8362574004566034e-05, |
|
"loss": 0.492, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8225438223116512, |
|
"grad_norm": 3.517702102661133, |
|
"learning_rate": 1.8354834964980847e-05, |
|
"loss": 0.4926, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.826413342104245, |
|
"grad_norm": 1.7364025115966797, |
|
"learning_rate": 1.834709592539566e-05, |
|
"loss": 0.4879, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.8302828618968388, |
|
"grad_norm": 3.0614891052246094, |
|
"learning_rate": 1.8339356885810474e-05, |
|
"loss": 0.4827, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.8341523816894323, |
|
"grad_norm": 2.4689478874206543, |
|
"learning_rate": 1.8331617846225283e-05, |
|
"loss": 0.4704, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.838021901482026, |
|
"grad_norm": 1.7155258655548096, |
|
"learning_rate": 1.83238788066401e-05, |
|
"loss": 0.4671, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8418914212746198, |
|
"grad_norm": 6.756774425506592, |
|
"learning_rate": 1.831613976705491e-05, |
|
"loss": 0.4776, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.8457609410672136, |
|
"grad_norm": 2.346327543258667, |
|
"learning_rate": 1.8308400727469723e-05, |
|
"loss": 0.4973, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.8496304608598073, |
|
"grad_norm": 2.0925145149230957, |
|
"learning_rate": 1.8300661687884536e-05, |
|
"loss": 0.4738, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.853499980652401, |
|
"grad_norm": 2.5207793712615967, |
|
"learning_rate": 1.8292922648299345e-05, |
|
"loss": 0.5101, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.8573695004449948, |
|
"grad_norm": 1.3195054531097412, |
|
"learning_rate": 1.8285183608714162e-05, |
|
"loss": 0.4742, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8612390202375884, |
|
"grad_norm": 3.062666654586792, |
|
"learning_rate": 1.827744456912897e-05, |
|
"loss": 0.4849, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.8651085400301821, |
|
"grad_norm": 2.738952398300171, |
|
"learning_rate": 1.8269705529543785e-05, |
|
"loss": 0.4841, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.8689780598227759, |
|
"grad_norm": 2.114544630050659, |
|
"learning_rate": 1.8261966489958598e-05, |
|
"loss": 0.4789, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.8728475796153696, |
|
"grad_norm": 3.1391451358795166, |
|
"learning_rate": 1.825422745037341e-05, |
|
"loss": 0.4871, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.8767170994079634, |
|
"grad_norm": 1.8246567249298096, |
|
"learning_rate": 1.8246488410788224e-05, |
|
"loss": 0.4748, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.8805866192005571, |
|
"grad_norm": 1.226166009902954, |
|
"learning_rate": 1.8238749371203034e-05, |
|
"loss": 0.4734, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.884456138993151, |
|
"grad_norm": 1.876904010772705, |
|
"learning_rate": 1.8231010331617847e-05, |
|
"loss": 0.4901, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.8883256587857447, |
|
"grad_norm": 1.992308259010315, |
|
"learning_rate": 1.822327129203266e-05, |
|
"loss": 0.4525, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.8921951785783384, |
|
"grad_norm": 1.9081039428710938, |
|
"learning_rate": 1.8215532252447473e-05, |
|
"loss": 0.4649, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.8960646983709322, |
|
"grad_norm": 1.3478918075561523, |
|
"learning_rate": 1.8207793212862286e-05, |
|
"loss": 0.4687, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.899934218163526, |
|
"grad_norm": 1.5510190725326538, |
|
"learning_rate": 1.8200054173277096e-05, |
|
"loss": 0.4593, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.9038037379561197, |
|
"grad_norm": 2.338334798812866, |
|
"learning_rate": 1.8192315133691912e-05, |
|
"loss": 0.4713, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.9076732577487134, |
|
"grad_norm": 1.5012677907943726, |
|
"learning_rate": 1.8184576094106722e-05, |
|
"loss": 0.4682, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.9115427775413072, |
|
"grad_norm": 1.439362645149231, |
|
"learning_rate": 1.8176837054521535e-05, |
|
"loss": 0.4751, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.915412297333901, |
|
"grad_norm": 1.9710556268692017, |
|
"learning_rate": 1.816909801493635e-05, |
|
"loss": 0.4823, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9192818171264947, |
|
"grad_norm": 1.8966295719146729, |
|
"learning_rate": 1.816135897535116e-05, |
|
"loss": 0.4524, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.9231513369190885, |
|
"grad_norm": 1.6913188695907593, |
|
"learning_rate": 1.8153619935765975e-05, |
|
"loss": 0.458, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.9270208567116822, |
|
"grad_norm": 2.3188533782958984, |
|
"learning_rate": 1.8145880896180784e-05, |
|
"loss": 0.4672, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.930890376504276, |
|
"grad_norm": 1.5947829484939575, |
|
"learning_rate": 1.8138141856595597e-05, |
|
"loss": 0.4703, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.9347598962968695, |
|
"grad_norm": 1.9424033164978027, |
|
"learning_rate": 1.813040281701041e-05, |
|
"loss": 0.4808, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9386294160894633, |
|
"grad_norm": 1.3641841411590576, |
|
"learning_rate": 1.8122663777425224e-05, |
|
"loss": 0.4624, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.942498935882057, |
|
"grad_norm": 1.4691535234451294, |
|
"learning_rate": 1.8114924737840037e-05, |
|
"loss": 0.4741, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.9463684556746508, |
|
"grad_norm": 0.9599714279174805, |
|
"learning_rate": 1.8107185698254846e-05, |
|
"loss": 0.4379, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.9502379754672445, |
|
"grad_norm": 2.234408140182495, |
|
"learning_rate": 1.8099446658669663e-05, |
|
"loss": 0.4594, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.9541074952598383, |
|
"grad_norm": 1.6762405633926392, |
|
"learning_rate": 1.8091707619084473e-05, |
|
"loss": 0.4767, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.957977015052432, |
|
"grad_norm": 2.0577871799468994, |
|
"learning_rate": 1.8083968579499286e-05, |
|
"loss": 0.4282, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.9618465348450256, |
|
"grad_norm": 1.48188054561615, |
|
"learning_rate": 1.80762295399141e-05, |
|
"loss": 0.4656, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.9657160546376193, |
|
"grad_norm": 2.1647377014160156, |
|
"learning_rate": 1.806849050032891e-05, |
|
"loss": 0.4531, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.969585574430213, |
|
"grad_norm": 1.387678861618042, |
|
"learning_rate": 1.8060751460743725e-05, |
|
"loss": 0.4693, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.9734550942228068, |
|
"grad_norm": 0.9403946995735168, |
|
"learning_rate": 1.8053012421158535e-05, |
|
"loss": 0.4632, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9773246140154006, |
|
"grad_norm": 2.03277325630188, |
|
"learning_rate": 1.8045273381573348e-05, |
|
"loss": 0.4539, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.9811941338079944, |
|
"grad_norm": 2.1548171043395996, |
|
"learning_rate": 1.803753434198816e-05, |
|
"loss": 0.4661, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.9850636536005881, |
|
"grad_norm": 1.6806986331939697, |
|
"learning_rate": 1.8029795302402974e-05, |
|
"loss": 0.4735, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.9889331733931819, |
|
"grad_norm": 2.099877119064331, |
|
"learning_rate": 1.8022056262817787e-05, |
|
"loss": 0.4638, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.9928026931857756, |
|
"grad_norm": 1.8525362014770508, |
|
"learning_rate": 1.8014317223232597e-05, |
|
"loss": 0.4678, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.9966722129783694, |
|
"grad_norm": 2.842991590499878, |
|
"learning_rate": 1.8006578183647413e-05, |
|
"loss": 0.4516, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.9999226096041482, |
|
"eval_loss": 0.31903380155563354, |
|
"eval_runtime": 75.5346, |
|
"eval_samples_per_second": 27.656, |
|
"eval_steps_per_second": 3.469, |
|
"step": 51684 |
|
}, |
|
{ |
|
"epoch": 2.000541732770963, |
|
"grad_norm": 1.955477237701416, |
|
"learning_rate": 1.999876175366637e-05, |
|
"loss": 0.4118, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 2.004411252563557, |
|
"grad_norm": 1.9310007095336914, |
|
"learning_rate": 1.9991022714081184e-05, |
|
"loss": 0.4665, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 2.0082807723561507, |
|
"grad_norm": 1.7999227046966553, |
|
"learning_rate": 1.9983283674495998e-05, |
|
"loss": 0.4763, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 2.0121502921487444, |
|
"grad_norm": 1.753142237663269, |
|
"learning_rate": 1.9975544634910807e-05, |
|
"loss": 0.4447, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.016019811941338, |
|
"grad_norm": 2.3983778953552246, |
|
"learning_rate": 1.9967805595325624e-05, |
|
"loss": 0.4397, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 2.019889331733932, |
|
"grad_norm": 2.5045669078826904, |
|
"learning_rate": 1.9960066555740433e-05, |
|
"loss": 0.4474, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 2.0237588515265257, |
|
"grad_norm": 1.4538400173187256, |
|
"learning_rate": 1.9952327516155247e-05, |
|
"loss": 0.4422, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 2.0276283713191194, |
|
"grad_norm": 1.072337031364441, |
|
"learning_rate": 1.994458847657006e-05, |
|
"loss": 0.4726, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 2.031497891111713, |
|
"grad_norm": 1.7496392726898193, |
|
"learning_rate": 1.993684943698487e-05, |
|
"loss": 0.4579, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.035367410904307, |
|
"grad_norm": 3.286799192428589, |
|
"learning_rate": 1.9929110397399686e-05, |
|
"loss": 0.4537, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 2.0392369306969007, |
|
"grad_norm": 2.7570724487304688, |
|
"learning_rate": 1.9921371357814496e-05, |
|
"loss": 0.4816, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 2.0431064504894945, |
|
"grad_norm": 2.1981394290924072, |
|
"learning_rate": 1.991363231822931e-05, |
|
"loss": 0.4685, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 2.0469759702820878, |
|
"grad_norm": 1.7801116704940796, |
|
"learning_rate": 1.9905893278644122e-05, |
|
"loss": 0.4573, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.0508454900746815, |
|
"grad_norm": 4.362318992614746, |
|
"learning_rate": 1.9898154239058935e-05, |
|
"loss": 0.4489, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0547150098672753, |
|
"grad_norm": 3.178088665008545, |
|
"learning_rate": 1.9890415199473748e-05, |
|
"loss": 0.4531, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 2.058584529659869, |
|
"grad_norm": 2.0428311824798584, |
|
"learning_rate": 1.9882676159888558e-05, |
|
"loss": 0.4446, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 2.062454049452463, |
|
"grad_norm": 2.213200569152832, |
|
"learning_rate": 1.9874937120303374e-05, |
|
"loss": 0.4666, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 2.0663235692450566, |
|
"grad_norm": 2.4708657264709473, |
|
"learning_rate": 1.9867198080718184e-05, |
|
"loss": 0.4441, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 2.0701930890376503, |
|
"grad_norm": 1.6680482625961304, |
|
"learning_rate": 1.9859459041132997e-05, |
|
"loss": 0.451, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.074062608830244, |
|
"grad_norm": 1.9106837511062622, |
|
"learning_rate": 1.985172000154781e-05, |
|
"loss": 0.4398, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 2.077932128622838, |
|
"grad_norm": 1.2974762916564941, |
|
"learning_rate": 1.984398096196262e-05, |
|
"loss": 0.4365, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 2.0818016484154316, |
|
"grad_norm": 2.707754135131836, |
|
"learning_rate": 1.9836241922377436e-05, |
|
"loss": 0.4387, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 2.0856711682080253, |
|
"grad_norm": 1.648790955543518, |
|
"learning_rate": 1.9828502882792246e-05, |
|
"loss": 0.4734, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 2.089540688000619, |
|
"grad_norm": 1.6504909992218018, |
|
"learning_rate": 1.982076384320706e-05, |
|
"loss": 0.4239, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.093410207793213, |
|
"grad_norm": 1.8103519678115845, |
|
"learning_rate": 1.9813024803621872e-05, |
|
"loss": 0.4551, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 2.0972797275858066, |
|
"grad_norm": 1.9078305959701538, |
|
"learning_rate": 1.9805285764036682e-05, |
|
"loss": 0.45, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 2.1011492473784004, |
|
"grad_norm": 2.079587459564209, |
|
"learning_rate": 1.97975467244515e-05, |
|
"loss": 0.4478, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 2.105018767170994, |
|
"grad_norm": 1.3618863821029663, |
|
"learning_rate": 1.9789807684866308e-05, |
|
"loss": 0.4285, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 2.108888286963588, |
|
"grad_norm": 3.3195624351501465, |
|
"learning_rate": 1.978206864528112e-05, |
|
"loss": 0.4545, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.1127578067561816, |
|
"grad_norm": 1.7043200731277466, |
|
"learning_rate": 1.9774329605695934e-05, |
|
"loss": 0.4574, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 2.1166273265487754, |
|
"grad_norm": 1.7037144899368286, |
|
"learning_rate": 1.9766590566110748e-05, |
|
"loss": 0.436, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 2.120496846341369, |
|
"grad_norm": 1.7071598768234253, |
|
"learning_rate": 1.975885152652556e-05, |
|
"loss": 0.4453, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 2.124366366133963, |
|
"grad_norm": 1.7206696271896362, |
|
"learning_rate": 1.975111248694037e-05, |
|
"loss": 0.4679, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 2.1282358859265567, |
|
"grad_norm": 1.6118768453598022, |
|
"learning_rate": 1.9743373447355187e-05, |
|
"loss": 0.4371, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1321054057191504, |
|
"grad_norm": 1.6849126815795898, |
|
"learning_rate": 1.9735634407769997e-05, |
|
"loss": 0.4159, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 2.135974925511744, |
|
"grad_norm": 1.7709155082702637, |
|
"learning_rate": 1.972789536818481e-05, |
|
"loss": 0.4229, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 2.139844445304338, |
|
"grad_norm": 1.504991888999939, |
|
"learning_rate": 1.9720156328599623e-05, |
|
"loss": 0.4459, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 2.1437139650969312, |
|
"grad_norm": 2.0437350273132324, |
|
"learning_rate": 1.9712417289014432e-05, |
|
"loss": 0.4507, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 2.147583484889525, |
|
"grad_norm": 1.811646580696106, |
|
"learning_rate": 1.970467824942925e-05, |
|
"loss": 0.4348, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.1514530046821188, |
|
"grad_norm": 2.6153831481933594, |
|
"learning_rate": 1.969693920984406e-05, |
|
"loss": 0.4296, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 2.1553225244747125, |
|
"grad_norm": 2.2653005123138428, |
|
"learning_rate": 1.9689200170258872e-05, |
|
"loss": 0.4495, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 2.1591920442673063, |
|
"grad_norm": 1.3044402599334717, |
|
"learning_rate": 1.9681461130673685e-05, |
|
"loss": 0.4353, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 2.1630615640599, |
|
"grad_norm": 1.2863844633102417, |
|
"learning_rate": 1.9673722091088498e-05, |
|
"loss": 0.4343, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 2.1669310838524938, |
|
"grad_norm": 2.1832025051116943, |
|
"learning_rate": 1.966598305150331e-05, |
|
"loss": 0.4521, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.1708006036450875, |
|
"grad_norm": 10.471029281616211, |
|
"learning_rate": 1.965824401191812e-05, |
|
"loss": 0.437, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 2.1746701234376813, |
|
"grad_norm": 1.3444117307662964, |
|
"learning_rate": 1.9650504972332934e-05, |
|
"loss": 0.4287, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 2.178539643230275, |
|
"grad_norm": 1.4190716743469238, |
|
"learning_rate": 1.9642765932747747e-05, |
|
"loss": 0.4384, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 2.182409163022869, |
|
"grad_norm": 2.5630593299865723, |
|
"learning_rate": 1.963502689316256e-05, |
|
"loss": 0.4437, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 2.1862786828154626, |
|
"grad_norm": 4.250194549560547, |
|
"learning_rate": 1.9627287853577373e-05, |
|
"loss": 0.4253, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.1901482026080563, |
|
"grad_norm": 2.147238254547119, |
|
"learning_rate": 1.9619548813992183e-05, |
|
"loss": 0.4451, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 2.19401772240065, |
|
"grad_norm": 1.2069705724716187, |
|
"learning_rate": 1.9611809774407e-05, |
|
"loss": 0.431, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 2.197887242193244, |
|
"grad_norm": 1.8252161741256714, |
|
"learning_rate": 1.960407073482181e-05, |
|
"loss": 0.4358, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 2.2017567619858376, |
|
"grad_norm": 1.5236437320709229, |
|
"learning_rate": 1.9596331695236622e-05, |
|
"loss": 0.4291, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 2.2056262817784313, |
|
"grad_norm": 1.3067554235458374, |
|
"learning_rate": 1.9588592655651435e-05, |
|
"loss": 0.4462, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.209495801571025, |
|
"grad_norm": 3.1271252632141113, |
|
"learning_rate": 1.958085361606625e-05, |
|
"loss": 0.4308, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 2.213365321363619, |
|
"grad_norm": 1.5025999546051025, |
|
"learning_rate": 1.957311457648106e-05, |
|
"loss": 0.4454, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 2.2172348411562126, |
|
"grad_norm": 2.4508609771728516, |
|
"learning_rate": 1.956537553689587e-05, |
|
"loss": 0.4294, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 2.2211043609488064, |
|
"grad_norm": 1.9247980117797852, |
|
"learning_rate": 1.9557636497310684e-05, |
|
"loss": 0.4388, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 2.2249738807414, |
|
"grad_norm": 1.0998092889785767, |
|
"learning_rate": 1.9549897457725497e-05, |
|
"loss": 0.4249, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.228843400533994, |
|
"grad_norm": 2.039179563522339, |
|
"learning_rate": 1.954215841814031e-05, |
|
"loss": 0.4138, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 2.2327129203265876, |
|
"grad_norm": 1.4328453540802002, |
|
"learning_rate": 1.9534419378555124e-05, |
|
"loss": 0.4485, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 2.2365824401191814, |
|
"grad_norm": 1.4221556186676025, |
|
"learning_rate": 1.9526680338969933e-05, |
|
"loss": 0.4372, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 2.240451959911775, |
|
"grad_norm": 1.6141443252563477, |
|
"learning_rate": 1.951894129938475e-05, |
|
"loss": 0.4193, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 2.244321479704369, |
|
"grad_norm": 1.6460309028625488, |
|
"learning_rate": 1.951120225979956e-05, |
|
"loss": 0.4289, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2481909994969627, |
|
"grad_norm": 1.906775951385498, |
|
"learning_rate": 1.9503463220214373e-05, |
|
"loss": 0.426, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 2.252060519289556, |
|
"grad_norm": 1.4007736444473267, |
|
"learning_rate": 1.9495724180629186e-05, |
|
"loss": 0.4237, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 2.2559300390821497, |
|
"grad_norm": 1.6321462392807007, |
|
"learning_rate": 1.9487985141044e-05, |
|
"loss": 0.442, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 2.2597995588747435, |
|
"grad_norm": 2.8257906436920166, |
|
"learning_rate": 1.9480246101458812e-05, |
|
"loss": 0.4309, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 2.2636690786673372, |
|
"grad_norm": 1.8106814622879028, |
|
"learning_rate": 1.9472507061873622e-05, |
|
"loss": 0.4431, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.267538598459931, |
|
"grad_norm": 1.142861247062683, |
|
"learning_rate": 1.9464768022288435e-05, |
|
"loss": 0.415, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 2.2714081182525248, |
|
"grad_norm": 1.88276207447052, |
|
"learning_rate": 1.9457028982703248e-05, |
|
"loss": 0.4004, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 2.2752776380451185, |
|
"grad_norm": 1.45915687084198, |
|
"learning_rate": 1.944928994311806e-05, |
|
"loss": 0.4146, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 2.2791471578377123, |
|
"grad_norm": 1.1579883098602295, |
|
"learning_rate": 1.9441550903532874e-05, |
|
"loss": 0.4161, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 2.283016677630306, |
|
"grad_norm": 1.4199334383010864, |
|
"learning_rate": 1.9433811863947684e-05, |
|
"loss": 0.4284, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.2868861974229, |
|
"grad_norm": 3.017756938934326, |
|
"learning_rate": 1.9426072824362497e-05, |
|
"loss": 0.4229, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 2.2907557172154935, |
|
"grad_norm": 1.9444090127944946, |
|
"learning_rate": 1.941833378477731e-05, |
|
"loss": 0.4283, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 2.2946252370080873, |
|
"grad_norm": 2.231217622756958, |
|
"learning_rate": 1.9410594745192123e-05, |
|
"loss": 0.4295, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 2.298494756800681, |
|
"grad_norm": 2.147974729537964, |
|
"learning_rate": 1.9402855705606936e-05, |
|
"loss": 0.4158, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 2.302364276593275, |
|
"grad_norm": 1.0102565288543701, |
|
"learning_rate": 1.939511666602175e-05, |
|
"loss": 0.4279, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.3062337963858686, |
|
"grad_norm": 6.695295333862305, |
|
"learning_rate": 1.9387377626436562e-05, |
|
"loss": 0.4093, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 2.3101033161784623, |
|
"grad_norm": 1.369629979133606, |
|
"learning_rate": 1.9379638586851372e-05, |
|
"loss": 0.4441, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 2.313972835971056, |
|
"grad_norm": 1.9461331367492676, |
|
"learning_rate": 1.9371899547266185e-05, |
|
"loss": 0.433, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 2.31784235576365, |
|
"grad_norm": 1.8734780550003052, |
|
"learning_rate": 1.9364160507681e-05, |
|
"loss": 0.4381, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 2.3217118755562436, |
|
"grad_norm": 1.843590259552002, |
|
"learning_rate": 1.935642146809581e-05, |
|
"loss": 0.4186, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 2.5406510829925537, |
|
"learning_rate": 1.9348682428510625e-05, |
|
"loss": 0.4241, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 2.329450915141431, |
|
"grad_norm": 2.618091344833374, |
|
"learning_rate": 1.9340943388925434e-05, |
|
"loss": 0.4123, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 2.333320434934025, |
|
"grad_norm": 1.8404749631881714, |
|
"learning_rate": 1.9333204349340247e-05, |
|
"loss": 0.4171, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 2.3371899547266186, |
|
"grad_norm": 1.55924391746521, |
|
"learning_rate": 1.932546530975506e-05, |
|
"loss": 0.4149, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 2.341059474519212, |
|
"grad_norm": 2.034311532974243, |
|
"learning_rate": 1.9317726270169874e-05, |
|
"loss": 0.4127, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.3449289943118057, |
|
"grad_norm": 1.4090275764465332, |
|
"learning_rate": 1.9309987230584687e-05, |
|
"loss": 0.3978, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 2.3487985141043994, |
|
"grad_norm": 2.1726934909820557, |
|
"learning_rate": 1.93022481909995e-05, |
|
"loss": 0.4252, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 2.352668033896993, |
|
"grad_norm": 4.539790630340576, |
|
"learning_rate": 1.929450915141431e-05, |
|
"loss": 0.4298, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 2.356537553689587, |
|
"grad_norm": 1.1041001081466675, |
|
"learning_rate": 1.9286770111829123e-05, |
|
"loss": 0.421, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 2.3604070734821807, |
|
"grad_norm": 1.8974961042404175, |
|
"learning_rate": 1.9279031072243936e-05, |
|
"loss": 0.4353, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.3642765932747745, |
|
"grad_norm": 2.053619384765625, |
|
"learning_rate": 1.927129203265875e-05, |
|
"loss": 0.4134, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 2.368146113067368, |
|
"grad_norm": 1.9047490358352661, |
|
"learning_rate": 1.9263552993073562e-05, |
|
"loss": 0.4421, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 2.372015632859962, |
|
"grad_norm": 2.2807693481445312, |
|
"learning_rate": 1.9255813953488375e-05, |
|
"loss": 0.4173, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 2.3758851526525557, |
|
"grad_norm": 1.544053077697754, |
|
"learning_rate": 1.9248074913903185e-05, |
|
"loss": 0.4289, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 2.3797546724451495, |
|
"grad_norm": 1.469085693359375, |
|
"learning_rate": 1.9240335874317998e-05, |
|
"loss": 0.4094, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.3836241922377432, |
|
"grad_norm": 1.9199841022491455, |
|
"learning_rate": 1.923259683473281e-05, |
|
"loss": 0.425, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 2.387493712030337, |
|
"grad_norm": 1.388945460319519, |
|
"learning_rate": 1.9224857795147624e-05, |
|
"loss": 0.4238, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 2.3913632318229308, |
|
"grad_norm": 1.136607050895691, |
|
"learning_rate": 1.9217118755562437e-05, |
|
"loss": 0.4242, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 2.3952327516155245, |
|
"grad_norm": 2.213928699493408, |
|
"learning_rate": 1.9209379715977247e-05, |
|
"loss": 0.41, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 2.3991022714081183, |
|
"grad_norm": 1.337274193763733, |
|
"learning_rate": 1.920164067639206e-05, |
|
"loss": 0.4051, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.402971791200712, |
|
"grad_norm": 1.7968541383743286, |
|
"learning_rate": 1.9193901636806873e-05, |
|
"loss": 0.4264, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 2.406841310993306, |
|
"grad_norm": 2.299100160598755, |
|
"learning_rate": 1.9186162597221686e-05, |
|
"loss": 0.4316, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 2.4107108307858995, |
|
"grad_norm": 1.3097466230392456, |
|
"learning_rate": 1.91784235576365e-05, |
|
"loss": 0.416, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 2.4145803505784933, |
|
"grad_norm": 1.9384897947311401, |
|
"learning_rate": 1.9170684518051312e-05, |
|
"loss": 0.398, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 2.418449870371087, |
|
"grad_norm": 1.8852757215499878, |
|
"learning_rate": 1.9162945478466126e-05, |
|
"loss": 0.4158, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.422319390163681, |
|
"grad_norm": 3.9488649368286133, |
|
"learning_rate": 1.9155206438880935e-05, |
|
"loss": 0.4113, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 2.4261889099562746, |
|
"grad_norm": 1.9499768018722534, |
|
"learning_rate": 1.914746739929575e-05, |
|
"loss": 0.4163, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 2.4300584297488683, |
|
"grad_norm": 1.1540164947509766, |
|
"learning_rate": 1.913972835971056e-05, |
|
"loss": 0.3977, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 2.433927949541462, |
|
"grad_norm": 2.318495750427246, |
|
"learning_rate": 1.9131989320125375e-05, |
|
"loss": 0.4323, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 2.437797469334056, |
|
"grad_norm": 1.8283582925796509, |
|
"learning_rate": 1.9124250280540188e-05, |
|
"loss": 0.3982, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4416669891266496, |
|
"grad_norm": 1.4836108684539795, |
|
"learning_rate": 1.9116511240954997e-05, |
|
"loss": 0.4079, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 2.4455365089192433, |
|
"grad_norm": 1.6268258094787598, |
|
"learning_rate": 1.910877220136981e-05, |
|
"loss": 0.4099, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 2.449406028711837, |
|
"grad_norm": 1.655819296836853, |
|
"learning_rate": 1.9101033161784624e-05, |
|
"loss": 0.4139, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 2.4532755485044304, |
|
"grad_norm": 3.3714959621429443, |
|
"learning_rate": 1.9093294122199437e-05, |
|
"loss": 0.4258, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 2.457145068297024, |
|
"grad_norm": 1.9959139823913574, |
|
"learning_rate": 1.908555508261425e-05, |
|
"loss": 0.4071, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.461014588089618, |
|
"grad_norm": 1.5244359970092773, |
|
"learning_rate": 1.9077816043029063e-05, |
|
"loss": 0.4129, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 2.4648841078822117, |
|
"grad_norm": 1.6724839210510254, |
|
"learning_rate": 1.9070077003443873e-05, |
|
"loss": 0.3886, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 2.4687536276748054, |
|
"grad_norm": 1.5094096660614014, |
|
"learning_rate": 1.9062337963858686e-05, |
|
"loss": 0.3924, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 2.472623147467399, |
|
"grad_norm": 1.6173244714736938, |
|
"learning_rate": 1.90545989242735e-05, |
|
"loss": 0.4284, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 2.476492667259993, |
|
"grad_norm": 1.7004306316375732, |
|
"learning_rate": 1.9046859884688312e-05, |
|
"loss": 0.3927, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.4803621870525867, |
|
"grad_norm": 3.754396677017212, |
|
"learning_rate": 1.9039120845103125e-05, |
|
"loss": 0.4147, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 2.4842317068451805, |
|
"grad_norm": 1.993538737297058, |
|
"learning_rate": 1.9031381805517938e-05, |
|
"loss": 0.4388, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 2.488101226637774, |
|
"grad_norm": 2.2600507736206055, |
|
"learning_rate": 1.9023642765932748e-05, |
|
"loss": 0.4126, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 2.491970746430368, |
|
"grad_norm": 2.028188943862915, |
|
"learning_rate": 1.901590372634756e-05, |
|
"loss": 0.4029, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 2.4958402662229617, |
|
"grad_norm": 1.467639446258545, |
|
"learning_rate": 1.9008164686762374e-05, |
|
"loss": 0.424, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.4997097860155555, |
|
"grad_norm": 1.876083493232727, |
|
"learning_rate": 1.9000425647177187e-05, |
|
"loss": 0.4178, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 2.4999032620051853, |
|
"eval_loss": 0.28442877531051636, |
|
"eval_runtime": 71.3333, |
|
"eval_samples_per_second": 29.285, |
|
"eval_steps_per_second": 3.673, |
|
"step": 64605 |
|
}, |
|
{ |
|
"epoch": 2.5035793058081492, |
|
"grad_norm": 1.7402297258377075, |
|
"learning_rate": 1.8992686607592e-05, |
|
"loss": 0.4084, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 2.507448825600743, |
|
"grad_norm": 1.4921330213546753, |
|
"learning_rate": 1.8984947568006813e-05, |
|
"loss": 0.4246, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 2.5113183453933368, |
|
"grad_norm": 2.096726179122925, |
|
"learning_rate": 1.8977208528421623e-05, |
|
"loss": 0.411, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 2.5151878651859305, |
|
"grad_norm": 1.0920313596725464, |
|
"learning_rate": 1.8969469488836436e-05, |
|
"loss": 0.416, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.5190573849785243, |
|
"grad_norm": 1.7869759798049927, |
|
"learning_rate": 1.896173044925125e-05, |
|
"loss": 0.4182, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 2.522926904771118, |
|
"grad_norm": 1.4348342418670654, |
|
"learning_rate": 1.8953991409666062e-05, |
|
"loss": 0.4053, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 2.526796424563712, |
|
"grad_norm": 1.2327197790145874, |
|
"learning_rate": 1.8946252370080876e-05, |
|
"loss": 0.3838, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 2.5306659443563055, |
|
"grad_norm": 1.9678161144256592, |
|
"learning_rate": 1.893851333049569e-05, |
|
"loss": 0.4049, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 2.534535464148899, |
|
"grad_norm": 1.6470235586166382, |
|
"learning_rate": 1.89307742909105e-05, |
|
"loss": 0.4429, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.5384049839414926, |
|
"grad_norm": 1.9904509782791138, |
|
"learning_rate": 1.892303525132531e-05, |
|
"loss": 0.3835, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 2.5422745037340864, |
|
"grad_norm": 1.6979745626449585, |
|
"learning_rate": 1.8915296211740125e-05, |
|
"loss": 0.4051, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 2.54614402352668, |
|
"grad_norm": 1.5223089456558228, |
|
"learning_rate": 1.8907557172154938e-05, |
|
"loss": 0.3995, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 2.550013543319274, |
|
"grad_norm": 1.0474973917007446, |
|
"learning_rate": 1.889981813256975e-05, |
|
"loss": 0.3959, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 2.5538830631118676, |
|
"grad_norm": 1.2941759824752808, |
|
"learning_rate": 1.8892079092984564e-05, |
|
"loss": 0.4084, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.5577525829044614, |
|
"grad_norm": 2.273611545562744, |
|
"learning_rate": 1.8884340053399374e-05, |
|
"loss": 0.4051, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 2.561622102697055, |
|
"grad_norm": 2.124966859817505, |
|
"learning_rate": 1.8876601013814187e-05, |
|
"loss": 0.4022, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 2.565491622489649, |
|
"grad_norm": 2.5645906925201416, |
|
"learning_rate": 1.8868861974229e-05, |
|
"loss": 0.4108, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 2.5693611422822427, |
|
"grad_norm": 1.5864262580871582, |
|
"learning_rate": 1.8861122934643813e-05, |
|
"loss": 0.402, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 2.5732306620748364, |
|
"grad_norm": 1.5741071701049805, |
|
"learning_rate": 1.8853383895058626e-05, |
|
"loss": 0.4196, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.57710018186743, |
|
"grad_norm": 1.3481132984161377, |
|
"learning_rate": 1.8845644855473436e-05, |
|
"loss": 0.4125, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 2.580969701660024, |
|
"grad_norm": 2.292896032333374, |
|
"learning_rate": 1.883790581588825e-05, |
|
"loss": 0.3986, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 2.5848392214526177, |
|
"grad_norm": 1.835115909576416, |
|
"learning_rate": 1.8830166776303062e-05, |
|
"loss": 0.4114, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 2.5887087412452114, |
|
"grad_norm": 2.9787259101867676, |
|
"learning_rate": 1.8822427736717875e-05, |
|
"loss": 0.4039, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 2.592578261037805, |
|
"grad_norm": 1.9280844926834106, |
|
"learning_rate": 1.8814688697132688e-05, |
|
"loss": 0.4032, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.596447780830399, |
|
"grad_norm": 1.6325422525405884, |
|
"learning_rate": 1.88069496575475e-05, |
|
"loss": 0.42, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 2.6003173006229927, |
|
"grad_norm": 1.9253751039505005, |
|
"learning_rate": 1.8799210617962314e-05, |
|
"loss": 0.4082, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 2.6041868204155865, |
|
"grad_norm": 2.5450963973999023, |
|
"learning_rate": 1.8791471578377124e-05, |
|
"loss": 0.4055, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 2.6080563402081802, |
|
"grad_norm": 1.7384142875671387, |
|
"learning_rate": 1.8783732538791937e-05, |
|
"loss": 0.4253, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 2.611925860000774, |
|
"grad_norm": 2.371424913406372, |
|
"learning_rate": 1.877599349920675e-05, |
|
"loss": 0.4062, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.6157953797933677, |
|
"grad_norm": 1.8545641899108887, |
|
"learning_rate": 1.8768254459621563e-05, |
|
"loss": 0.4301, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 2.6196648995859615, |
|
"grad_norm": 2.011378765106201, |
|
"learning_rate": 1.8760515420036376e-05, |
|
"loss": 0.4081, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 2.6235344193785552, |
|
"grad_norm": 1.7614511251449585, |
|
"learning_rate": 1.8752776380451186e-05, |
|
"loss": 0.3925, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 2.627403939171149, |
|
"grad_norm": 2.2085518836975098, |
|
"learning_rate": 1.8745037340866e-05, |
|
"loss": 0.4054, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 2.6312734589637428, |
|
"grad_norm": 1.7185721397399902, |
|
"learning_rate": 1.8737298301280812e-05, |
|
"loss": 0.4194, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6351429787563365, |
|
"grad_norm": 1.3710312843322754, |
|
"learning_rate": 1.8729559261695626e-05, |
|
"loss": 0.3902, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 2.6390124985489303, |
|
"grad_norm": 2.2787375450134277, |
|
"learning_rate": 1.872182022211044e-05, |
|
"loss": 0.4027, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 2.642882018341524, |
|
"grad_norm": 1.1309306621551514, |
|
"learning_rate": 1.871408118252525e-05, |
|
"loss": 0.3825, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 2.646751538134118, |
|
"grad_norm": 1.4846229553222656, |
|
"learning_rate": 1.870634214294006e-05, |
|
"loss": 0.4127, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 2.6506210579267115, |
|
"grad_norm": 1.4689130783081055, |
|
"learning_rate": 1.8698603103354875e-05, |
|
"loss": 0.4235, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.6544905777193053, |
|
"grad_norm": 2.0043463706970215, |
|
"learning_rate": 1.8690864063769688e-05, |
|
"loss": 0.4108, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 2.6583600975118986, |
|
"grad_norm": 1.7952332496643066, |
|
"learning_rate": 1.86831250241845e-05, |
|
"loss": 0.4145, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 2.6622296173044924, |
|
"grad_norm": 1.4483826160430908, |
|
"learning_rate": 1.8675385984599314e-05, |
|
"loss": 0.3894, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 2.666099137097086, |
|
"grad_norm": 1.6657880544662476, |
|
"learning_rate": 1.8667646945014127e-05, |
|
"loss": 0.3807, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 2.66996865688968, |
|
"grad_norm": 1.7913455963134766, |
|
"learning_rate": 1.8659907905428937e-05, |
|
"loss": 0.3962, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.6738381766822736, |
|
"grad_norm": 3.517664670944214, |
|
"learning_rate": 1.865216886584375e-05, |
|
"loss": 0.4053, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 2.6777076964748674, |
|
"grad_norm": 1.5401078462600708, |
|
"learning_rate": 1.8644429826258563e-05, |
|
"loss": 0.4013, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 2.681577216267461, |
|
"grad_norm": 1.832924485206604, |
|
"learning_rate": 1.8636690786673376e-05, |
|
"loss": 0.4001, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 2.685446736060055, |
|
"grad_norm": 1.508527398109436, |
|
"learning_rate": 1.862895174708819e-05, |
|
"loss": 0.3918, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 2.6893162558526487, |
|
"grad_norm": 1.329871654510498, |
|
"learning_rate": 1.8621212707503e-05, |
|
"loss": 0.4053, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.6931857756452424, |
|
"grad_norm": 1.6876091957092285, |
|
"learning_rate": 1.8613473667917812e-05, |
|
"loss": 0.4068, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 2.697055295437836, |
|
"grad_norm": 2.244474411010742, |
|
"learning_rate": 1.8605734628332625e-05, |
|
"loss": 0.3733, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 2.70092481523043, |
|
"grad_norm": 1.6640311479568481, |
|
"learning_rate": 1.8597995588747438e-05, |
|
"loss": 0.4118, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 2.7047943350230237, |
|
"grad_norm": 1.3973504304885864, |
|
"learning_rate": 1.859025654916225e-05, |
|
"loss": 0.3921, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 2.7086638548156174, |
|
"grad_norm": 1.8491889238357544, |
|
"learning_rate": 1.8582517509577064e-05, |
|
"loss": 0.4114, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.712533374608211, |
|
"grad_norm": 2.136467933654785, |
|
"learning_rate": 1.8574778469991877e-05, |
|
"loss": 0.3907, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 2.716402894400805, |
|
"grad_norm": 1.5678136348724365, |
|
"learning_rate": 1.8567039430406687e-05, |
|
"loss": 0.3753, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 2.7202724141933987, |
|
"grad_norm": 2.590696096420288, |
|
"learning_rate": 1.85593003908215e-05, |
|
"loss": 0.378, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 2.7241419339859925, |
|
"grad_norm": 1.5565811395645142, |
|
"learning_rate": 1.8551561351236313e-05, |
|
"loss": 0.3704, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 2.7280114537785862, |
|
"grad_norm": 0.8795768618583679, |
|
"learning_rate": 1.8543822311651126e-05, |
|
"loss": 0.4054, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.73188097357118, |
|
"grad_norm": 1.5178683996200562, |
|
"learning_rate": 1.853608327206594e-05, |
|
"loss": 0.3753, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 2.7357504933637733, |
|
"grad_norm": 1.8956971168518066, |
|
"learning_rate": 1.852834423248075e-05, |
|
"loss": 0.3957, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 2.739620013156367, |
|
"grad_norm": 2.690990447998047, |
|
"learning_rate": 1.8520605192895562e-05, |
|
"loss": 0.4068, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 2.743489532948961, |
|
"grad_norm": 2.114319086074829, |
|
"learning_rate": 1.8512866153310375e-05, |
|
"loss": 0.3945, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 2.7473590527415546, |
|
"grad_norm": 1.9230573177337646, |
|
"learning_rate": 1.850512711372519e-05, |
|
"loss": 0.4104, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7512285725341483, |
|
"grad_norm": 1.365560531616211, |
|
"learning_rate": 1.8497388074140002e-05, |
|
"loss": 0.3866, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 2.755098092326742, |
|
"grad_norm": 1.803850769996643, |
|
"learning_rate": 1.848964903455481e-05, |
|
"loss": 0.3915, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 2.758967612119336, |
|
"grad_norm": 1.6910895109176636, |
|
"learning_rate": 1.8481909994969628e-05, |
|
"loss": 0.416, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 2.7628371319119296, |
|
"grad_norm": 1.7176426649093628, |
|
"learning_rate": 1.8474170955384438e-05, |
|
"loss": 0.3913, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 2.7667066517045233, |
|
"grad_norm": 1.2521884441375732, |
|
"learning_rate": 1.846643191579925e-05, |
|
"loss": 0.3802, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.770576171497117, |
|
"grad_norm": 1.5698885917663574, |
|
"learning_rate": 1.8458692876214064e-05, |
|
"loss": 0.3964, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 2.774445691289711, |
|
"grad_norm": 1.6318507194519043, |
|
"learning_rate": 1.8450953836628877e-05, |
|
"loss": 0.3688, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.7783152110823046, |
|
"grad_norm": 2.5745694637298584, |
|
"learning_rate": 1.844321479704369e-05, |
|
"loss": 0.3796, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 2.7821847308748984, |
|
"grad_norm": 2.249467134475708, |
|
"learning_rate": 1.84354757574585e-05, |
|
"loss": 0.4085, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 2.786054250667492, |
|
"grad_norm": 1.1853622198104858, |
|
"learning_rate": 1.8427736717873313e-05, |
|
"loss": 0.3894, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.789923770460086, |
|
"grad_norm": 1.2344344854354858, |
|
"learning_rate": 1.8419997678288126e-05, |
|
"loss": 0.3709, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 2.7937932902526796, |
|
"grad_norm": 1.520385980606079, |
|
"learning_rate": 1.841225863870294e-05, |
|
"loss": 0.3874, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 2.7976628100452734, |
|
"grad_norm": 2.7006897926330566, |
|
"learning_rate": 1.8404519599117752e-05, |
|
"loss": 0.3745, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.801532329837867, |
|
"grad_norm": 2.2351534366607666, |
|
"learning_rate": 1.8396780559532562e-05, |
|
"loss": 0.3751, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 2.805401849630461, |
|
"grad_norm": 2.515935182571411, |
|
"learning_rate": 1.838904151994738e-05, |
|
"loss": 0.3717, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.8092713694230547, |
|
"grad_norm": 1.3921540975570679, |
|
"learning_rate": 1.8381302480362188e-05, |
|
"loss": 0.3742, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.8131408892156484, |
|
"grad_norm": 3.017775058746338, |
|
"learning_rate": 1.8373563440777e-05, |
|
"loss": 0.384, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 2.817010409008242, |
|
"grad_norm": 2.8901004791259766, |
|
"learning_rate": 1.8365824401191814e-05, |
|
"loss": 0.3757, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 2.820879928800836, |
|
"grad_norm": 1.0690851211547852, |
|
"learning_rate": 1.8358085361606624e-05, |
|
"loss": 0.3878, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.8247494485934297, |
|
"grad_norm": 1.4159176349639893, |
|
"learning_rate": 1.835034632202144e-05, |
|
"loss": 0.3976, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.8286189683860234, |
|
"grad_norm": 1.8387011289596558, |
|
"learning_rate": 1.834260728243625e-05, |
|
"loss": 0.4264, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 2.832488488178617, |
|
"grad_norm": 2.336967706680298, |
|
"learning_rate": 1.8334868242851063e-05, |
|
"loss": 0.4015, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.836358007971211, |
|
"grad_norm": 2.210538864135742, |
|
"learning_rate": 1.8327129203265876e-05, |
|
"loss": 0.402, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 2.8402275277638047, |
|
"grad_norm": 1.7943260669708252, |
|
"learning_rate": 1.831939016368069e-05, |
|
"loss": 0.3829, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 2.8440970475563985, |
|
"grad_norm": 2.171783447265625, |
|
"learning_rate": 1.8311651124095503e-05, |
|
"loss": 0.3925, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.8479665673489922, |
|
"grad_norm": 1.937455654144287, |
|
"learning_rate": 1.8303912084510312e-05, |
|
"loss": 0.3731, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 2.851836087141586, |
|
"grad_norm": 2.4677200317382812, |
|
"learning_rate": 1.829617304492513e-05, |
|
"loss": 0.3792, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 2.8557056069341797, |
|
"grad_norm": 1.6288717985153198, |
|
"learning_rate": 1.828843400533994e-05, |
|
"loss": 0.4079, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.859575126726773, |
|
"grad_norm": 1.2947713136672974, |
|
"learning_rate": 1.828069496575475e-05, |
|
"loss": 0.3677, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 2.863444646519367, |
|
"grad_norm": 2.0474677085876465, |
|
"learning_rate": 1.8272955926169565e-05, |
|
"loss": 0.3969, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.8673141663119606, |
|
"grad_norm": 0.9555093050003052, |
|
"learning_rate": 1.8265216886584374e-05, |
|
"loss": 0.3791, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.8711836861045543, |
|
"grad_norm": 2.7177960872650146, |
|
"learning_rate": 1.825747784699919e-05, |
|
"loss": 0.3899, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 2.875053205897148, |
|
"grad_norm": 1.8301888704299927, |
|
"learning_rate": 1.8249738807414e-05, |
|
"loss": 0.3703, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 2.878922725689742, |
|
"grad_norm": 1.6821845769882202, |
|
"learning_rate": 1.8241999767828814e-05, |
|
"loss": 0.3744, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.8827922454823356, |
|
"grad_norm": 1.0236321687698364, |
|
"learning_rate": 1.8234260728243627e-05, |
|
"loss": 0.3849, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.8866617652749293, |
|
"grad_norm": 1.5947498083114624, |
|
"learning_rate": 1.822652168865844e-05, |
|
"loss": 0.3727, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 2.890531285067523, |
|
"grad_norm": 2.4842593669891357, |
|
"learning_rate": 1.8218782649073253e-05, |
|
"loss": 0.3939, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.894400804860117, |
|
"grad_norm": 2.366248607635498, |
|
"learning_rate": 1.8211043609488063e-05, |
|
"loss": 0.3731, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 2.8982703246527106, |
|
"grad_norm": 1.7339088916778564, |
|
"learning_rate": 1.820330456990288e-05, |
|
"loss": 0.404, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 2.9021398444453044, |
|
"grad_norm": 3.6488988399505615, |
|
"learning_rate": 1.819556553031769e-05, |
|
"loss": 0.3875, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.906009364237898, |
|
"grad_norm": 1.3778159618377686, |
|
"learning_rate": 1.8187826490732502e-05, |
|
"loss": 0.3858, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 2.909878884030492, |
|
"grad_norm": 1.6373904943466187, |
|
"learning_rate": 1.8180087451147315e-05, |
|
"loss": 0.3784, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 2.9137484038230856, |
|
"grad_norm": 5.406473636627197, |
|
"learning_rate": 1.8172348411562125e-05, |
|
"loss": 0.3787, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.9176179236156794, |
|
"grad_norm": 1.9427270889282227, |
|
"learning_rate": 1.816460937197694e-05, |
|
"loss": 0.4024, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 2.921487443408273, |
|
"grad_norm": 2.2424678802490234, |
|
"learning_rate": 1.815687033239175e-05, |
|
"loss": 0.3657, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.925356963200867, |
|
"grad_norm": 1.9070557355880737, |
|
"learning_rate": 1.8149131292806564e-05, |
|
"loss": 0.3687, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.9292264829934607, |
|
"grad_norm": 1.7725392580032349, |
|
"learning_rate": 1.8141392253221377e-05, |
|
"loss": 0.3735, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 2.933096002786054, |
|
"grad_norm": 1.7941452264785767, |
|
"learning_rate": 1.8133653213636187e-05, |
|
"loss": 0.3861, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 2.9369655225786477, |
|
"grad_norm": 2.008236885070801, |
|
"learning_rate": 1.8125914174051004e-05, |
|
"loss": 0.378, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 2.9408350423712415, |
|
"grad_norm": 1.4745265245437622, |
|
"learning_rate": 1.8118175134465813e-05, |
|
"loss": 0.369, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.9447045621638352, |
|
"grad_norm": 1.3864421844482422, |
|
"learning_rate": 1.8110436094880626e-05, |
|
"loss": 0.371, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 2.948574081956429, |
|
"grad_norm": 2.3478002548217773, |
|
"learning_rate": 1.810269705529544e-05, |
|
"loss": 0.3923, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 2.9524436017490228, |
|
"grad_norm": 1.6446783542633057, |
|
"learning_rate": 1.8094958015710253e-05, |
|
"loss": 0.3759, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 2.9563131215416165, |
|
"grad_norm": 6.377575874328613, |
|
"learning_rate": 1.8087218976125066e-05, |
|
"loss": 0.376, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 2.9601826413342103, |
|
"grad_norm": 1.960789680480957, |
|
"learning_rate": 1.8079479936539875e-05, |
|
"loss": 0.3874, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.964052161126804, |
|
"grad_norm": 1.6182048320770264, |
|
"learning_rate": 1.8071740896954692e-05, |
|
"loss": 0.3949, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 2.967921680919398, |
|
"grad_norm": 1.5921803712844849, |
|
"learning_rate": 1.80640018573695e-05, |
|
"loss": 0.3579, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 2.9717912007119915, |
|
"grad_norm": 1.7049123048782349, |
|
"learning_rate": 1.8056262817784315e-05, |
|
"loss": 0.3704, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 2.9756607205045853, |
|
"grad_norm": 1.4930731058120728, |
|
"learning_rate": 1.8048523778199128e-05, |
|
"loss": 0.357, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 2.979530240297179, |
|
"grad_norm": 2.8269336223602295, |
|
"learning_rate": 1.8040784738613938e-05, |
|
"loss": 0.3747, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.983399760089773, |
|
"grad_norm": 3.652132987976074, |
|
"learning_rate": 1.8033045699028754e-05, |
|
"loss": 0.3693, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 2.9872692798823666, |
|
"grad_norm": 1.7305335998535156, |
|
"learning_rate": 1.8025306659443564e-05, |
|
"loss": 0.3689, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 2.9911387996749603, |
|
"grad_norm": 1.245302438735962, |
|
"learning_rate": 1.8017567619858377e-05, |
|
"loss": 0.3731, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 2.995008319467554, |
|
"grad_norm": 1.4806208610534668, |
|
"learning_rate": 1.800982858027319e-05, |
|
"loss": 0.3532, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 2.998877839260148, |
|
"grad_norm": 1.8243787288665771, |
|
"learning_rate": 1.8002089540688003e-05, |
|
"loss": 0.3875, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.9998839144062224, |
|
"eval_loss": 0.2676403522491455, |
|
"eval_runtime": 71.2462, |
|
"eval_samples_per_second": 29.321, |
|
"eval_steps_per_second": 3.677, |
|
"step": 77526 |
|
}, |
|
{ |
|
"epoch": 3.0027473590527416, |
|
"grad_norm": 1.5966713428497314, |
|
"learning_rate": 1.9994273110706964e-05, |
|
"loss": 0.3706, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 3.0066168788453353, |
|
"grad_norm": 0.9762176275253296, |
|
"learning_rate": 1.9986534071121774e-05, |
|
"loss": 0.3468, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 3.010486398637929, |
|
"grad_norm": 1.1284077167510986, |
|
"learning_rate": 1.9978795031536587e-05, |
|
"loss": 0.346, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 3.014355918430523, |
|
"grad_norm": 2.162651538848877, |
|
"learning_rate": 1.99710559919514e-05, |
|
"loss": 0.3852, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 3.0182254382231166, |
|
"grad_norm": 1.472206711769104, |
|
"learning_rate": 1.9963316952366213e-05, |
|
"loss": 0.3737, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.0220949580157104, |
|
"grad_norm": 1.6894917488098145, |
|
"learning_rate": 1.9955577912781027e-05, |
|
"loss": 0.3638, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 3.025964477808304, |
|
"grad_norm": 1.6989420652389526, |
|
"learning_rate": 1.9947838873195836e-05, |
|
"loss": 0.3866, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 3.029833997600898, |
|
"grad_norm": 1.79862642288208, |
|
"learning_rate": 1.994009983361065e-05, |
|
"loss": 0.3874, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 3.0337035173934916, |
|
"grad_norm": 2.368971586227417, |
|
"learning_rate": 1.9932360794025462e-05, |
|
"loss": 0.3828, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 3.0375730371860854, |
|
"grad_norm": 1.8010534048080444, |
|
"learning_rate": 1.9924621754440276e-05, |
|
"loss": 0.3659, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.041442556978679, |
|
"grad_norm": 1.9091135263442993, |
|
"learning_rate": 1.991688271485509e-05, |
|
"loss": 0.351, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 3.0453120767712725, |
|
"grad_norm": 1.708998203277588, |
|
"learning_rate": 1.99091436752699e-05, |
|
"loss": 0.3696, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 3.0491815965638662, |
|
"grad_norm": 1.80574369430542, |
|
"learning_rate": 1.9901404635684715e-05, |
|
"loss": 0.3939, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 3.05305111635646, |
|
"grad_norm": 1.2384390830993652, |
|
"learning_rate": 1.9893665596099525e-05, |
|
"loss": 0.3512, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 3.0569206361490537, |
|
"grad_norm": 2.3889994621276855, |
|
"learning_rate": 1.9885926556514338e-05, |
|
"loss": 0.3601, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.0607901559416475, |
|
"grad_norm": 1.5315697193145752, |
|
"learning_rate": 1.987818751692915e-05, |
|
"loss": 0.3889, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 3.0646596757342413, |
|
"grad_norm": 3.5246341228485107, |
|
"learning_rate": 1.9870448477343964e-05, |
|
"loss": 0.3785, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 3.068529195526835, |
|
"grad_norm": 1.7777693271636963, |
|
"learning_rate": 1.9862709437758777e-05, |
|
"loss": 0.3744, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 3.0723987153194288, |
|
"grad_norm": 1.318174123764038, |
|
"learning_rate": 1.9854970398173587e-05, |
|
"loss": 0.3681, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 3.0762682351120225, |
|
"grad_norm": 1.5280954837799072, |
|
"learning_rate": 1.98472313585884e-05, |
|
"loss": 0.3844, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.0801377549046163, |
|
"grad_norm": 2.24074649810791, |
|
"learning_rate": 1.9839492319003213e-05, |
|
"loss": 0.3634, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 3.08400727469721, |
|
"grad_norm": 1.337833285331726, |
|
"learning_rate": 1.9831753279418026e-05, |
|
"loss": 0.3586, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 3.087876794489804, |
|
"grad_norm": 2.0881550312042236, |
|
"learning_rate": 1.982401423983284e-05, |
|
"loss": 0.3659, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 3.0917463142823975, |
|
"grad_norm": 1.7096840143203735, |
|
"learning_rate": 1.981627520024765e-05, |
|
"loss": 0.3691, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 3.0956158340749913, |
|
"grad_norm": 1.1658939123153687, |
|
"learning_rate": 1.9808536160662465e-05, |
|
"loss": 0.3816, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.099485353867585, |
|
"grad_norm": 1.7050002813339233, |
|
"learning_rate": 1.9800797121077275e-05, |
|
"loss": 0.3606, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 3.103354873660179, |
|
"grad_norm": 1.3548462390899658, |
|
"learning_rate": 1.9793058081492088e-05, |
|
"loss": 0.3597, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 3.1072243934527726, |
|
"grad_norm": 2.5295116901397705, |
|
"learning_rate": 1.97853190419069e-05, |
|
"loss": 0.3689, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 3.1110939132453663, |
|
"grad_norm": 1.1832600831985474, |
|
"learning_rate": 1.9777580002321714e-05, |
|
"loss": 0.365, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 3.11496343303796, |
|
"grad_norm": 1.9550867080688477, |
|
"learning_rate": 1.9769840962736527e-05, |
|
"loss": 0.3754, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.118832952830554, |
|
"grad_norm": 2.029646396636963, |
|
"learning_rate": 1.9762101923151337e-05, |
|
"loss": 0.3537, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 3.1227024726231476, |
|
"grad_norm": 1.465968370437622, |
|
"learning_rate": 1.975436288356615e-05, |
|
"loss": 0.3779, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 3.1265719924157414, |
|
"grad_norm": 1.8936628103256226, |
|
"learning_rate": 1.9746623843980963e-05, |
|
"loss": 0.3781, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 3.130441512208335, |
|
"grad_norm": 2.382840633392334, |
|
"learning_rate": 1.9738884804395777e-05, |
|
"loss": 0.3727, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 3.134311032000929, |
|
"grad_norm": 2.595386028289795, |
|
"learning_rate": 1.973114576481059e-05, |
|
"loss": 0.3537, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.1381805517935226, |
|
"grad_norm": 2.3016576766967773, |
|
"learning_rate": 1.97234067252254e-05, |
|
"loss": 0.3797, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 3.142050071586116, |
|
"grad_norm": 1.0764691829681396, |
|
"learning_rate": 1.9715667685640212e-05, |
|
"loss": 0.3753, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 3.1459195913787097, |
|
"grad_norm": 1.311075210571289, |
|
"learning_rate": 1.9707928646055026e-05, |
|
"loss": 0.3551, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 3.1497891111713034, |
|
"grad_norm": 1.7169547080993652, |
|
"learning_rate": 1.970018960646984e-05, |
|
"loss": 0.3624, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 3.153658630963897, |
|
"grad_norm": 1.833234429359436, |
|
"learning_rate": 1.9692450566884652e-05, |
|
"loss": 0.3975, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.157528150756491, |
|
"grad_norm": 1.6446950435638428, |
|
"learning_rate": 1.968471152729946e-05, |
|
"loss": 0.3693, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 3.1613976705490847, |
|
"grad_norm": 1.723495364189148, |
|
"learning_rate": 1.9676972487714278e-05, |
|
"loss": 0.3572, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 3.1652671903416785, |
|
"grad_norm": 1.0325422286987305, |
|
"learning_rate": 1.9669233448129088e-05, |
|
"loss": 0.3645, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 3.1691367101342722, |
|
"grad_norm": 1.537728190422058, |
|
"learning_rate": 1.96614944085439e-05, |
|
"loss": 0.3693, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 3.173006229926866, |
|
"grad_norm": 1.5777703523635864, |
|
"learning_rate": 1.9653755368958714e-05, |
|
"loss": 0.3552, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.1768757497194597, |
|
"grad_norm": 1.3765802383422852, |
|
"learning_rate": 1.9646016329373527e-05, |
|
"loss": 0.3835, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 3.1807452695120535, |
|
"grad_norm": 1.3902156352996826, |
|
"learning_rate": 1.963827728978834e-05, |
|
"loss": 0.3577, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 3.1846147893046473, |
|
"grad_norm": 1.3278142213821411, |
|
"learning_rate": 1.963053825020315e-05, |
|
"loss": 0.3794, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 3.188484309097241, |
|
"grad_norm": 1.7168638706207275, |
|
"learning_rate": 1.9622799210617963e-05, |
|
"loss": 0.3522, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 3.1923538288898348, |
|
"grad_norm": 1.9700740575790405, |
|
"learning_rate": 1.9615060171032776e-05, |
|
"loss": 0.3769, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.1962233486824285, |
|
"grad_norm": 1.5577361583709717, |
|
"learning_rate": 1.960732113144759e-05, |
|
"loss": 0.3537, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 3.2000928684750223, |
|
"grad_norm": 1.9805132150650024, |
|
"learning_rate": 1.9599582091862402e-05, |
|
"loss": 0.3598, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 3.203962388267616, |
|
"grad_norm": 2.008052110671997, |
|
"learning_rate": 1.9591843052277212e-05, |
|
"loss": 0.3595, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 3.20783190806021, |
|
"grad_norm": 1.7605400085449219, |
|
"learning_rate": 1.958410401269203e-05, |
|
"loss": 0.3566, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 3.2117014278528035, |
|
"grad_norm": 1.9822074174880981, |
|
"learning_rate": 1.9576364973106838e-05, |
|
"loss": 0.3541, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.2155709476453973, |
|
"grad_norm": 1.9497880935668945, |
|
"learning_rate": 1.956862593352165e-05, |
|
"loss": 0.3554, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 3.219440467437991, |
|
"grad_norm": 1.1537199020385742, |
|
"learning_rate": 1.9560886893936464e-05, |
|
"loss": 0.3549, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 3.223309987230585, |
|
"grad_norm": 1.6270828247070312, |
|
"learning_rate": 1.9553147854351277e-05, |
|
"loss": 0.3398, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 3.2271795070231786, |
|
"grad_norm": 1.7348685264587402, |
|
"learning_rate": 1.954540881476609e-05, |
|
"loss": 0.3572, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 3.2310490268157723, |
|
"grad_norm": 1.329280972480774, |
|
"learning_rate": 1.95376697751809e-05, |
|
"loss": 0.3615, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.234918546608366, |
|
"grad_norm": 1.6342438459396362, |
|
"learning_rate": 1.9529930735595713e-05, |
|
"loss": 0.3649, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 3.23878806640096, |
|
"grad_norm": 1.7982897758483887, |
|
"learning_rate": 1.9522191696010526e-05, |
|
"loss": 0.3553, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 3.2426575861935536, |
|
"grad_norm": 1.6109760999679565, |
|
"learning_rate": 1.951445265642534e-05, |
|
"loss": 0.378, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 3.2465271059861474, |
|
"grad_norm": 1.328223466873169, |
|
"learning_rate": 1.9506713616840153e-05, |
|
"loss": 0.3589, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 3.2503966257787407, |
|
"grad_norm": 1.47650146484375, |
|
"learning_rate": 1.9498974577254962e-05, |
|
"loss": 0.3491, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.2542661455713344, |
|
"grad_norm": 2.0688846111297607, |
|
"learning_rate": 1.9491235537669776e-05, |
|
"loss": 0.3624, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 3.258135665363928, |
|
"grad_norm": 1.7907928228378296, |
|
"learning_rate": 1.948349649808459e-05, |
|
"loss": 0.3579, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 3.262005185156522, |
|
"grad_norm": 1.5677344799041748, |
|
"learning_rate": 1.9475757458499402e-05, |
|
"loss": 0.3651, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 3.2658747049491157, |
|
"grad_norm": 2.2685437202453613, |
|
"learning_rate": 1.9468018418914215e-05, |
|
"loss": 0.3469, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 3.2697442247417094, |
|
"grad_norm": 1.3307223320007324, |
|
"learning_rate": 1.9460279379329028e-05, |
|
"loss": 0.3548, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.273613744534303, |
|
"grad_norm": 1.2336021661758423, |
|
"learning_rate": 1.945254033974384e-05, |
|
"loss": 0.3604, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 3.277483264326897, |
|
"grad_norm": 1.3890929222106934, |
|
"learning_rate": 1.944480130015865e-05, |
|
"loss": 0.3435, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 3.2813527841194907, |
|
"grad_norm": 1.5486915111541748, |
|
"learning_rate": 1.9437062260573464e-05, |
|
"loss": 0.3538, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 3.2852223039120845, |
|
"grad_norm": 1.2610055208206177, |
|
"learning_rate": 1.9429323220988277e-05, |
|
"loss": 0.3595, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 3.2890918237046782, |
|
"grad_norm": 1.2226618528366089, |
|
"learning_rate": 1.942158418140309e-05, |
|
"loss": 0.3478, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.292961343497272, |
|
"grad_norm": 2.4010233879089355, |
|
"learning_rate": 1.9413845141817903e-05, |
|
"loss": 0.3735, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 3.2968308632898657, |
|
"grad_norm": 2.1810591220855713, |
|
"learning_rate": 1.9406106102232713e-05, |
|
"loss": 0.3493, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 3.3007003830824595, |
|
"grad_norm": 1.3222867250442505, |
|
"learning_rate": 1.9398367062647526e-05, |
|
"loss": 0.3608, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 3.3045699028750533, |
|
"grad_norm": 2.0267252922058105, |
|
"learning_rate": 1.939062802306234e-05, |
|
"loss": 0.3479, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 3.308439422667647, |
|
"grad_norm": 2.4806270599365234, |
|
"learning_rate": 1.9382888983477152e-05, |
|
"loss": 0.3658, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.3123089424602408, |
|
"grad_norm": 2.1248300075531006, |
|
"learning_rate": 1.9375149943891965e-05, |
|
"loss": 0.3559, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 3.3161784622528345, |
|
"grad_norm": 1.243067741394043, |
|
"learning_rate": 1.936741090430678e-05, |
|
"loss": 0.3412, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 3.3200479820454283, |
|
"grad_norm": 1.4840171337127686, |
|
"learning_rate": 1.9359671864721588e-05, |
|
"loss": 0.3364, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 3.323917501838022, |
|
"grad_norm": 1.28212571144104, |
|
"learning_rate": 1.93519328251364e-05, |
|
"loss": 0.3495, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 3.327787021630616, |
|
"grad_norm": 1.3520444631576538, |
|
"learning_rate": 1.9344193785551214e-05, |
|
"loss": 0.3588, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.3316565414232095, |
|
"grad_norm": 1.606806755065918, |
|
"learning_rate": 1.9336454745966027e-05, |
|
"loss": 0.36, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 3.3355260612158033, |
|
"grad_norm": 1.5933377742767334, |
|
"learning_rate": 1.932871570638084e-05, |
|
"loss": 0.3586, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 3.3393955810083966, |
|
"grad_norm": 1.4497184753417969, |
|
"learning_rate": 1.9320976666795654e-05, |
|
"loss": 0.3383, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 3.3432651008009904, |
|
"grad_norm": 1.1339248418807983, |
|
"learning_rate": 1.9313237627210463e-05, |
|
"loss": 0.3399, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 3.347134620593584, |
|
"grad_norm": 1.4607455730438232, |
|
"learning_rate": 1.9305498587625276e-05, |
|
"loss": 0.3472, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.351004140386178, |
|
"grad_norm": 1.5800549983978271, |
|
"learning_rate": 1.929775954804009e-05, |
|
"loss": 0.3452, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 3.3548736601787716, |
|
"grad_norm": 1.3513827323913574, |
|
"learning_rate": 1.9290020508454903e-05, |
|
"loss": 0.3577, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 3.3587431799713654, |
|
"grad_norm": 1.5983587503433228, |
|
"learning_rate": 1.9282281468869716e-05, |
|
"loss": 0.3621, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 3.362612699763959, |
|
"grad_norm": 1.893060326576233, |
|
"learning_rate": 1.927454242928453e-05, |
|
"loss": 0.3673, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 3.366482219556553, |
|
"grad_norm": 0.9722900986671448, |
|
"learning_rate": 1.926680338969934e-05, |
|
"loss": 0.3315, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.3703517393491467, |
|
"grad_norm": 1.8283945322036743, |
|
"learning_rate": 1.9259064350114152e-05, |
|
"loss": 0.339, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 3.3742212591417404, |
|
"grad_norm": 1.7708581686019897, |
|
"learning_rate": 1.9251325310528965e-05, |
|
"loss": 0.3597, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 3.378090778934334, |
|
"grad_norm": 2.0267698764801025, |
|
"learning_rate": 1.9243586270943778e-05, |
|
"loss": 0.3365, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 3.381960298726928, |
|
"grad_norm": 1.3400310277938843, |
|
"learning_rate": 1.923584723135859e-05, |
|
"loss": 0.3626, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 3.3858298185195217, |
|
"grad_norm": 2.713268280029297, |
|
"learning_rate": 1.9228108191773404e-05, |
|
"loss": 0.3576, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.3896993383121155, |
|
"grad_norm": 1.648658037185669, |
|
"learning_rate": 1.9220369152188214e-05, |
|
"loss": 0.3696, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 3.393568858104709, |
|
"grad_norm": 1.1035487651824951, |
|
"learning_rate": 1.9212630112603027e-05, |
|
"loss": 0.3622, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 3.397438377897303, |
|
"grad_norm": 1.4534286260604858, |
|
"learning_rate": 1.920489107301784e-05, |
|
"loss": 0.3718, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 3.4013078976898967, |
|
"grad_norm": 1.3734116554260254, |
|
"learning_rate": 1.9197152033432653e-05, |
|
"loss": 0.3496, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 3.4051774174824905, |
|
"grad_norm": 1.6333812475204468, |
|
"learning_rate": 1.9189412993847466e-05, |
|
"loss": 0.3555, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.4090469372750842, |
|
"grad_norm": 1.196081519126892, |
|
"learning_rate": 1.9181673954262276e-05, |
|
"loss": 0.3549, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 3.412916457067678, |
|
"grad_norm": 1.954453468322754, |
|
"learning_rate": 1.917393491467709e-05, |
|
"loss": 0.3231, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 3.4167859768602717, |
|
"grad_norm": 0.7807307839393616, |
|
"learning_rate": 1.9166195875091902e-05, |
|
"loss": 0.3486, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 3.4206554966528655, |
|
"grad_norm": 1.2698251008987427, |
|
"learning_rate": 1.9158456835506715e-05, |
|
"loss": 0.3351, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 3.4245250164454593, |
|
"grad_norm": 1.6529748439788818, |
|
"learning_rate": 1.915071779592153e-05, |
|
"loss": 0.3351, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.428394536238053, |
|
"grad_norm": 3.4594674110412598, |
|
"learning_rate": 1.914297875633634e-05, |
|
"loss": 0.3504, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 3.4322640560306468, |
|
"grad_norm": 3.723195791244507, |
|
"learning_rate": 1.913523971675115e-05, |
|
"loss": 0.3472, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 3.4361335758232405, |
|
"grad_norm": 1.7539480924606323, |
|
"learning_rate": 1.9127500677165964e-05, |
|
"loss": 0.3386, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 3.4400030956158343, |
|
"grad_norm": 2.1020853519439697, |
|
"learning_rate": 1.9119761637580777e-05, |
|
"loss": 0.3599, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 3.443872615408428, |
|
"grad_norm": 1.5043954849243164, |
|
"learning_rate": 1.911202259799559e-05, |
|
"loss": 0.3386, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.447742135201022, |
|
"grad_norm": 0.9868506193161011, |
|
"learning_rate": 1.9104283558410404e-05, |
|
"loss": 0.3388, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 3.451611654993615, |
|
"grad_norm": 1.3694320917129517, |
|
"learning_rate": 1.9096544518825217e-05, |
|
"loss": 0.3185, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 3.455481174786209, |
|
"grad_norm": 1.4770699739456177, |
|
"learning_rate": 1.9088805479240026e-05, |
|
"loss": 0.3386, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 3.4593506945788026, |
|
"grad_norm": 3.487114191055298, |
|
"learning_rate": 1.908106643965484e-05, |
|
"loss": 0.3497, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 3.4632202143713964, |
|
"grad_norm": 2.1382334232330322, |
|
"learning_rate": 1.9073327400069653e-05, |
|
"loss": 0.3469, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.46708973416399, |
|
"grad_norm": 3.232499361038208, |
|
"learning_rate": 1.9065588360484466e-05, |
|
"loss": 0.3323, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 3.470959253956584, |
|
"grad_norm": 1.2111986875534058, |
|
"learning_rate": 1.905784932089928e-05, |
|
"loss": 0.3465, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 3.4748287737491776, |
|
"grad_norm": 1.2314847707748413, |
|
"learning_rate": 1.9050110281314092e-05, |
|
"loss": 0.3497, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 3.4786982935417714, |
|
"grad_norm": 3.4358456134796143, |
|
"learning_rate": 1.90423712417289e-05, |
|
"loss": 0.341, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 3.482567813334365, |
|
"grad_norm": 1.8911181688308716, |
|
"learning_rate": 1.9034632202143715e-05, |
|
"loss": 0.3588, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.486437333126959, |
|
"grad_norm": 1.6309691667556763, |
|
"learning_rate": 1.9026893162558528e-05, |
|
"loss": 0.3457, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 3.4903068529195527, |
|
"grad_norm": 1.5026049613952637, |
|
"learning_rate": 1.901915412297334e-05, |
|
"loss": 0.3263, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 3.4941763727121464, |
|
"grad_norm": 1.3562653064727783, |
|
"learning_rate": 1.9011415083388154e-05, |
|
"loss": 0.3566, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 3.49804589250474, |
|
"grad_norm": 2.2276010513305664, |
|
"learning_rate": 1.9003676043802964e-05, |
|
"loss": 0.3509, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 3.4998645668072594, |
|
"eval_loss": 0.24299356341362, |
|
"eval_runtime": 73.0718, |
|
"eval_samples_per_second": 28.588, |
|
"eval_steps_per_second": 3.586, |
|
"step": 90447 |
|
}, |
|
{ |
|
"epoch": 3.501915412297334, |
|
"grad_norm": 1.8114732503890991, |
|
"learning_rate": 1.9995898309019853e-05, |
|
"loss": 0.3384, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.5057849320899277, |
|
"grad_norm": 1.3792935609817505, |
|
"learning_rate": 1.9988159269434662e-05, |
|
"loss": 0.3464, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 3.5096544518825215, |
|
"grad_norm": 2.4080448150634766, |
|
"learning_rate": 1.998042022984948e-05, |
|
"loss": 0.3617, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 3.513523971675115, |
|
"grad_norm": 2.0266382694244385, |
|
"learning_rate": 1.997268119026429e-05, |
|
"loss": 0.3667, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 3.517393491467709, |
|
"grad_norm": 4.2909440994262695, |
|
"learning_rate": 1.9964942150679102e-05, |
|
"loss": 0.3303, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 3.5212630112603027, |
|
"grad_norm": 1.7127686738967896, |
|
"learning_rate": 1.9957203111093915e-05, |
|
"loss": 0.35, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.5251325310528965, |
|
"grad_norm": 1.5974979400634766, |
|
"learning_rate": 1.9949464071508728e-05, |
|
"loss": 0.3499, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 3.5290020508454902, |
|
"grad_norm": 3.3751542568206787, |
|
"learning_rate": 1.994172503192354e-05, |
|
"loss": 0.3329, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 3.5328715706380835, |
|
"grad_norm": 1.623238444328308, |
|
"learning_rate": 1.993398599233835e-05, |
|
"loss": 0.3496, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 3.5367410904306773, |
|
"grad_norm": 1.5852680206298828, |
|
"learning_rate": 1.9926246952753164e-05, |
|
"loss": 0.3524, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 3.540610610223271, |
|
"grad_norm": 1.1841716766357422, |
|
"learning_rate": 1.9918507913167977e-05, |
|
"loss": 0.3471, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.544480130015865, |
|
"grad_norm": 1.724593162536621, |
|
"learning_rate": 1.991076887358279e-05, |
|
"loss": 0.3323, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 3.5483496498084586, |
|
"grad_norm": 2.057185173034668, |
|
"learning_rate": 1.9903029833997603e-05, |
|
"loss": 0.3584, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 3.5522191696010523, |
|
"grad_norm": 1.5378031730651855, |
|
"learning_rate": 1.9895290794412413e-05, |
|
"loss": 0.3505, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 3.556088689393646, |
|
"grad_norm": 1.5235657691955566, |
|
"learning_rate": 1.988755175482723e-05, |
|
"loss": 0.3278, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 3.55995820918624, |
|
"grad_norm": 1.6705044507980347, |
|
"learning_rate": 1.987981271524204e-05, |
|
"loss": 0.3475, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.5638277289788336, |
|
"grad_norm": 1.6356074810028076, |
|
"learning_rate": 1.9872073675656852e-05, |
|
"loss": 0.3426, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 3.5676972487714274, |
|
"grad_norm": 1.408981204032898, |
|
"learning_rate": 1.9864334636071665e-05, |
|
"loss": 0.3311, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 3.571566768564021, |
|
"grad_norm": 8.708040237426758, |
|
"learning_rate": 1.985659559648648e-05, |
|
"loss": 0.3257, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 3.575436288356615, |
|
"grad_norm": 2.8553311824798584, |
|
"learning_rate": 1.984885655690129e-05, |
|
"loss": 0.3452, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 3.5793058081492086, |
|
"grad_norm": 2.367499589920044, |
|
"learning_rate": 1.98411175173161e-05, |
|
"loss": 0.3456, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.5831753279418024, |
|
"grad_norm": 1.9503910541534424, |
|
"learning_rate": 1.9833378477730914e-05, |
|
"loss": 0.3471, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 3.587044847734396, |
|
"grad_norm": 1.4180583953857422, |
|
"learning_rate": 1.9825639438145727e-05, |
|
"loss": 0.3421, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 3.59091436752699, |
|
"grad_norm": 0.9737741351127625, |
|
"learning_rate": 1.981790039856054e-05, |
|
"loss": 0.3429, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 3.5947838873195836, |
|
"grad_norm": 1.2342348098754883, |
|
"learning_rate": 1.9810161358975354e-05, |
|
"loss": 0.3474, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 3.5986534071121774, |
|
"grad_norm": 3.7432026863098145, |
|
"learning_rate": 1.9802422319390163e-05, |
|
"loss": 0.3437, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.602522926904771, |
|
"grad_norm": 1.9314531087875366, |
|
"learning_rate": 1.9794683279804976e-05, |
|
"loss": 0.3408, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 3.606392446697365, |
|
"grad_norm": 4.156320571899414, |
|
"learning_rate": 1.978694424021979e-05, |
|
"loss": 0.3414, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 3.6102619664899587, |
|
"grad_norm": 1.2768079042434692, |
|
"learning_rate": 1.9779205200634603e-05, |
|
"loss": 0.3461, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 3.6141314862825524, |
|
"grad_norm": 2.0528101921081543, |
|
"learning_rate": 1.9771466161049416e-05, |
|
"loss": 0.3223, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 3.618001006075146, |
|
"grad_norm": 1.143532633781433, |
|
"learning_rate": 1.976372712146423e-05, |
|
"loss": 0.3473, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.62187052586774, |
|
"grad_norm": 1.0867356061935425, |
|
"learning_rate": 1.9755988081879042e-05, |
|
"loss": 0.3326, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 3.6257400456603337, |
|
"grad_norm": 2.3322672843933105, |
|
"learning_rate": 1.9748249042293852e-05, |
|
"loss": 0.3331, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 3.6296095654529275, |
|
"grad_norm": 1.39437997341156, |
|
"learning_rate": 1.9740510002708665e-05, |
|
"loss": 0.334, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 3.633479085245521, |
|
"grad_norm": 1.3758375644683838, |
|
"learning_rate": 1.9732770963123478e-05, |
|
"loss": 0.3387, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 3.637348605038115, |
|
"grad_norm": 1.9272472858428955, |
|
"learning_rate": 1.972503192353829e-05, |
|
"loss": 0.3288, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.6412181248307087, |
|
"grad_norm": 1.3517364263534546, |
|
"learning_rate": 1.9717292883953104e-05, |
|
"loss": 0.333, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 3.6450876446233025, |
|
"grad_norm": 1.0485949516296387, |
|
"learning_rate": 1.9709553844367914e-05, |
|
"loss": 0.3385, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 3.6489571644158962, |
|
"grad_norm": 1.0853439569473267, |
|
"learning_rate": 1.9701814804782727e-05, |
|
"loss": 0.3322, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 3.65282668420849, |
|
"grad_norm": 1.4697808027267456, |
|
"learning_rate": 1.969407576519754e-05, |
|
"loss": 0.3259, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 3.6566962040010833, |
|
"grad_norm": 1.0822653770446777, |
|
"learning_rate": 1.9686336725612353e-05, |
|
"loss": 0.3466, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.660565723793677, |
|
"grad_norm": 1.5661627054214478, |
|
"learning_rate": 1.9678597686027166e-05, |
|
"loss": 0.31, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 3.664435243586271, |
|
"grad_norm": 1.7367948293685913, |
|
"learning_rate": 1.9670858646441976e-05, |
|
"loss": 0.336, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 3.6683047633788646, |
|
"grad_norm": 1.3050540685653687, |
|
"learning_rate": 1.9663119606856792e-05, |
|
"loss": 0.3415, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 3.6721742831714583, |
|
"grad_norm": 1.3506726026535034, |
|
"learning_rate": 1.9655380567271602e-05, |
|
"loss": 0.3496, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 3.676043802964052, |
|
"grad_norm": 1.4908133745193481, |
|
"learning_rate": 1.9647641527686415e-05, |
|
"loss": 0.3522, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.679913322756646, |
|
"grad_norm": 1.5835528373718262, |
|
"learning_rate": 1.963990248810123e-05, |
|
"loss": 0.3389, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 3.6837828425492396, |
|
"grad_norm": 2.2646801471710205, |
|
"learning_rate": 1.963216344851604e-05, |
|
"loss": 0.3313, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 3.6876523623418334, |
|
"grad_norm": 2.0749449729919434, |
|
"learning_rate": 1.9624424408930855e-05, |
|
"loss": 0.3465, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 3.691521882134427, |
|
"grad_norm": 1.151435136795044, |
|
"learning_rate": 1.9616685369345664e-05, |
|
"loss": 0.328, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 3.695391401927021, |
|
"grad_norm": 1.1644638776779175, |
|
"learning_rate": 1.9608946329760477e-05, |
|
"loss": 0.3415, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.6992609217196146, |
|
"grad_norm": 1.7427713871002197, |
|
"learning_rate": 1.960120729017529e-05, |
|
"loss": 0.3278, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 3.7031304415122084, |
|
"grad_norm": 1.5325894355773926, |
|
"learning_rate": 1.9593468250590104e-05, |
|
"loss": 0.3567, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 3.706999961304802, |
|
"grad_norm": 0.8409464359283447, |
|
"learning_rate": 1.9585729211004917e-05, |
|
"loss": 0.332, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 3.710869481097396, |
|
"grad_norm": 1.7328728437423706, |
|
"learning_rate": 1.9577990171419726e-05, |
|
"loss": 0.3382, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 3.7147390008899897, |
|
"grad_norm": 1.5052417516708374, |
|
"learning_rate": 1.957025113183454e-05, |
|
"loss": 0.3389, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.7186085206825834, |
|
"grad_norm": 1.1802027225494385, |
|
"learning_rate": 1.9562512092249353e-05, |
|
"loss": 0.3377, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 3.722478040475177, |
|
"grad_norm": 1.825426459312439, |
|
"learning_rate": 1.9554773052664166e-05, |
|
"loss": 0.3391, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 3.726347560267771, |
|
"grad_norm": 1.3100457191467285, |
|
"learning_rate": 1.954703401307898e-05, |
|
"loss": 0.3418, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 3.7302170800603642, |
|
"grad_norm": 1.3283213376998901, |
|
"learning_rate": 1.9539294973493792e-05, |
|
"loss": 0.3419, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 3.734086599852958, |
|
"grad_norm": 1.5435948371887207, |
|
"learning_rate": 1.9531555933908605e-05, |
|
"loss": 0.3366, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.7379561196455517, |
|
"grad_norm": 1.3406691551208496, |
|
"learning_rate": 1.9523816894323415e-05, |
|
"loss": 0.3399, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 3.7418256394381455, |
|
"grad_norm": 1.3712650537490845, |
|
"learning_rate": 1.9516077854738228e-05, |
|
"loss": 0.3419, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 3.7456951592307393, |
|
"grad_norm": 1.0608057975769043, |
|
"learning_rate": 1.950833881515304e-05, |
|
"loss": 0.3422, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 3.749564679023333, |
|
"grad_norm": 2.5015125274658203, |
|
"learning_rate": 1.9500599775567854e-05, |
|
"loss": 0.3318, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 3.7534341988159268, |
|
"grad_norm": 1.6725361347198486, |
|
"learning_rate": 1.9492860735982667e-05, |
|
"loss": 0.3186, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.7573037186085205, |
|
"grad_norm": 0.9464316964149475, |
|
"learning_rate": 1.9485121696397477e-05, |
|
"loss": 0.3397, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 3.7611732384011143, |
|
"grad_norm": 1.285186529159546, |
|
"learning_rate": 1.947738265681229e-05, |
|
"loss": 0.3324, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 3.765042758193708, |
|
"grad_norm": 1.267645001411438, |
|
"learning_rate": 1.9469643617227103e-05, |
|
"loss": 0.3242, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 3.768912277986302, |
|
"grad_norm": 1.1808134317398071, |
|
"learning_rate": 1.9461904577641916e-05, |
|
"loss": 0.3351, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 3.7727817977788956, |
|
"grad_norm": 1.590160846710205, |
|
"learning_rate": 1.945416553805673e-05, |
|
"loss": 0.3276, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.7766513175714893, |
|
"grad_norm": 1.5932397842407227, |
|
"learning_rate": 1.9446426498471542e-05, |
|
"loss": 0.3622, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 3.780520837364083, |
|
"grad_norm": 1.2350918054580688, |
|
"learning_rate": 1.9438687458886356e-05, |
|
"loss": 0.3193, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 3.784390357156677, |
|
"grad_norm": 1.921157956123352, |
|
"learning_rate": 1.9430948419301165e-05, |
|
"loss": 0.3374, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 3.7882598769492706, |
|
"grad_norm": 1.235912799835205, |
|
"learning_rate": 1.942320937971598e-05, |
|
"loss": 0.3467, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 3.7921293967418643, |
|
"grad_norm": 1.6335248947143555, |
|
"learning_rate": 1.941547034013079e-05, |
|
"loss": 0.3454, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.795998916534458, |
|
"grad_norm": 1.8079530000686646, |
|
"learning_rate": 1.9407731300545605e-05, |
|
"loss": 0.3497, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 3.799868436327052, |
|
"grad_norm": 2.2827401161193848, |
|
"learning_rate": 1.9399992260960418e-05, |
|
"loss": 0.3703, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 3.8037379561196456, |
|
"grad_norm": 1.0421605110168457, |
|
"learning_rate": 1.9392253221375227e-05, |
|
"loss": 0.341, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 3.8076074759122394, |
|
"grad_norm": 2.6579549312591553, |
|
"learning_rate": 1.938451418179004e-05, |
|
"loss": 0.3506, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 3.811476995704833, |
|
"grad_norm": 0.8981249332427979, |
|
"learning_rate": 1.9376775142204854e-05, |
|
"loss": 0.317, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.815346515497427, |
|
"grad_norm": 1.5071488618850708, |
|
"learning_rate": 1.9369036102619667e-05, |
|
"loss": 0.3481, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 3.8192160352900206, |
|
"grad_norm": 2.4668521881103516, |
|
"learning_rate": 1.936129706303448e-05, |
|
"loss": 0.3189, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 3.8230855550826144, |
|
"grad_norm": 2.102757692337036, |
|
"learning_rate": 1.9353558023449293e-05, |
|
"loss": 0.3246, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 3.826955074875208, |
|
"grad_norm": 1.3990046977996826, |
|
"learning_rate": 1.9345818983864103e-05, |
|
"loss": 0.3284, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 3.830824594667802, |
|
"grad_norm": 2.944544792175293, |
|
"learning_rate": 1.9338079944278916e-05, |
|
"loss": 0.3242, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.8346941144603957, |
|
"grad_norm": 1.9801486730575562, |
|
"learning_rate": 1.933034090469373e-05, |
|
"loss": 0.3319, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 3.8385636342529894, |
|
"grad_norm": 1.5694576501846313, |
|
"learning_rate": 1.9322601865108542e-05, |
|
"loss": 0.3292, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 3.842433154045583, |
|
"grad_norm": 1.6066193580627441, |
|
"learning_rate": 1.9314862825523355e-05, |
|
"loss": 0.3304, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 3.846302673838177, |
|
"grad_norm": 1.478926181793213, |
|
"learning_rate": 1.9307123785938168e-05, |
|
"loss": 0.3208, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 3.8501721936307707, |
|
"grad_norm": 1.1129310131072998, |
|
"learning_rate": 1.9299384746352978e-05, |
|
"loss": 0.3373, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.8540417134233644, |
|
"grad_norm": 3.5530917644500732, |
|
"learning_rate": 1.929164570676779e-05, |
|
"loss": 0.3252, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 3.8579112332159577, |
|
"grad_norm": 0.8966418504714966, |
|
"learning_rate": 1.9283906667182604e-05, |
|
"loss": 0.3244, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 3.8617807530085515, |
|
"grad_norm": 2.311257839202881, |
|
"learning_rate": 1.9276167627597417e-05, |
|
"loss": 0.3115, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 3.8656502728011453, |
|
"grad_norm": 2.062633752822876, |
|
"learning_rate": 1.926842858801223e-05, |
|
"loss": 0.3325, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 3.869519792593739, |
|
"grad_norm": 1.8347896337509155, |
|
"learning_rate": 1.9260689548427043e-05, |
|
"loss": 0.3271, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.8733893123863328, |
|
"grad_norm": 1.5175038576126099, |
|
"learning_rate": 1.9252950508841853e-05, |
|
"loss": 0.3285, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 3.8772588321789265, |
|
"grad_norm": 1.3393527269363403, |
|
"learning_rate": 1.9245211469256666e-05, |
|
"loss": 0.3409, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 3.8811283519715203, |
|
"grad_norm": 1.7215604782104492, |
|
"learning_rate": 1.923747242967148e-05, |
|
"loss": 0.3321, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 3.884997871764114, |
|
"grad_norm": 1.3278648853302002, |
|
"learning_rate": 1.9229733390086292e-05, |
|
"loss": 0.3166, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 3.888867391556708, |
|
"grad_norm": 1.59735107421875, |
|
"learning_rate": 1.9221994350501106e-05, |
|
"loss": 0.3215, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.8927369113493016, |
|
"grad_norm": 1.3256441354751587, |
|
"learning_rate": 1.9214255310915915e-05, |
|
"loss": 0.3456, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 3.8966064311418953, |
|
"grad_norm": 1.9966683387756348, |
|
"learning_rate": 1.920651627133073e-05, |
|
"loss": 0.3012, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 3.900475950934489, |
|
"grad_norm": 1.6612262725830078, |
|
"learning_rate": 1.919877723174554e-05, |
|
"loss": 0.3588, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 3.904345470727083, |
|
"grad_norm": 3.342247486114502, |
|
"learning_rate": 1.9191038192160355e-05, |
|
"loss": 0.3205, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 3.9082149905196766, |
|
"grad_norm": 1.1689525842666626, |
|
"learning_rate": 1.9183299152575168e-05, |
|
"loss": 0.325, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.9120845103122703, |
|
"grad_norm": 1.5586668252944946, |
|
"learning_rate": 1.917556011298998e-05, |
|
"loss": 0.3091, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 3.915954030104864, |
|
"grad_norm": 1.0798732042312622, |
|
"learning_rate": 1.916782107340479e-05, |
|
"loss": 0.3229, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 3.919823549897458, |
|
"grad_norm": 1.0493509769439697, |
|
"learning_rate": 1.9160082033819604e-05, |
|
"loss": 0.3039, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 3.9236930696900516, |
|
"grad_norm": 0.7543585896492004, |
|
"learning_rate": 1.9152342994234417e-05, |
|
"loss": 0.3191, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 3.9275625894826454, |
|
"grad_norm": 1.2396085262298584, |
|
"learning_rate": 1.914460395464923e-05, |
|
"loss": 0.3338, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 3.9314321092752387, |
|
"grad_norm": 1.0943130254745483, |
|
"learning_rate": 1.9136864915064043e-05, |
|
"loss": 0.3151, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 3.9353016290678324, |
|
"grad_norm": 1.3704535961151123, |
|
"learning_rate": 1.9129125875478856e-05, |
|
"loss": 0.3238, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 3.939171148860426, |
|
"grad_norm": 1.32877516746521, |
|
"learning_rate": 1.9121386835893666e-05, |
|
"loss": 0.3359, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 3.94304066865302, |
|
"grad_norm": 1.6287816762924194, |
|
"learning_rate": 1.911364779630848e-05, |
|
"loss": 0.333, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 3.9469101884456137, |
|
"grad_norm": 1.7541491985321045, |
|
"learning_rate": 1.9105908756723292e-05, |
|
"loss": 0.3174, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.9507797082382075, |
|
"grad_norm": 1.1953548192977905, |
|
"learning_rate": 1.9098169717138105e-05, |
|
"loss": 0.3445, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 3.954649228030801, |
|
"grad_norm": 1.7301913499832153, |
|
"learning_rate": 1.9090430677552918e-05, |
|
"loss": 0.3225, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 3.958518747823395, |
|
"grad_norm": 1.687058448791504, |
|
"learning_rate": 1.908269163796773e-05, |
|
"loss": 0.339, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 3.9623882676159887, |
|
"grad_norm": 1.3156592845916748, |
|
"learning_rate": 1.907495259838254e-05, |
|
"loss": 0.3227, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 3.9662577874085825, |
|
"grad_norm": 1.7830286026000977, |
|
"learning_rate": 1.9067213558797354e-05, |
|
"loss": 0.3241, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 3.9701273072011762, |
|
"grad_norm": 1.549320936203003, |
|
"learning_rate": 1.9059474519212167e-05, |
|
"loss": 0.3324, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 3.97399682699377, |
|
"grad_norm": 1.6894500255584717, |
|
"learning_rate": 1.905173547962698e-05, |
|
"loss": 0.3295, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 3.9778663467863637, |
|
"grad_norm": 4.200815677642822, |
|
"learning_rate": 1.9043996440041793e-05, |
|
"loss": 0.3266, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 3.9817358665789575, |
|
"grad_norm": 1.9882376194000244, |
|
"learning_rate": 1.9036257400456606e-05, |
|
"loss": 0.3547, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 3.9856053863715513, |
|
"grad_norm": 1.1047308444976807, |
|
"learning_rate": 1.9028518360871416e-05, |
|
"loss": 0.3249, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.989474906164145, |
|
"grad_norm": 1.6856424808502197, |
|
"learning_rate": 1.902077932128623e-05, |
|
"loss": 0.3001, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 3.9933444259567388, |
|
"grad_norm": 1.4230420589447021, |
|
"learning_rate": 1.9013040281701042e-05, |
|
"loss": 0.3316, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 3.9972139457493325, |
|
"grad_norm": 1.5009479522705078, |
|
"learning_rate": 1.9005301242115855e-05, |
|
"loss": 0.318, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 3.9998452192082965, |
|
"eval_loss": 0.23254971206188202, |
|
"eval_runtime": 69.2697, |
|
"eval_samples_per_second": 30.157, |
|
"eval_steps_per_second": 3.782, |
|
"step": 103368 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 258430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 12921, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1862160593846272e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|