|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.10006171569635877, |
|
"eval_steps": 500, |
|
"global_step": 1216, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.228759514503188e-05, |
|
"grad_norm": 9.532528095057138, |
|
"learning_rate": 5.479452054794521e-08, |
|
"loss": 0.7901, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016457519029006376, |
|
"grad_norm": 30.026945671831577, |
|
"learning_rate": 1.0958904109589042e-07, |
|
"loss": 2.1253, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00024686278543509563, |
|
"grad_norm": 8.88519815829157, |
|
"learning_rate": 1.6438356164383561e-07, |
|
"loss": 0.7715, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00032915038058012753, |
|
"grad_norm": 29.197616305414858, |
|
"learning_rate": 2.1917808219178084e-07, |
|
"loss": 2.1284, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0004114379757251594, |
|
"grad_norm": 29.892559190290434, |
|
"learning_rate": 2.73972602739726e-07, |
|
"loss": 2.0685, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0004937255708701913, |
|
"grad_norm": 10.567782598278942, |
|
"learning_rate": 3.2876712328767123e-07, |
|
"loss": 0.8122, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005760131660152232, |
|
"grad_norm": 28.912763215741734, |
|
"learning_rate": 3.835616438356165e-07, |
|
"loss": 2.1056, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006583007611602551, |
|
"grad_norm": 29.51664131482477, |
|
"learning_rate": 4.383561643835617e-07, |
|
"loss": 2.0418, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.000740588356305287, |
|
"grad_norm": 28.30266632286417, |
|
"learning_rate": 4.931506849315068e-07, |
|
"loss": 2.0237, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008228759514503189, |
|
"grad_norm": 27.46875103243188, |
|
"learning_rate": 5.47945205479452e-07, |
|
"loss": 1.9595, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0009051635465953507, |
|
"grad_norm": 24.865752165641698, |
|
"learning_rate": 6.027397260273974e-07, |
|
"loss": 1.9174, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0009874511417403825, |
|
"grad_norm": 24.328147714809518, |
|
"learning_rate": 6.575342465753425e-07, |
|
"loss": 1.9307, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0010697387368854144, |
|
"grad_norm": 5.5234808874616395, |
|
"learning_rate": 7.123287671232878e-07, |
|
"loss": 0.8138, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0011520263320304463, |
|
"grad_norm": 24.035678143620423, |
|
"learning_rate": 7.67123287671233e-07, |
|
"loss": 1.9803, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0012343139271754782, |
|
"grad_norm": 20.7270429685146, |
|
"learning_rate": 8.219178082191781e-07, |
|
"loss": 1.8216, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0013166015223205101, |
|
"grad_norm": 3.1954913902580597, |
|
"learning_rate": 8.767123287671234e-07, |
|
"loss": 0.7577, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.001398889117465542, |
|
"grad_norm": 19.0932823831642, |
|
"learning_rate": 9.315068493150686e-07, |
|
"loss": 1.8765, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.001481176712610574, |
|
"grad_norm": 17.783753558169572, |
|
"learning_rate": 9.863013698630137e-07, |
|
"loss": 1.7423, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0015634643077556058, |
|
"grad_norm": 13.929582396803928, |
|
"learning_rate": 1.041095890410959e-06, |
|
"loss": 1.5683, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0016457519029006377, |
|
"grad_norm": 10.860155069125868, |
|
"learning_rate": 1.095890410958904e-06, |
|
"loss": 1.5344, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0017280394980456696, |
|
"grad_norm": 10.868210550382598, |
|
"learning_rate": 1.1506849315068494e-06, |
|
"loss": 1.4788, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0018103270931907015, |
|
"grad_norm": 9.306619668804826, |
|
"learning_rate": 1.2054794520547947e-06, |
|
"loss": 1.4831, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0018926146883357334, |
|
"grad_norm": 2.4601086961337857, |
|
"learning_rate": 1.26027397260274e-06, |
|
"loss": 0.7305, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.001974902283480765, |
|
"grad_norm": 7.6886950923134005, |
|
"learning_rate": 1.315068493150685e-06, |
|
"loss": 1.4257, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.002057189878625797, |
|
"grad_norm": 6.220708397685521, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"loss": 1.3468, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.002139477473770829, |
|
"grad_norm": 4.674476253548759, |
|
"learning_rate": 1.4246575342465755e-06, |
|
"loss": 1.3151, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.002221765068915861, |
|
"grad_norm": 3.895214381538298, |
|
"learning_rate": 1.4794520547945206e-06, |
|
"loss": 1.3041, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0023040526640608927, |
|
"grad_norm": 3.527134956076901, |
|
"learning_rate": 1.534246575342466e-06, |
|
"loss": 1.2878, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0023863402592059248, |
|
"grad_norm": 3.5362809667326522, |
|
"learning_rate": 1.5890410958904112e-06, |
|
"loss": 1.2726, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0024686278543509564, |
|
"grad_norm": 2.966450361552696, |
|
"learning_rate": 1.6438356164383561e-06, |
|
"loss": 1.2993, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0025509154494959886, |
|
"grad_norm": 2.458939366346722, |
|
"learning_rate": 1.6986301369863014e-06, |
|
"loss": 1.281, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0026332030446410202, |
|
"grad_norm": 2.535030337573037, |
|
"learning_rate": 1.7534246575342468e-06, |
|
"loss": 1.2708, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0027154906397860524, |
|
"grad_norm": 1.239317382781359, |
|
"learning_rate": 1.808219178082192e-06, |
|
"loss": 0.6648, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.002797778234931084, |
|
"grad_norm": 1.1180854196130607, |
|
"learning_rate": 1.8630136986301372e-06, |
|
"loss": 0.6646, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.002880065830076116, |
|
"grad_norm": 2.1450564270921646, |
|
"learning_rate": 1.9178082191780823e-06, |
|
"loss": 1.2447, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.002962353425221148, |
|
"grad_norm": 1.8049145439148968, |
|
"learning_rate": 1.9726027397260274e-06, |
|
"loss": 1.1815, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00304464102036618, |
|
"grad_norm": 0.795375753210199, |
|
"learning_rate": 2.027397260273973e-06, |
|
"loss": 0.6292, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0031269286155112116, |
|
"grad_norm": 0.7439259016336192, |
|
"learning_rate": 2.082191780821918e-06, |
|
"loss": 0.6468, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0032092162106562437, |
|
"grad_norm": 2.102073236832498, |
|
"learning_rate": 2.1369863013698635e-06, |
|
"loss": 1.1965, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0032915038058012754, |
|
"grad_norm": 1.7507482751861791, |
|
"learning_rate": 2.191780821917808e-06, |
|
"loss": 1.147, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0033737914009463075, |
|
"grad_norm": 2.115499646494852, |
|
"learning_rate": 2.2465753424657537e-06, |
|
"loss": 1.2079, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.003456078996091339, |
|
"grad_norm": 1.5822724466961147, |
|
"learning_rate": 2.301369863013699e-06, |
|
"loss": 1.213, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0035383665912363713, |
|
"grad_norm": 0.6843357265370693, |
|
"learning_rate": 2.356164383561644e-06, |
|
"loss": 0.624, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.003620654186381403, |
|
"grad_norm": 1.9669305292499641, |
|
"learning_rate": 2.4109589041095894e-06, |
|
"loss": 1.1691, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.003702941781526435, |
|
"grad_norm": 4.293989393639943, |
|
"learning_rate": 2.4657534246575345e-06, |
|
"loss": 1.1484, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.003785229376671467, |
|
"grad_norm": 1.3873591085798673, |
|
"learning_rate": 2.52054794520548e-06, |
|
"loss": 1.177, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0038675169718164985, |
|
"grad_norm": 3.6561002665760807, |
|
"learning_rate": 2.5753424657534247e-06, |
|
"loss": 1.1469, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.00394980456696153, |
|
"grad_norm": 1.5450365482515196, |
|
"learning_rate": 2.63013698630137e-06, |
|
"loss": 1.1521, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004032092162106563, |
|
"grad_norm": 1.5565124011894804, |
|
"learning_rate": 2.6849315068493153e-06, |
|
"loss": 1.1589, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004114379757251594, |
|
"grad_norm": 0.6675144755255817, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"loss": 0.6406, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004196667352396626, |
|
"grad_norm": 1.5292143908928457, |
|
"learning_rate": 2.794520547945206e-06, |
|
"loss": 1.1297, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.004278954947541658, |
|
"grad_norm": 0.6502938857874467, |
|
"learning_rate": 2.849315068493151e-06, |
|
"loss": 0.6186, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.00436124254268669, |
|
"grad_norm": 1.4333837148693778, |
|
"learning_rate": 2.9041095890410957e-06, |
|
"loss": 1.1303, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.004443530137831722, |
|
"grad_norm": 1.4749791593345467, |
|
"learning_rate": 2.9589041095890413e-06, |
|
"loss": 1.1387, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.004525817732976754, |
|
"grad_norm": 1.4998339630977238, |
|
"learning_rate": 3.0136986301369864e-06, |
|
"loss": 1.1857, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.004608105328121785, |
|
"grad_norm": 1.5507431529256293, |
|
"learning_rate": 3.068493150684932e-06, |
|
"loss": 1.1487, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.004690392923266818, |
|
"grad_norm": 1.6348282836598194, |
|
"learning_rate": 3.123287671232877e-06, |
|
"loss": 1.1641, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0047726805184118495, |
|
"grad_norm": 0.5752534532225031, |
|
"learning_rate": 3.1780821917808225e-06, |
|
"loss": 0.5701, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.004854968113556881, |
|
"grad_norm": 1.6099812024773308, |
|
"learning_rate": 3.2328767123287676e-06, |
|
"loss": 1.1721, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.004937255708701913, |
|
"grad_norm": 0.6408161226805661, |
|
"learning_rate": 3.2876712328767123e-06, |
|
"loss": 0.5998, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0050195433038469454, |
|
"grad_norm": 0.5617271278467075, |
|
"learning_rate": 3.342465753424658e-06, |
|
"loss": 0.6265, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.005101830898991977, |
|
"grad_norm": 1.9160395609787255, |
|
"learning_rate": 3.397260273972603e-06, |
|
"loss": 1.1687, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.005184118494137009, |
|
"grad_norm": 1.7944962743686514, |
|
"learning_rate": 3.4520547945205484e-06, |
|
"loss": 1.0999, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0052664060892820405, |
|
"grad_norm": 1.6550254402978586, |
|
"learning_rate": 3.5068493150684935e-06, |
|
"loss": 1.1283, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.005348693684427073, |
|
"grad_norm": 2.06701106889446, |
|
"learning_rate": 3.5616438356164386e-06, |
|
"loss": 1.1449, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.005430981279572105, |
|
"grad_norm": 1.334891505276627, |
|
"learning_rate": 3.616438356164384e-06, |
|
"loss": 1.0978, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.005513268874717136, |
|
"grad_norm": 1.809032539584058, |
|
"learning_rate": 3.671232876712329e-06, |
|
"loss": 1.1172, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.005595556469862168, |
|
"grad_norm": 0.5631162064075181, |
|
"learning_rate": 3.7260273972602743e-06, |
|
"loss": 0.5793, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0056778440650072, |
|
"grad_norm": 1.6486487445332147, |
|
"learning_rate": 3.7808219178082194e-06, |
|
"loss": 1.0659, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.005760131660152232, |
|
"grad_norm": 1.7514518974861626, |
|
"learning_rate": 3.8356164383561645e-06, |
|
"loss": 1.1786, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005842419255297264, |
|
"grad_norm": 2.6958756773092887, |
|
"learning_rate": 3.89041095890411e-06, |
|
"loss": 1.1019, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.005924706850442296, |
|
"grad_norm": 1.7803679070531404, |
|
"learning_rate": 3.945205479452055e-06, |
|
"loss": 1.0859, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.006006994445587327, |
|
"grad_norm": 1.5059878641321802, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.0788, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.00608928204073236, |
|
"grad_norm": 1.8716327109844846, |
|
"learning_rate": 4.054794520547946e-06, |
|
"loss": 1.1095, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0061715696358773916, |
|
"grad_norm": 1.5616475319286818, |
|
"learning_rate": 4.109589041095891e-06, |
|
"loss": 1.1278, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.006253857231022423, |
|
"grad_norm": 1.493898527453622, |
|
"learning_rate": 4.164383561643836e-06, |
|
"loss": 1.104, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.006336144826167455, |
|
"grad_norm": 1.8452837120263397, |
|
"learning_rate": 4.219178082191781e-06, |
|
"loss": 1.1095, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0064184324213124875, |
|
"grad_norm": 1.784319898693149, |
|
"learning_rate": 4.273972602739727e-06, |
|
"loss": 1.0949, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.006500720016457519, |
|
"grad_norm": 2.137737098454538, |
|
"learning_rate": 4.328767123287671e-06, |
|
"loss": 1.1302, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.006583007611602551, |
|
"grad_norm": 1.5914074135685312, |
|
"learning_rate": 4.383561643835616e-06, |
|
"loss": 1.0916, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0066652952067475825, |
|
"grad_norm": 2.3489068213528266, |
|
"learning_rate": 4.438356164383562e-06, |
|
"loss": 1.0729, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.006747582801892615, |
|
"grad_norm": 2.073369039063705, |
|
"learning_rate": 4.493150684931507e-06, |
|
"loss": 1.0892, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.006829870397037647, |
|
"grad_norm": 1.8770075428367665, |
|
"learning_rate": 4.5479452054794525e-06, |
|
"loss": 1.1187, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.006912157992182678, |
|
"grad_norm": 4.506883747948483, |
|
"learning_rate": 4.602739726027398e-06, |
|
"loss": 1.0762, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.00699444558732771, |
|
"grad_norm": 1.7209663187813125, |
|
"learning_rate": 4.657534246575343e-06, |
|
"loss": 1.1226, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.007076733182472743, |
|
"grad_norm": 0.6052191270162426, |
|
"learning_rate": 4.712328767123288e-06, |
|
"loss": 0.6055, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.007159020777617774, |
|
"grad_norm": 1.7994312730778819, |
|
"learning_rate": 4.767123287671233e-06, |
|
"loss": 1.0967, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.007241308372762806, |
|
"grad_norm": 1.9304702595282108, |
|
"learning_rate": 4.821917808219179e-06, |
|
"loss": 1.1492, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.007323595967907838, |
|
"grad_norm": 2.088564652992412, |
|
"learning_rate": 4.876712328767124e-06, |
|
"loss": 1.0985, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.00740588356305287, |
|
"grad_norm": 1.8604994381662585, |
|
"learning_rate": 4.931506849315069e-06, |
|
"loss": 1.0923, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007488171158197902, |
|
"grad_norm": 0.5594391183994828, |
|
"learning_rate": 4.986301369863014e-06, |
|
"loss": 0.6021, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.007570458753342934, |
|
"grad_norm": 1.7905925850647735, |
|
"learning_rate": 5.04109589041096e-06, |
|
"loss": 1.1047, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.007652746348487965, |
|
"grad_norm": 2.5829004230758055, |
|
"learning_rate": 5.095890410958904e-06, |
|
"loss": 1.0856, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.007735033943632997, |
|
"grad_norm": 2.8109366679812817, |
|
"learning_rate": 5.1506849315068494e-06, |
|
"loss": 1.0906, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.00781732153877803, |
|
"grad_norm": 1.9488333893087777, |
|
"learning_rate": 5.2054794520547945e-06, |
|
"loss": 1.1174, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.00789960913392306, |
|
"grad_norm": 1.8898489727850725, |
|
"learning_rate": 5.26027397260274e-06, |
|
"loss": 1.0764, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.007981896729068093, |
|
"grad_norm": 1.9662220110655733, |
|
"learning_rate": 5.3150684931506856e-06, |
|
"loss": 1.0687, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.008064184324213125, |
|
"grad_norm": 2.012210892740288, |
|
"learning_rate": 5.369863013698631e-06, |
|
"loss": 1.0688, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.008146471919358156, |
|
"grad_norm": 2.0256582980555145, |
|
"learning_rate": 5.424657534246576e-06, |
|
"loss": 1.0435, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.008228759514503189, |
|
"grad_norm": 2.3161294458478228, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 1.1027, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008311047109648221, |
|
"grad_norm": 2.159842764055281, |
|
"learning_rate": 5.534246575342466e-06, |
|
"loss": 1.0223, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.008393334704793252, |
|
"grad_norm": 2.7342793057170964, |
|
"learning_rate": 5.589041095890412e-06, |
|
"loss": 1.0485, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.008475622299938285, |
|
"grad_norm": 0.6133807544248717, |
|
"learning_rate": 5.643835616438357e-06, |
|
"loss": 0.5933, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.008557909895083315, |
|
"grad_norm": 2.0957817610708593, |
|
"learning_rate": 5.698630136986302e-06, |
|
"loss": 1.084, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.008640197490228348, |
|
"grad_norm": 3.0607800999765105, |
|
"learning_rate": 5.753424657534246e-06, |
|
"loss": 1.0369, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.00872248508537338, |
|
"grad_norm": 2.3550652220766404, |
|
"learning_rate": 5.8082191780821915e-06, |
|
"loss": 1.0785, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.008804772680518411, |
|
"grad_norm": 2.885362070393249, |
|
"learning_rate": 5.863013698630137e-06, |
|
"loss": 1.1143, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.008887060275663444, |
|
"grad_norm": 2.726344088292101, |
|
"learning_rate": 5.9178082191780825e-06, |
|
"loss": 1.0423, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.008969347870808476, |
|
"grad_norm": 2.720421039977678, |
|
"learning_rate": 5.972602739726028e-06, |
|
"loss": 1.0424, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.009051635465953507, |
|
"grad_norm": 2.7737084246092043, |
|
"learning_rate": 6.027397260273973e-06, |
|
"loss": 1.0669, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.00913392306109854, |
|
"grad_norm": 2.4862795852431696, |
|
"learning_rate": 6.082191780821919e-06, |
|
"loss": 1.0798, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.00921621065624357, |
|
"grad_norm": 1.9953691894673529, |
|
"learning_rate": 6.136986301369864e-06, |
|
"loss": 1.0337, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.009298498251388603, |
|
"grad_norm": 2.1734409375655908, |
|
"learning_rate": 6.191780821917809e-06, |
|
"loss": 1.0769, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.009380785846533636, |
|
"grad_norm": 2.4691052918090457, |
|
"learning_rate": 6.246575342465754e-06, |
|
"loss": 1.0758, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.009463073441678667, |
|
"grad_norm": 2.51765809469206, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 1.1065, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.009545361036823699, |
|
"grad_norm": 2.3976820917439916, |
|
"learning_rate": 6.356164383561645e-06, |
|
"loss": 1.0454, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.00962764863196873, |
|
"grad_norm": 0.5713752667519881, |
|
"learning_rate": 6.41095890410959e-06, |
|
"loss": 0.5767, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.009709936227113762, |
|
"grad_norm": 2.9303587471653385, |
|
"learning_rate": 6.465753424657535e-06, |
|
"loss": 1.0596, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.009792223822258795, |
|
"grad_norm": 2.625385971373383, |
|
"learning_rate": 6.5205479452054794e-06, |
|
"loss": 1.0694, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.009874511417403826, |
|
"grad_norm": 2.6850490082257368, |
|
"learning_rate": 6.5753424657534245e-06, |
|
"loss": 1.0629, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.009956799012548858, |
|
"grad_norm": 2.8941680627630575, |
|
"learning_rate": 6.630136986301371e-06, |
|
"loss": 1.0797, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.010039086607693891, |
|
"grad_norm": 2.437227451528501, |
|
"learning_rate": 6.684931506849316e-06, |
|
"loss": 1.0446, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.010121374202838922, |
|
"grad_norm": 4.2330170384868655, |
|
"learning_rate": 6.739726027397261e-06, |
|
"loss": 1.077, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.010203661797983954, |
|
"grad_norm": 3.742681446646284, |
|
"learning_rate": 6.794520547945206e-06, |
|
"loss": 1.0578, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.010285949393128985, |
|
"grad_norm": 2.905751102486295, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 1.0397, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.010368236988274018, |
|
"grad_norm": 2.248809486049495, |
|
"learning_rate": 6.904109589041097e-06, |
|
"loss": 1.0057, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.01045052458341905, |
|
"grad_norm": 2.793469113179832, |
|
"learning_rate": 6.958904109589042e-06, |
|
"loss": 1.0423, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.010532812178564081, |
|
"grad_norm": 3.044433211099124, |
|
"learning_rate": 7.013698630136987e-06, |
|
"loss": 1.0519, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.010615099773709114, |
|
"grad_norm": 3.453404138683163, |
|
"learning_rate": 7.068493150684932e-06, |
|
"loss": 1.0492, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.010697387368854146, |
|
"grad_norm": 3.294896819292345, |
|
"learning_rate": 7.123287671232877e-06, |
|
"loss": 1.0186, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.010779674963999177, |
|
"grad_norm": 2.652529510878711, |
|
"learning_rate": 7.178082191780823e-06, |
|
"loss": 1.0481, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.01086196255914421, |
|
"grad_norm": 2.5635334133873835, |
|
"learning_rate": 7.232876712328768e-06, |
|
"loss": 1.0189, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.01094425015428924, |
|
"grad_norm": 2.310822969570939, |
|
"learning_rate": 7.287671232876713e-06, |
|
"loss": 1.0804, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.011026537749434273, |
|
"grad_norm": 2.7939745420750532, |
|
"learning_rate": 7.342465753424658e-06, |
|
"loss": 1.0731, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.011108825344579305, |
|
"grad_norm": 10.159052417359996, |
|
"learning_rate": 7.397260273972603e-06, |
|
"loss": 1.0013, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.011191112939724336, |
|
"grad_norm": 2.492104076947929, |
|
"learning_rate": 7.452054794520549e-06, |
|
"loss": 1.058, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.011273400534869369, |
|
"grad_norm": 2.7323610574219512, |
|
"learning_rate": 7.506849315068494e-06, |
|
"loss": 1.0503, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0113556881300144, |
|
"grad_norm": 2.94667222448598, |
|
"learning_rate": 7.561643835616439e-06, |
|
"loss": 1.0283, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.011437975725159432, |
|
"grad_norm": 4.017422542900321, |
|
"learning_rate": 7.616438356164384e-06, |
|
"loss": 1.0883, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.011520263320304465, |
|
"grad_norm": 3.6715275879486633, |
|
"learning_rate": 7.671232876712329e-06, |
|
"loss": 1.0536, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.011602550915449495, |
|
"grad_norm": 3.0172048685106603, |
|
"learning_rate": 7.726027397260276e-06, |
|
"loss": 1.055, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.011684838510594528, |
|
"grad_norm": 3.077620329335805, |
|
"learning_rate": 7.78082191780822e-06, |
|
"loss": 1.0195, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.01176712610573956, |
|
"grad_norm": 2.959594926294125, |
|
"learning_rate": 7.835616438356164e-06, |
|
"loss": 1.0369, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.011849413700884591, |
|
"grad_norm": 5.2531338908420055, |
|
"learning_rate": 7.89041095890411e-06, |
|
"loss": 1.0524, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.011931701296029624, |
|
"grad_norm": 2.9462988063147755, |
|
"learning_rate": 7.945205479452055e-06, |
|
"loss": 1.0258, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.012013988891174655, |
|
"grad_norm": 2.835501864556677, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.0035, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.012096276486319687, |
|
"grad_norm": 3.1002864915340798, |
|
"learning_rate": 8.054794520547946e-06, |
|
"loss": 1.0379, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.01217856408146472, |
|
"grad_norm": 2.7184860323108464, |
|
"learning_rate": 8.109589041095892e-06, |
|
"loss": 1.0373, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.01226085167660975, |
|
"grad_norm": 3.093424317685046, |
|
"learning_rate": 8.164383561643837e-06, |
|
"loss": 1.0559, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.012343139271754783, |
|
"grad_norm": 2.9403313251924064, |
|
"learning_rate": 8.219178082191782e-06, |
|
"loss": 1.0312, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012425426866899816, |
|
"grad_norm": 3.334710236004298, |
|
"learning_rate": 8.273972602739727e-06, |
|
"loss": 1.032, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.012507714462044846, |
|
"grad_norm": 3.754339855053731, |
|
"learning_rate": 8.328767123287672e-06, |
|
"loss": 1.007, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.012590002057189879, |
|
"grad_norm": 3.468367068790295, |
|
"learning_rate": 8.383561643835617e-06, |
|
"loss": 1.0352, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.01267228965233491, |
|
"grad_norm": 3.08946479512089, |
|
"learning_rate": 8.438356164383562e-06, |
|
"loss": 1.0285, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.012754577247479942, |
|
"grad_norm": 2.7171722187405463, |
|
"learning_rate": 8.493150684931507e-06, |
|
"loss": 1.0355, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.012836864842624975, |
|
"grad_norm": 2.9125857783989955, |
|
"learning_rate": 8.547945205479454e-06, |
|
"loss": 1.0383, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.012919152437770006, |
|
"grad_norm": 3.431055558365553, |
|
"learning_rate": 8.602739726027397e-06, |
|
"loss": 0.9858, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.013001440032915038, |
|
"grad_norm": 2.5695243675652906, |
|
"learning_rate": 8.657534246575343e-06, |
|
"loss": 1.0257, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.013083727628060069, |
|
"grad_norm": 3.1403965108405645, |
|
"learning_rate": 8.712328767123288e-06, |
|
"loss": 1.0161, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.013166015223205102, |
|
"grad_norm": 3.0914617102513535, |
|
"learning_rate": 8.767123287671233e-06, |
|
"loss": 1.0126, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.013248302818350134, |
|
"grad_norm": 2.974266261740425, |
|
"learning_rate": 8.82191780821918e-06, |
|
"loss": 1.0146, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.013330590413495165, |
|
"grad_norm": 4.453619610906972, |
|
"learning_rate": 8.876712328767125e-06, |
|
"loss": 1.01, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.013412878008640198, |
|
"grad_norm": 3.3339134633525203, |
|
"learning_rate": 8.93150684931507e-06, |
|
"loss": 1.0164, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.01349516560378523, |
|
"grad_norm": 3.096524915506246, |
|
"learning_rate": 8.986301369863015e-06, |
|
"loss": 1.0436, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.013577453198930261, |
|
"grad_norm": 0.5714699105064062, |
|
"learning_rate": 9.04109589041096e-06, |
|
"loss": 0.5844, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.013659740794075293, |
|
"grad_norm": 3.3053733088978294, |
|
"learning_rate": 9.095890410958905e-06, |
|
"loss": 1.01, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.013742028389220324, |
|
"grad_norm": 3.042487650681917, |
|
"learning_rate": 9.15068493150685e-06, |
|
"loss": 1.0258, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.013824315984365357, |
|
"grad_norm": 3.0826602321214267, |
|
"learning_rate": 9.205479452054795e-06, |
|
"loss": 1.0152, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.01390660357951039, |
|
"grad_norm": 4.049305212778963, |
|
"learning_rate": 9.26027397260274e-06, |
|
"loss": 1.0344, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.01398889117465542, |
|
"grad_norm": 2.262878129775452, |
|
"learning_rate": 9.315068493150685e-06, |
|
"loss": 0.9903, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.014071178769800453, |
|
"grad_norm": 2.5478144837312904, |
|
"learning_rate": 9.36986301369863e-06, |
|
"loss": 1.0255, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.014153466364945485, |
|
"grad_norm": 0.5963923221726043, |
|
"learning_rate": 9.424657534246576e-06, |
|
"loss": 0.5835, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.014235753960090516, |
|
"grad_norm": 2.4229291883624775, |
|
"learning_rate": 9.47945205479452e-06, |
|
"loss": 0.9969, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.014318041555235549, |
|
"grad_norm": 2.5861485778295563, |
|
"learning_rate": 9.534246575342466e-06, |
|
"loss": 1.0321, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.01440032915038058, |
|
"grad_norm": 3.0535728376170868, |
|
"learning_rate": 9.589041095890411e-06, |
|
"loss": 1.0545, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.014482616745525612, |
|
"grad_norm": 3.167624134264756, |
|
"learning_rate": 9.643835616438358e-06, |
|
"loss": 1.0212, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.014564904340670645, |
|
"grad_norm": 2.532407359117499, |
|
"learning_rate": 9.698630136986303e-06, |
|
"loss": 1.0395, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.014647191935815675, |
|
"grad_norm": 3.335905765902237, |
|
"learning_rate": 9.753424657534248e-06, |
|
"loss": 1.0444, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.014729479530960708, |
|
"grad_norm": 2.6694368517880376, |
|
"learning_rate": 9.808219178082193e-06, |
|
"loss": 1.0609, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.01481176712610574, |
|
"grad_norm": 2.4432476499205946, |
|
"learning_rate": 9.863013698630138e-06, |
|
"loss": 1.028, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.014894054721250771, |
|
"grad_norm": 3.074867289580692, |
|
"learning_rate": 9.917808219178083e-06, |
|
"loss": 1.0277, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.014976342316395804, |
|
"grad_norm": 2.8234239360995548, |
|
"learning_rate": 9.972602739726028e-06, |
|
"loss": 1.0145, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.015058629911540835, |
|
"grad_norm": 2.7243533214462636, |
|
"learning_rate": 1.0027397260273975e-05, |
|
"loss": 0.9962, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.015140917506685867, |
|
"grad_norm": 9.268831121545867, |
|
"learning_rate": 1.008219178082192e-05, |
|
"loss": 1.0202, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.0152232051018309, |
|
"grad_norm": 0.6032487906705319, |
|
"learning_rate": 1.0136986301369864e-05, |
|
"loss": 0.5914, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.01530549269697593, |
|
"grad_norm": 2.446903956621448, |
|
"learning_rate": 1.0191780821917809e-05, |
|
"loss": 1.0332, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.015387780292120963, |
|
"grad_norm": 2.9898530283159857, |
|
"learning_rate": 1.0246575342465754e-05, |
|
"loss": 1.0058, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.015470067887265994, |
|
"grad_norm": 3.1462756197093147, |
|
"learning_rate": 1.0301369863013699e-05, |
|
"loss": 0.9956, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.015552355482411026, |
|
"grad_norm": 2.603677254795289, |
|
"learning_rate": 1.0356164383561644e-05, |
|
"loss": 1.0567, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.01563464307755606, |
|
"grad_norm": 2.888609337531178, |
|
"learning_rate": 1.0410958904109589e-05, |
|
"loss": 1.0117, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01571693067270109, |
|
"grad_norm": 3.4481892347405694, |
|
"learning_rate": 1.0465753424657534e-05, |
|
"loss": 1.0312, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.01579921826784612, |
|
"grad_norm": 2.723259220748936, |
|
"learning_rate": 1.052054794520548e-05, |
|
"loss": 1.0011, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.015881505862991155, |
|
"grad_norm": 2.400388335266181, |
|
"learning_rate": 1.0575342465753426e-05, |
|
"loss": 1.0397, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.015963793458136186, |
|
"grad_norm": 2.459799194471057, |
|
"learning_rate": 1.0630136986301371e-05, |
|
"loss": 1.0051, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.016046081053281216, |
|
"grad_norm": 2.493367813709158, |
|
"learning_rate": 1.0684931506849316e-05, |
|
"loss": 0.9877, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.01612836864842625, |
|
"grad_norm": 2.997365023733453, |
|
"learning_rate": 1.0739726027397261e-05, |
|
"loss": 0.9991, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.01621065624357128, |
|
"grad_norm": 3.1534988892754927, |
|
"learning_rate": 1.0794520547945206e-05, |
|
"loss": 1.0088, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.016292943838716312, |
|
"grad_norm": 0.7839570400001313, |
|
"learning_rate": 1.0849315068493152e-05, |
|
"loss": 0.5796, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.016375231433861347, |
|
"grad_norm": 2.968831135340441, |
|
"learning_rate": 1.0904109589041097e-05, |
|
"loss": 1.0169, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.016457519029006377, |
|
"grad_norm": 3.1769343467774736, |
|
"learning_rate": 1.0958904109589042e-05, |
|
"loss": 1.0097, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01653980662415141, |
|
"grad_norm": 2.941876345769733, |
|
"learning_rate": 1.1013698630136987e-05, |
|
"loss": 1.0021, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.016622094219296443, |
|
"grad_norm": 3.3680817014108353, |
|
"learning_rate": 1.1068493150684932e-05, |
|
"loss": 1.0218, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.016704381814441473, |
|
"grad_norm": 2.908397865551594, |
|
"learning_rate": 1.1123287671232879e-05, |
|
"loss": 0.9939, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.016786669409586504, |
|
"grad_norm": 2.822395296594326, |
|
"learning_rate": 1.1178082191780824e-05, |
|
"loss": 1.0172, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.016868957004731535, |
|
"grad_norm": 2.758365809402905, |
|
"learning_rate": 1.1232876712328769e-05, |
|
"loss": 1.05, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.01695124459987657, |
|
"grad_norm": 2.9222144058188984, |
|
"learning_rate": 1.1287671232876714e-05, |
|
"loss": 1.0073, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.0170335321950216, |
|
"grad_norm": 2.7763083571649547, |
|
"learning_rate": 1.1342465753424659e-05, |
|
"loss": 0.9958, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.01711581979016663, |
|
"grad_norm": 0.9573751817349475, |
|
"learning_rate": 1.1397260273972604e-05, |
|
"loss": 0.6336, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.017198107385311665, |
|
"grad_norm": 3.6768856466236857, |
|
"learning_rate": 1.1452054794520548e-05, |
|
"loss": 0.9839, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.017280394980456696, |
|
"grad_norm": 0.6002615125347783, |
|
"learning_rate": 1.1506849315068493e-05, |
|
"loss": 0.5964, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.017362682575601727, |
|
"grad_norm": 3.003839522918383, |
|
"learning_rate": 1.1561643835616438e-05, |
|
"loss": 1.0106, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.01744497017074676, |
|
"grad_norm": 3.0141237654512305, |
|
"learning_rate": 1.1616438356164383e-05, |
|
"loss": 1.005, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.017527257765891792, |
|
"grad_norm": 2.3380796106197583, |
|
"learning_rate": 1.1671232876712331e-05, |
|
"loss": 1.0025, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.017609545361036823, |
|
"grad_norm": 2.749317750470713, |
|
"learning_rate": 1.1726027397260275e-05, |
|
"loss": 1.0208, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.017691832956181857, |
|
"grad_norm": 2.5174324368341363, |
|
"learning_rate": 1.178082191780822e-05, |
|
"loss": 1.0225, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.017774120551326888, |
|
"grad_norm": 2.6939469770631206, |
|
"learning_rate": 1.1835616438356165e-05, |
|
"loss": 1.0181, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.01785640814647192, |
|
"grad_norm": 2.7969043874385218, |
|
"learning_rate": 1.189041095890411e-05, |
|
"loss": 1.0321, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.017938695741616953, |
|
"grad_norm": 2.130515743950604, |
|
"learning_rate": 1.1945205479452055e-05, |
|
"loss": 0.9939, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.018020983336761984, |
|
"grad_norm": 2.8848097718992296, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.0064, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.018103270931907015, |
|
"grad_norm": 1.496463088281579, |
|
"learning_rate": 1.2054794520547945e-05, |
|
"loss": 0.6077, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.018185558527052045, |
|
"grad_norm": 3.6292481030110935, |
|
"learning_rate": 1.210958904109589e-05, |
|
"loss": 1.0446, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.01826784612219708, |
|
"grad_norm": 2.252792644024641, |
|
"learning_rate": 1.2164383561643837e-05, |
|
"loss": 0.9739, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.01835013371734211, |
|
"grad_norm": 2.4478822538483755, |
|
"learning_rate": 1.2219178082191782e-05, |
|
"loss": 1.0131, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.01843242131248714, |
|
"grad_norm": 2.559717897830331, |
|
"learning_rate": 1.2273972602739727e-05, |
|
"loss": 1.0394, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.018514708907632176, |
|
"grad_norm": 2.869935242686829, |
|
"learning_rate": 1.2328767123287673e-05, |
|
"loss": 0.982, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.018596996502777206, |
|
"grad_norm": 2.5009663006221974, |
|
"learning_rate": 1.2383561643835618e-05, |
|
"loss": 1.0108, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.018679284097922237, |
|
"grad_norm": 2.9956405565150654, |
|
"learning_rate": 1.2438356164383563e-05, |
|
"loss": 0.9902, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.01876157169306727, |
|
"grad_norm": 2.674322004514903, |
|
"learning_rate": 1.2493150684931508e-05, |
|
"loss": 0.9927, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.018843859288212302, |
|
"grad_norm": 2.8674094236769583, |
|
"learning_rate": 1.2547945205479453e-05, |
|
"loss": 1.003, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.018926146883357333, |
|
"grad_norm": 2.9710081363188703, |
|
"learning_rate": 1.2602739726027398e-05, |
|
"loss": 0.9844, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.019008434478502367, |
|
"grad_norm": 2.98201549226896, |
|
"learning_rate": 1.2657534246575343e-05, |
|
"loss": 0.967, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.019090722073647398, |
|
"grad_norm": 2.903452559676373, |
|
"learning_rate": 1.271232876712329e-05, |
|
"loss": 1.0102, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.01917300966879243, |
|
"grad_norm": 2.5049333400477813, |
|
"learning_rate": 1.2767123287671235e-05, |
|
"loss": 1.0096, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.01925529726393746, |
|
"grad_norm": 2.6342420325330522, |
|
"learning_rate": 1.282191780821918e-05, |
|
"loss": 0.9718, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.019337584859082494, |
|
"grad_norm": 2.616314817819011, |
|
"learning_rate": 1.2876712328767125e-05, |
|
"loss": 0.9977, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.019419872454227525, |
|
"grad_norm": 2.420031810864845, |
|
"learning_rate": 1.293150684931507e-05, |
|
"loss": 1.0117, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.019502160049372556, |
|
"grad_norm": 2.9412487319960126, |
|
"learning_rate": 1.2986301369863015e-05, |
|
"loss": 1.0471, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.01958444764451759, |
|
"grad_norm": 2.7984406162708906, |
|
"learning_rate": 1.3041095890410959e-05, |
|
"loss": 0.9501, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.01966673523966262, |
|
"grad_norm": 4.841561737416111, |
|
"learning_rate": 1.3095890410958904e-05, |
|
"loss": 1.0138, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.01974902283480765, |
|
"grad_norm": 2.1778156992905577, |
|
"learning_rate": 1.3150684931506849e-05, |
|
"loss": 1.0101, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.019831310429952686, |
|
"grad_norm": 2.67809296527932, |
|
"learning_rate": 1.3205479452054794e-05, |
|
"loss": 0.982, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.019913598025097717, |
|
"grad_norm": 2.738306662356033, |
|
"learning_rate": 1.3260273972602743e-05, |
|
"loss": 0.9953, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.019995885620242747, |
|
"grad_norm": 3.69258760845872, |
|
"learning_rate": 1.3315068493150686e-05, |
|
"loss": 0.9933, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.020078173215387782, |
|
"grad_norm": 3.4285570541743096, |
|
"learning_rate": 1.3369863013698631e-05, |
|
"loss": 0.9891, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.020160460810532813, |
|
"grad_norm": 2.1884703037736175, |
|
"learning_rate": 1.3424657534246576e-05, |
|
"loss": 0.9615, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.020242748405677843, |
|
"grad_norm": 2.278997433805173, |
|
"learning_rate": 1.3479452054794521e-05, |
|
"loss": 0.9984, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.020325036000822878, |
|
"grad_norm": 0.9732502137516167, |
|
"learning_rate": 1.3534246575342466e-05, |
|
"loss": 0.5964, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.02040732359596791, |
|
"grad_norm": 4.111007905694721, |
|
"learning_rate": 1.3589041095890412e-05, |
|
"loss": 1.03, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.02048961119111294, |
|
"grad_norm": 2.104309544659177, |
|
"learning_rate": 1.3643835616438357e-05, |
|
"loss": 0.9696, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.02057189878625797, |
|
"grad_norm": 2.5670779853119665, |
|
"learning_rate": 1.3698630136986302e-05, |
|
"loss": 0.9589, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.020654186381403004, |
|
"grad_norm": 2.7898261074191777, |
|
"learning_rate": 1.3753424657534247e-05, |
|
"loss": 1.0084, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.020736473976548035, |
|
"grad_norm": 3.2009246830375204, |
|
"learning_rate": 1.3808219178082194e-05, |
|
"loss": 0.9911, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.020818761571693066, |
|
"grad_norm": 3.1563797863262777, |
|
"learning_rate": 1.3863013698630139e-05, |
|
"loss": 0.9947, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.0209010491668381, |
|
"grad_norm": 3.193090081286074, |
|
"learning_rate": 1.3917808219178084e-05, |
|
"loss": 1.0069, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.02098333676198313, |
|
"grad_norm": 5.521797116199944, |
|
"learning_rate": 1.3972602739726029e-05, |
|
"loss": 0.9842, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.021065624357128162, |
|
"grad_norm": 1.243014761274919, |
|
"learning_rate": 1.4027397260273974e-05, |
|
"loss": 0.6147, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.021147911952273196, |
|
"grad_norm": 3.191364616862045, |
|
"learning_rate": 1.4082191780821919e-05, |
|
"loss": 0.974, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.021230199547418227, |
|
"grad_norm": 2.93570172220106, |
|
"learning_rate": 1.4136986301369864e-05, |
|
"loss": 0.9719, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.021312487142563258, |
|
"grad_norm": 4.468162617805659, |
|
"learning_rate": 1.419178082191781e-05, |
|
"loss": 0.9904, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.021394774737708292, |
|
"grad_norm": 2.2571244653960862, |
|
"learning_rate": 1.4246575342465754e-05, |
|
"loss": 0.9613, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.021477062332853323, |
|
"grad_norm": 4.467563699694284, |
|
"learning_rate": 1.43013698630137e-05, |
|
"loss": 0.9944, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.021559349927998354, |
|
"grad_norm": 0.68889362412214, |
|
"learning_rate": 1.4356164383561646e-05, |
|
"loss": 0.5789, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.021641637523143385, |
|
"grad_norm": 0.6373164384054985, |
|
"learning_rate": 1.4410958904109591e-05, |
|
"loss": 0.5688, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.02172392511828842, |
|
"grad_norm": 3.597782460566262, |
|
"learning_rate": 1.4465753424657537e-05, |
|
"loss": 0.9776, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.02180621271343345, |
|
"grad_norm": 2.7541673143111347, |
|
"learning_rate": 1.4520547945205482e-05, |
|
"loss": 0.9927, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.02188850030857848, |
|
"grad_norm": 0.6805788182804722, |
|
"learning_rate": 1.4575342465753427e-05, |
|
"loss": 0.5971, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.021970787903723515, |
|
"grad_norm": 2.725379141853366, |
|
"learning_rate": 1.463013698630137e-05, |
|
"loss": 0.9675, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.022053075498868546, |
|
"grad_norm": 4.08013853272879, |
|
"learning_rate": 1.4684931506849315e-05, |
|
"loss": 0.9786, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.022135363094013576, |
|
"grad_norm": 2.5492247984913483, |
|
"learning_rate": 1.473972602739726e-05, |
|
"loss": 0.9988, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.02221765068915861, |
|
"grad_norm": 3.8860413387854327, |
|
"learning_rate": 1.4794520547945205e-05, |
|
"loss": 0.9697, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02229993828430364, |
|
"grad_norm": 3.0719505820425925, |
|
"learning_rate": 1.484931506849315e-05, |
|
"loss": 0.9778, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.022382225879448672, |
|
"grad_norm": 3.065813452275364, |
|
"learning_rate": 1.4904109589041097e-05, |
|
"loss": 1.0114, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.022464513474593707, |
|
"grad_norm": 3.119520514603019, |
|
"learning_rate": 1.4958904109589042e-05, |
|
"loss": 1.0143, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.022546801069738737, |
|
"grad_norm": 2.8059490672957823, |
|
"learning_rate": 1.5013698630136988e-05, |
|
"loss": 0.9815, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.022629088664883768, |
|
"grad_norm": 2.6271007340037706, |
|
"learning_rate": 1.5068493150684933e-05, |
|
"loss": 1.0251, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.0227113762600288, |
|
"grad_norm": 3.114887825941429, |
|
"learning_rate": 1.5123287671232878e-05, |
|
"loss": 0.9722, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.022793663855173833, |
|
"grad_norm": 3.222134871844559, |
|
"learning_rate": 1.5178082191780823e-05, |
|
"loss": 0.9895, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.022875951450318864, |
|
"grad_norm": 0.8596732284566506, |
|
"learning_rate": 1.5232876712328768e-05, |
|
"loss": 0.6421, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.022958239045463895, |
|
"grad_norm": 2.688881192050172, |
|
"learning_rate": 1.5287671232876713e-05, |
|
"loss": 0.9709, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.02304052664060893, |
|
"grad_norm": 0.5908184070761948, |
|
"learning_rate": 1.5342465753424658e-05, |
|
"loss": 0.5813, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02312281423575396, |
|
"grad_norm": 2.5626042733441565, |
|
"learning_rate": 1.5397260273972603e-05, |
|
"loss": 1.0054, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.02320510183089899, |
|
"grad_norm": 0.6319032426639426, |
|
"learning_rate": 1.545205479452055e-05, |
|
"loss": 0.569, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.023287389426044025, |
|
"grad_norm": 3.381429029921771, |
|
"learning_rate": 1.5506849315068497e-05, |
|
"loss": 0.9924, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.023369677021189056, |
|
"grad_norm": 0.6893518849945868, |
|
"learning_rate": 1.556164383561644e-05, |
|
"loss": 0.5947, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.023451964616334087, |
|
"grad_norm": 0.6030322287256665, |
|
"learning_rate": 1.5616438356164384e-05, |
|
"loss": 0.5849, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.02353425221147912, |
|
"grad_norm": 2.584371231162671, |
|
"learning_rate": 1.567123287671233e-05, |
|
"loss": 1.0113, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.023616539806624152, |
|
"grad_norm": 2.617374246670965, |
|
"learning_rate": 1.5726027397260274e-05, |
|
"loss": 0.9952, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.023698827401769183, |
|
"grad_norm": 3.131756380862052, |
|
"learning_rate": 1.578082191780822e-05, |
|
"loss": 0.9978, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.023781114996914217, |
|
"grad_norm": 0.7149086621817794, |
|
"learning_rate": 1.5835616438356164e-05, |
|
"loss": 0.6005, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.023863402592059248, |
|
"grad_norm": 2.8572031223595804, |
|
"learning_rate": 1.589041095890411e-05, |
|
"loss": 0.9764, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02394569018720428, |
|
"grad_norm": 3.0067656548078525, |
|
"learning_rate": 1.5945205479452054e-05, |
|
"loss": 0.9931, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.02402797778234931, |
|
"grad_norm": 2.9396448545767067, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.0167, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.024110265377494344, |
|
"grad_norm": 2.551576593689318, |
|
"learning_rate": 1.6054794520547948e-05, |
|
"loss": 0.9652, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.024192552972639374, |
|
"grad_norm": 3.4929495312083376, |
|
"learning_rate": 1.6109589041095893e-05, |
|
"loss": 0.9741, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.024274840567784405, |
|
"grad_norm": 0.5986861672946895, |
|
"learning_rate": 1.6164383561643838e-05, |
|
"loss": 0.5967, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.02435712816292944, |
|
"grad_norm": 2.3369563375899163, |
|
"learning_rate": 1.6219178082191783e-05, |
|
"loss": 0.9541, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.02443941575807447, |
|
"grad_norm": 3.115001072277964, |
|
"learning_rate": 1.6273972602739728e-05, |
|
"loss": 1.002, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.0245217033532195, |
|
"grad_norm": 3.594307440216849, |
|
"learning_rate": 1.6328767123287673e-05, |
|
"loss": 0.9483, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.024603990948364535, |
|
"grad_norm": 2.4315114201324977, |
|
"learning_rate": 1.638356164383562e-05, |
|
"loss": 0.9844, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.024686278543509566, |
|
"grad_norm": 3.3312431748162528, |
|
"learning_rate": 1.6438356164383563e-05, |
|
"loss": 1.0031, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.024768566138654597, |
|
"grad_norm": 2.7478721222497695, |
|
"learning_rate": 1.649315068493151e-05, |
|
"loss": 0.9942, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02485085373379963, |
|
"grad_norm": 2.7443057694383097, |
|
"learning_rate": 1.6547945205479454e-05, |
|
"loss": 0.9841, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.024933141328944662, |
|
"grad_norm": 2.5333469665657797, |
|
"learning_rate": 1.66027397260274e-05, |
|
"loss": 0.9751, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.025015428924089693, |
|
"grad_norm": 3.161735273370277, |
|
"learning_rate": 1.6657534246575344e-05, |
|
"loss": 0.9687, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.025097716519234724, |
|
"grad_norm": 2.6737823247108183, |
|
"learning_rate": 1.671232876712329e-05, |
|
"loss": 0.9787, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.025180004114379758, |
|
"grad_norm": 0.6510425400067263, |
|
"learning_rate": 1.6767123287671234e-05, |
|
"loss": 0.5622, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.02526229170952479, |
|
"grad_norm": 4.574909987598007, |
|
"learning_rate": 1.682191780821918e-05, |
|
"loss": 0.9643, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.02534457930466982, |
|
"grad_norm": 3.4438804774031935, |
|
"learning_rate": 1.6876712328767124e-05, |
|
"loss": 0.9615, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.025426866899814854, |
|
"grad_norm": 2.9285136796976015, |
|
"learning_rate": 1.693150684931507e-05, |
|
"loss": 0.9527, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.025509154494959885, |
|
"grad_norm": 2.779888649016243, |
|
"learning_rate": 1.6986301369863014e-05, |
|
"loss": 0.9544, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.025591442090104916, |
|
"grad_norm": 2.7248520567063848, |
|
"learning_rate": 1.7041095890410963e-05, |
|
"loss": 0.9473, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.02567372968524995, |
|
"grad_norm": 3.5709762174348954, |
|
"learning_rate": 1.7095890410958908e-05, |
|
"loss": 0.9575, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.02575601728039498, |
|
"grad_norm": 3.0856327234258827, |
|
"learning_rate": 1.715068493150685e-05, |
|
"loss": 0.9652, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.02583830487554001, |
|
"grad_norm": 2.2692448164089343, |
|
"learning_rate": 1.7205479452054795e-05, |
|
"loss": 0.9735, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.025920592470685046, |
|
"grad_norm": 5.769054110868784, |
|
"learning_rate": 1.726027397260274e-05, |
|
"loss": 0.9703, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.026002880065830077, |
|
"grad_norm": 2.508893910476298, |
|
"learning_rate": 1.7315068493150685e-05, |
|
"loss": 0.944, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.026085167660975107, |
|
"grad_norm": 2.8832916992173767, |
|
"learning_rate": 1.736986301369863e-05, |
|
"loss": 0.9646, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.026167455256120138, |
|
"grad_norm": 2.919174367177141, |
|
"learning_rate": 1.7424657534246575e-05, |
|
"loss": 0.9642, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.026249742851265172, |
|
"grad_norm": 2.3758292544134068, |
|
"learning_rate": 1.747945205479452e-05, |
|
"loss": 0.9819, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.026332030446410203, |
|
"grad_norm": 2.8844662683768822, |
|
"learning_rate": 1.7534246575342465e-05, |
|
"loss": 0.9757, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.026414318041555234, |
|
"grad_norm": 2.2651505276443964, |
|
"learning_rate": 1.7589041095890414e-05, |
|
"loss": 0.9461, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.02649660563670027, |
|
"grad_norm": 3.148064595511082, |
|
"learning_rate": 1.764383561643836e-05, |
|
"loss": 0.9457, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.0265788932318453, |
|
"grad_norm": 2.593793697550568, |
|
"learning_rate": 1.7698630136986304e-05, |
|
"loss": 0.9564, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.02666118082699033, |
|
"grad_norm": 3.5777764577994637, |
|
"learning_rate": 1.775342465753425e-05, |
|
"loss": 0.9585, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.026743468422135364, |
|
"grad_norm": 2.5200344733829434, |
|
"learning_rate": 1.7808219178082194e-05, |
|
"loss": 0.9429, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.026825756017280395, |
|
"grad_norm": 0.7344214528472546, |
|
"learning_rate": 1.786301369863014e-05, |
|
"loss": 0.6191, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.026908043612425426, |
|
"grad_norm": 3.3825851018048962, |
|
"learning_rate": 1.7917808219178085e-05, |
|
"loss": 0.9739, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.02699033120757046, |
|
"grad_norm": 2.4626600175420212, |
|
"learning_rate": 1.797260273972603e-05, |
|
"loss": 0.9813, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.02707261880271549, |
|
"grad_norm": 2.604744324101538, |
|
"learning_rate": 1.8027397260273975e-05, |
|
"loss": 0.9605, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.027154906397860522, |
|
"grad_norm": 2.3443898191922408, |
|
"learning_rate": 1.808219178082192e-05, |
|
"loss": 0.968, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.027237193993005556, |
|
"grad_norm": 2.2972121260527274, |
|
"learning_rate": 1.8136986301369865e-05, |
|
"loss": 0.9636, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.027319481588150587, |
|
"grad_norm": 0.6704215743863139, |
|
"learning_rate": 1.819178082191781e-05, |
|
"loss": 0.5832, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.027401769183295618, |
|
"grad_norm": 2.5588332490587806, |
|
"learning_rate": 1.8246575342465755e-05, |
|
"loss": 0.967, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.02748405677844065, |
|
"grad_norm": 0.5729720504764441, |
|
"learning_rate": 1.83013698630137e-05, |
|
"loss": 0.5796, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.027566344373585683, |
|
"grad_norm": 0.536934165288964, |
|
"learning_rate": 1.8356164383561645e-05, |
|
"loss": 0.586, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.027648631968730714, |
|
"grad_norm": 2.729927929300927, |
|
"learning_rate": 1.841095890410959e-05, |
|
"loss": 1.0006, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.027730919563875744, |
|
"grad_norm": 2.9380300033617193, |
|
"learning_rate": 1.8465753424657535e-05, |
|
"loss": 0.9806, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.02781320715902078, |
|
"grad_norm": 3.1871007449922595, |
|
"learning_rate": 1.852054794520548e-05, |
|
"loss": 1.0205, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.02789549475416581, |
|
"grad_norm": 2.7551362648970454, |
|
"learning_rate": 1.8575342465753426e-05, |
|
"loss": 0.9843, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.02797778234931084, |
|
"grad_norm": 2.341899316621362, |
|
"learning_rate": 1.863013698630137e-05, |
|
"loss": 0.9828, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.028060069944455875, |
|
"grad_norm": 3.0041315739517143, |
|
"learning_rate": 1.8684931506849316e-05, |
|
"loss": 0.9599, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.028142357539600905, |
|
"grad_norm": 1.098290342373438, |
|
"learning_rate": 1.873972602739726e-05, |
|
"loss": 0.5762, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.028224645134745936, |
|
"grad_norm": 2.793401629061216, |
|
"learning_rate": 1.8794520547945206e-05, |
|
"loss": 0.9599, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.02830693272989097, |
|
"grad_norm": 3.381992225466734, |
|
"learning_rate": 1.884931506849315e-05, |
|
"loss": 1.0128, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.028389220325036, |
|
"grad_norm": 3.0552921674313107, |
|
"learning_rate": 1.8904109589041096e-05, |
|
"loss": 0.9683, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.028471507920181032, |
|
"grad_norm": 2.59026883064129, |
|
"learning_rate": 1.895890410958904e-05, |
|
"loss": 0.9361, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.028553795515326063, |
|
"grad_norm": 3.0842540515307473, |
|
"learning_rate": 1.9013698630136986e-05, |
|
"loss": 0.9697, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.028636083110471097, |
|
"grad_norm": 2.443425049236279, |
|
"learning_rate": 1.906849315068493e-05, |
|
"loss": 0.9183, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.028718370705616128, |
|
"grad_norm": 3.127867492745528, |
|
"learning_rate": 1.9123287671232877e-05, |
|
"loss": 0.9601, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.02880065830076116, |
|
"grad_norm": 4.402570399866093, |
|
"learning_rate": 1.9178082191780822e-05, |
|
"loss": 0.9303, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.028882945895906193, |
|
"grad_norm": 0.8543818428159927, |
|
"learning_rate": 1.923287671232877e-05, |
|
"loss": 0.5988, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.028965233491051224, |
|
"grad_norm": 0.7093532126289934, |
|
"learning_rate": 1.9287671232876715e-05, |
|
"loss": 0.5831, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.029047521086196255, |
|
"grad_norm": 0.6407564149823172, |
|
"learning_rate": 1.934246575342466e-05, |
|
"loss": 0.577, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.02912980868134129, |
|
"grad_norm": 3.390283574742443, |
|
"learning_rate": 1.9397260273972606e-05, |
|
"loss": 0.9609, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.02921209627648632, |
|
"grad_norm": 2.53734497566345, |
|
"learning_rate": 1.945205479452055e-05, |
|
"loss": 0.9909, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.02929438387163135, |
|
"grad_norm": 1.0115473868573372, |
|
"learning_rate": 1.9506849315068496e-05, |
|
"loss": 0.6035, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.029376671466776385, |
|
"grad_norm": 0.8686466035185451, |
|
"learning_rate": 1.956164383561644e-05, |
|
"loss": 0.5971, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.029458959061921416, |
|
"grad_norm": 3.039718625814903, |
|
"learning_rate": 1.9616438356164386e-05, |
|
"loss": 0.9912, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.029541246657066447, |
|
"grad_norm": 3.1175114788948473, |
|
"learning_rate": 1.967123287671233e-05, |
|
"loss": 0.9866, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.02962353425221148, |
|
"grad_norm": 6.758106134116968, |
|
"learning_rate": 1.9726027397260276e-05, |
|
"loss": 0.9847, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02970582184735651, |
|
"grad_norm": 2.589972092841794, |
|
"learning_rate": 1.978082191780822e-05, |
|
"loss": 0.9565, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.029788109442501542, |
|
"grad_norm": 1.073769179644345, |
|
"learning_rate": 1.9835616438356166e-05, |
|
"loss": 0.6201, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.029870397037646573, |
|
"grad_norm": 2.620541255700163, |
|
"learning_rate": 1.989041095890411e-05, |
|
"loss": 0.9694, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.029952684632791608, |
|
"grad_norm": 2.9983273469412, |
|
"learning_rate": 1.9945205479452057e-05, |
|
"loss": 0.9517, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.03003497222793664, |
|
"grad_norm": 3.1705127831701176, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9757, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.03011725982308167, |
|
"grad_norm": 3.0769206086851493, |
|
"learning_rate": 1.9999999644807997e-05, |
|
"loss": 0.9725, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.030199547418226703, |
|
"grad_norm": 2.6381794624352346, |
|
"learning_rate": 1.999999857923201e-05, |
|
"loss": 0.9579, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.030281835013371734, |
|
"grad_norm": 2.524417719057271, |
|
"learning_rate": 1.999999680327212e-05, |
|
"loss": 0.9491, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.030364122608516765, |
|
"grad_norm": 2.0772737485337958, |
|
"learning_rate": 1.9999994316928445e-05, |
|
"loss": 0.9802, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.0304464102036618, |
|
"grad_norm": 0.695305872906948, |
|
"learning_rate": 1.9999991120201172e-05, |
|
"loss": 0.6179, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03052869779880683, |
|
"grad_norm": 2.034367122214282, |
|
"learning_rate": 1.999998721309052e-05, |
|
"loss": 0.9365, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.03061098539395186, |
|
"grad_norm": 2.5094859416224096, |
|
"learning_rate": 1.999998259559677e-05, |
|
"loss": 0.9806, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.030693272989096895, |
|
"grad_norm": 2.037387180631793, |
|
"learning_rate": 1.9999977267720245e-05, |
|
"loss": 0.9625, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.030775560584241926, |
|
"grad_norm": 1.9827245047395246, |
|
"learning_rate": 1.999997122946133e-05, |
|
"loss": 0.996, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.030857848179386957, |
|
"grad_norm": 2.000201005705768, |
|
"learning_rate": 1.9999964480820448e-05, |
|
"loss": 0.9247, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.030940135774531988, |
|
"grad_norm": 2.237696098262905, |
|
"learning_rate": 1.999995702179809e-05, |
|
"loss": 0.9432, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.031022423369677022, |
|
"grad_norm": 2.1572992959011668, |
|
"learning_rate": 1.999994885239477e-05, |
|
"loss": 0.9567, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.031104710964822053, |
|
"grad_norm": 2.5949178993773656, |
|
"learning_rate": 1.999993997261108e-05, |
|
"loss": 0.9523, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.031186998559967084, |
|
"grad_norm": 4.412522046641788, |
|
"learning_rate": 1.9999930382447644e-05, |
|
"loss": 0.9463, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.03126928615511212, |
|
"grad_norm": 4.095975078147534, |
|
"learning_rate": 1.9999920081905148e-05, |
|
"loss": 0.9562, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03135157375025715, |
|
"grad_norm": 0.7238222599759508, |
|
"learning_rate": 1.999990907098432e-05, |
|
"loss": 0.6367, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.03143386134540218, |
|
"grad_norm": 2.051737393292375, |
|
"learning_rate": 1.9999897349685948e-05, |
|
"loss": 0.9396, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.03151614894054721, |
|
"grad_norm": 3.608873989338571, |
|
"learning_rate": 1.999988491801086e-05, |
|
"loss": 0.9427, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.03159843653569224, |
|
"grad_norm": 0.5731166749659096, |
|
"learning_rate": 1.999987177595994e-05, |
|
"loss": 0.6066, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.03168072413083728, |
|
"grad_norm": 2.7911800909686244, |
|
"learning_rate": 1.9999857923534117e-05, |
|
"loss": 0.9553, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.03176301172598231, |
|
"grad_norm": 0.5640032520210956, |
|
"learning_rate": 1.9999843360734384e-05, |
|
"loss": 0.6089, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.03184529932112734, |
|
"grad_norm": 3.218289339029279, |
|
"learning_rate": 1.999982808756177e-05, |
|
"loss": 1.002, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.03192758691627237, |
|
"grad_norm": 0.5298496199217386, |
|
"learning_rate": 1.999981210401736e-05, |
|
"loss": 0.6014, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.0320098745114174, |
|
"grad_norm": 2.1651032679205544, |
|
"learning_rate": 1.9999795410102288e-05, |
|
"loss": 0.977, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.03209216210656243, |
|
"grad_norm": 3.0876660454466336, |
|
"learning_rate": 1.999977800581775e-05, |
|
"loss": 0.954, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03217444970170747, |
|
"grad_norm": 2.8016809296721186, |
|
"learning_rate": 1.999975989116497e-05, |
|
"loss": 0.9773, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.0322567372968525, |
|
"grad_norm": 2.2686954346227584, |
|
"learning_rate": 1.999974106614524e-05, |
|
"loss": 0.9284, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.03233902489199753, |
|
"grad_norm": 2.848599719139828, |
|
"learning_rate": 1.9999721530759896e-05, |
|
"loss": 0.9666, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.03242131248714256, |
|
"grad_norm": 2.5480580332195792, |
|
"learning_rate": 1.9999701285010327e-05, |
|
"loss": 0.9748, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.032503600082287594, |
|
"grad_norm": 3.0659568674712587, |
|
"learning_rate": 1.999968032889797e-05, |
|
"loss": 0.9773, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.032585887677432625, |
|
"grad_norm": 3.2486686691126607, |
|
"learning_rate": 1.9999658662424318e-05, |
|
"loss": 0.9378, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.032668175272577656, |
|
"grad_norm": 2.231555735516029, |
|
"learning_rate": 1.9999636285590903e-05, |
|
"loss": 0.9402, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.03275046286772269, |
|
"grad_norm": 7.750954267677904, |
|
"learning_rate": 1.999961319839932e-05, |
|
"loss": 0.9212, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.032832750462867724, |
|
"grad_norm": 3.9379616174216747, |
|
"learning_rate": 1.9999589400851208e-05, |
|
"loss": 0.957, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.032915038058012755, |
|
"grad_norm": 3.09592161673104, |
|
"learning_rate": 1.9999564892948254e-05, |
|
"loss": 0.9644, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.032997325653157786, |
|
"grad_norm": 0.6258510816084707, |
|
"learning_rate": 1.9999539674692206e-05, |
|
"loss": 0.6, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.03307961324830282, |
|
"grad_norm": 2.757532242911201, |
|
"learning_rate": 1.9999513746084848e-05, |
|
"loss": 0.9627, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.03316190084344785, |
|
"grad_norm": 0.518069489983011, |
|
"learning_rate": 1.999948710712803e-05, |
|
"loss": 0.5736, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.033244188438592885, |
|
"grad_norm": 2.7302377830347293, |
|
"learning_rate": 1.9999459757823632e-05, |
|
"loss": 0.9452, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.033326476033737916, |
|
"grad_norm": 3.8829507326351678, |
|
"learning_rate": 1.9999431698173614e-05, |
|
"loss": 0.9501, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.03340876362888295, |
|
"grad_norm": 3.030860642634053, |
|
"learning_rate": 1.9999402928179953e-05, |
|
"loss": 0.935, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.03349105122402798, |
|
"grad_norm": 2.7297517789446735, |
|
"learning_rate": 1.99993734478447e-05, |
|
"loss": 0.9816, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.03357333881917301, |
|
"grad_norm": 2.9131211283428864, |
|
"learning_rate": 1.999934325716995e-05, |
|
"loss": 0.953, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.03365562641431804, |
|
"grad_norm": 2.8724758175032457, |
|
"learning_rate": 1.999931235615785e-05, |
|
"loss": 0.9543, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.03373791400946307, |
|
"grad_norm": 3.8558067751787894, |
|
"learning_rate": 1.999928074481059e-05, |
|
"loss": 0.9024, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03382020160460811, |
|
"grad_norm": 4.890426251595657, |
|
"learning_rate": 1.9999248423130414e-05, |
|
"loss": 0.9557, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.03390248919975314, |
|
"grad_norm": 3.9224502088816307, |
|
"learning_rate": 1.9999215391119623e-05, |
|
"loss": 0.9625, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.03398477679489817, |
|
"grad_norm": 4.121169405356662, |
|
"learning_rate": 1.9999181648780564e-05, |
|
"loss": 0.9836, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.0340670643900432, |
|
"grad_norm": 3.2570143865225365, |
|
"learning_rate": 1.999914719611563e-05, |
|
"loss": 0.9548, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.03414935198518823, |
|
"grad_norm": 0.8551591188426197, |
|
"learning_rate": 1.999911203312727e-05, |
|
"loss": 0.6257, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.03423163958033326, |
|
"grad_norm": 2.282348243685617, |
|
"learning_rate": 1.9999076159817984e-05, |
|
"loss": 0.9534, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.0343139271754783, |
|
"grad_norm": 3.1849388817078417, |
|
"learning_rate": 1.999903957619032e-05, |
|
"loss": 0.9559, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.03439621477062333, |
|
"grad_norm": 3.0160267374462744, |
|
"learning_rate": 1.9999002282246877e-05, |
|
"loss": 0.9414, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.03447850236576836, |
|
"grad_norm": 2.8630460192439484, |
|
"learning_rate": 1.99989642779903e-05, |
|
"loss": 0.97, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.03456078996091339, |
|
"grad_norm": 0.6092993503428186, |
|
"learning_rate": 1.999892556342329e-05, |
|
"loss": 0.5762, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03464307755605842, |
|
"grad_norm": 3.558089457861364, |
|
"learning_rate": 1.9998886138548597e-05, |
|
"loss": 0.9674, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.034725365151203454, |
|
"grad_norm": 0.5392883644170888, |
|
"learning_rate": 1.9998846003369028e-05, |
|
"loss": 0.6002, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.03480765274634849, |
|
"grad_norm": 2.4265611825364175, |
|
"learning_rate": 1.9998805157887432e-05, |
|
"loss": 0.9469, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.03488994034149352, |
|
"grad_norm": 2.5084390180607508, |
|
"learning_rate": 1.9998763602106704e-05, |
|
"loss": 0.9547, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.03497222793663855, |
|
"grad_norm": 3.0592802155387284, |
|
"learning_rate": 1.99987213360298e-05, |
|
"loss": 0.9549, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.035054515531783584, |
|
"grad_norm": 3.0606106243138353, |
|
"learning_rate": 1.9998678359659726e-05, |
|
"loss": 0.925, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.035136803126928615, |
|
"grad_norm": 0.5614840770252022, |
|
"learning_rate": 1.999863467299953e-05, |
|
"loss": 0.6226, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.035219090722073645, |
|
"grad_norm": 2.3274481514972636, |
|
"learning_rate": 1.9998590276052318e-05, |
|
"loss": 0.9627, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.035301378317218676, |
|
"grad_norm": 0.5247325522573751, |
|
"learning_rate": 1.999854516882124e-05, |
|
"loss": 0.5626, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.035383665912363714, |
|
"grad_norm": 2.4963541117374635, |
|
"learning_rate": 1.999849935130951e-05, |
|
"loss": 0.9198, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.035465953507508745, |
|
"grad_norm": 2.470517097187284, |
|
"learning_rate": 1.999845282352037e-05, |
|
"loss": 0.9433, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.035548241102653776, |
|
"grad_norm": 2.7560008424762183, |
|
"learning_rate": 1.9998405585457134e-05, |
|
"loss": 0.9428, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.035630528697798806, |
|
"grad_norm": 2.7637029961336226, |
|
"learning_rate": 1.9998357637123157e-05, |
|
"loss": 0.942, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.03571281629294384, |
|
"grad_norm": 2.9100289752309045, |
|
"learning_rate": 1.9998308978521842e-05, |
|
"loss": 0.9457, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.03579510388808887, |
|
"grad_norm": 4.313071561196342, |
|
"learning_rate": 1.9998259609656645e-05, |
|
"loss": 0.9367, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.035877391483233906, |
|
"grad_norm": 2.9430306639688384, |
|
"learning_rate": 1.999820953053108e-05, |
|
"loss": 0.9292, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.03595967907837894, |
|
"grad_norm": 3.336500502830984, |
|
"learning_rate": 1.9998158741148695e-05, |
|
"loss": 0.9517, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.03604196667352397, |
|
"grad_norm": 2.830315148432978, |
|
"learning_rate": 1.99981072415131e-05, |
|
"loss": 0.9619, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.036124254268669, |
|
"grad_norm": 2.9628110908182506, |
|
"learning_rate": 1.9998055031627964e-05, |
|
"loss": 0.9342, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.03620654186381403, |
|
"grad_norm": 5.046468138436623, |
|
"learning_rate": 1.9998002111496986e-05, |
|
"loss": 0.9577, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03628882945895906, |
|
"grad_norm": 3.1781915402537324, |
|
"learning_rate": 1.9997948481123925e-05, |
|
"loss": 0.9275, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.03637111705410409, |
|
"grad_norm": 3.291481831836819, |
|
"learning_rate": 1.9997894140512595e-05, |
|
"loss": 0.9504, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.03645340464924913, |
|
"grad_norm": 3.1084220240196254, |
|
"learning_rate": 1.9997839089666854e-05, |
|
"loss": 0.9236, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.03653569224439416, |
|
"grad_norm": 3.1887037749162093, |
|
"learning_rate": 1.9997783328590613e-05, |
|
"loss": 0.8855, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.03661797983953919, |
|
"grad_norm": 3.305256714504642, |
|
"learning_rate": 1.9997726857287834e-05, |
|
"loss": 0.9552, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.03670026743468422, |
|
"grad_norm": 4.754531864085289, |
|
"learning_rate": 1.9997669675762528e-05, |
|
"loss": 0.9504, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.03678255502982925, |
|
"grad_norm": 2.474649426046985, |
|
"learning_rate": 1.9997611784018754e-05, |
|
"loss": 0.9518, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.03686484262497428, |
|
"grad_norm": 2.880288649426941, |
|
"learning_rate": 1.9997553182060633e-05, |
|
"loss": 0.8702, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.03694713022011932, |
|
"grad_norm": 2.9619541365703976, |
|
"learning_rate": 1.999749386989232e-05, |
|
"loss": 0.948, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.03702941781526435, |
|
"grad_norm": 3.0040457692945552, |
|
"learning_rate": 1.999743384751803e-05, |
|
"loss": 0.9161, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03711170541040938, |
|
"grad_norm": 0.6917840645754628, |
|
"learning_rate": 1.999737311494203e-05, |
|
"loss": 0.5999, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.03719399300555441, |
|
"grad_norm": 2.500969399378362, |
|
"learning_rate": 1.9997311672168632e-05, |
|
"loss": 0.9321, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.037276280600699443, |
|
"grad_norm": 3.4756867592830076, |
|
"learning_rate": 1.99972495192022e-05, |
|
"loss": 0.9468, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.037358568195844474, |
|
"grad_norm": 2.4507954914499974, |
|
"learning_rate": 1.9997186656047154e-05, |
|
"loss": 0.9367, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.037440855790989505, |
|
"grad_norm": 2.3319357748120066, |
|
"learning_rate": 1.9997123082707954e-05, |
|
"loss": 0.9506, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.03752314338613454, |
|
"grad_norm": 2.4614553831803896, |
|
"learning_rate": 1.999705879918912e-05, |
|
"loss": 0.9812, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.037605430981279574, |
|
"grad_norm": 2.7421103733102665, |
|
"learning_rate": 1.999699380549521e-05, |
|
"loss": 0.975, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.037687718576424604, |
|
"grad_norm": 3.193134683800622, |
|
"learning_rate": 1.9996928101630853e-05, |
|
"loss": 0.9462, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.037770006171569635, |
|
"grad_norm": 2.4788434065823353, |
|
"learning_rate": 1.999686168760071e-05, |
|
"loss": 0.9442, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.037852293766714666, |
|
"grad_norm": 2.67715161966991, |
|
"learning_rate": 1.99967945634095e-05, |
|
"loss": 0.9497, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0379345813618597, |
|
"grad_norm": 2.8286753306256234, |
|
"learning_rate": 1.9996726729061995e-05, |
|
"loss": 0.9371, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.038016868957004735, |
|
"grad_norm": 2.494636914608068, |
|
"learning_rate": 1.999665818456301e-05, |
|
"loss": 0.9369, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.038099156552149765, |
|
"grad_norm": 3.3684641604813312, |
|
"learning_rate": 1.9996588929917413e-05, |
|
"loss": 0.9167, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.038181444147294796, |
|
"grad_norm": 2.8300347810651836, |
|
"learning_rate": 1.9996518965130126e-05, |
|
"loss": 0.96, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.03826373174243983, |
|
"grad_norm": 2.7216914732590634, |
|
"learning_rate": 1.9996448290206117e-05, |
|
"loss": 0.9587, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.03834601933758486, |
|
"grad_norm": 2.8897584926398223, |
|
"learning_rate": 1.999637690515041e-05, |
|
"loss": 0.9424, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.03842830693272989, |
|
"grad_norm": 2.6782745713753364, |
|
"learning_rate": 1.9996304809968074e-05, |
|
"loss": 0.9421, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.03851059452787492, |
|
"grad_norm": 0.8391702922649521, |
|
"learning_rate": 1.9996232004664232e-05, |
|
"loss": 0.6291, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.03859288212301996, |
|
"grad_norm": 2.9110538284406213, |
|
"learning_rate": 1.9996158489244054e-05, |
|
"loss": 0.9548, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.03867516971816499, |
|
"grad_norm": 2.9735024191976813, |
|
"learning_rate": 1.9996084263712764e-05, |
|
"loss": 0.9397, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.03875745731331002, |
|
"grad_norm": 2.459802449779267, |
|
"learning_rate": 1.9996009328075635e-05, |
|
"loss": 0.9516, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.03883974490845505, |
|
"grad_norm": 1.4795476906818943, |
|
"learning_rate": 1.999593368233799e-05, |
|
"loss": 0.6175, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.03892203250360008, |
|
"grad_norm": 2.7329559825050844, |
|
"learning_rate": 1.9995857326505202e-05, |
|
"loss": 0.9279, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.03900432009874511, |
|
"grad_norm": 2.7310837617231307, |
|
"learning_rate": 1.999578026058269e-05, |
|
"loss": 0.9325, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.03908660769389015, |
|
"grad_norm": 3.580150174543716, |
|
"learning_rate": 1.999570248457594e-05, |
|
"loss": 0.9403, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.03916889528903518, |
|
"grad_norm": 3.518367412394758, |
|
"learning_rate": 1.9995623998490473e-05, |
|
"loss": 0.9346, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.03925118288418021, |
|
"grad_norm": 2.1655004063703167, |
|
"learning_rate": 1.999554480233186e-05, |
|
"loss": 0.9294, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.03933347047932524, |
|
"grad_norm": 2.857429287491222, |
|
"learning_rate": 1.9995464896105727e-05, |
|
"loss": 0.9201, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.03941575807447027, |
|
"grad_norm": 2.3230944603500094, |
|
"learning_rate": 1.999538427981776e-05, |
|
"loss": 0.9172, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.0394980456696153, |
|
"grad_norm": 2.686091492583088, |
|
"learning_rate": 1.9995302953473673e-05, |
|
"loss": 0.7009, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.039580333264760334, |
|
"grad_norm": 2.5370139223659445, |
|
"learning_rate": 1.999522091707925e-05, |
|
"loss": 0.9547, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.03966262085990537, |
|
"grad_norm": 2.9114624346952787, |
|
"learning_rate": 1.9995138170640322e-05, |
|
"loss": 0.9309, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.0397449084550504, |
|
"grad_norm": 2.636772148383987, |
|
"learning_rate": 1.9995054714162757e-05, |
|
"loss": 0.9224, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.03982719605019543, |
|
"grad_norm": 2.3887969483327005, |
|
"learning_rate": 1.9994970547652495e-05, |
|
"loss": 0.9509, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.039909483645340464, |
|
"grad_norm": 2.9497130431080256, |
|
"learning_rate": 1.9994885671115506e-05, |
|
"loss": 0.9693, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.039991771240485495, |
|
"grad_norm": 2.225873777913106, |
|
"learning_rate": 1.9994800084557826e-05, |
|
"loss": 0.9382, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.040074058835630526, |
|
"grad_norm": 3.015548118510522, |
|
"learning_rate": 1.9994713787985534e-05, |
|
"loss": 0.9084, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.040156346430775564, |
|
"grad_norm": 3.2147762822609787, |
|
"learning_rate": 1.9994626781404754e-05, |
|
"loss": 0.9432, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.040238634025920594, |
|
"grad_norm": 2.732749831828487, |
|
"learning_rate": 1.9994539064821676e-05, |
|
"loss": 0.9493, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.040320921621065625, |
|
"grad_norm": 2.718095114325169, |
|
"learning_rate": 1.9994450638242524e-05, |
|
"loss": 0.6999, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.040403209216210656, |
|
"grad_norm": 1.192110613853859, |
|
"learning_rate": 1.9994361501673586e-05, |
|
"loss": 0.606, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.04048549681135569, |
|
"grad_norm": 2.6545275290481523, |
|
"learning_rate": 1.9994271655121187e-05, |
|
"loss": 0.9562, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.04056778440650072, |
|
"grad_norm": 2.6306786770452217, |
|
"learning_rate": 1.999418109859171e-05, |
|
"loss": 0.932, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.040650072001645755, |
|
"grad_norm": 0.7723300623794189, |
|
"learning_rate": 1.99940898320916e-05, |
|
"loss": 0.6167, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.040732359596790786, |
|
"grad_norm": 3.4539680548732075, |
|
"learning_rate": 1.9993997855627323e-05, |
|
"loss": 0.9547, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.04081464719193582, |
|
"grad_norm": 8.174151834055909, |
|
"learning_rate": 1.9993905169205425e-05, |
|
"loss": 0.9532, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.04089693478708085, |
|
"grad_norm": 2.4333462034983517, |
|
"learning_rate": 1.9993811772832487e-05, |
|
"loss": 0.9201, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.04097922238222588, |
|
"grad_norm": 2.621241890180304, |
|
"learning_rate": 1.9993717666515143e-05, |
|
"loss": 0.9336, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.04106150997737091, |
|
"grad_norm": 2.8830815398438308, |
|
"learning_rate": 1.999362285026008e-05, |
|
"loss": 0.9254, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.04114379757251594, |
|
"grad_norm": 3.0315366250694136, |
|
"learning_rate": 1.9993527324074028e-05, |
|
"loss": 0.9272, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04122608516766098, |
|
"grad_norm": 2.657554413096405, |
|
"learning_rate": 1.999343108796378e-05, |
|
"loss": 0.9462, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.04130837276280601, |
|
"grad_norm": 2.905472644448609, |
|
"learning_rate": 1.999333414193617e-05, |
|
"loss": 0.9034, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.04139066035795104, |
|
"grad_norm": 3.925086807406567, |
|
"learning_rate": 1.9993236485998085e-05, |
|
"loss": 0.9315, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.04147294795309607, |
|
"grad_norm": 3.0313048521155146, |
|
"learning_rate": 1.999313812015646e-05, |
|
"loss": 0.9535, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.0415552355482411, |
|
"grad_norm": 2.962993951360446, |
|
"learning_rate": 1.9993039044418286e-05, |
|
"loss": 0.9309, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.04163752314338613, |
|
"grad_norm": 0.6779011051688715, |
|
"learning_rate": 1.99929392587906e-05, |
|
"loss": 0.5869, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.04171981073853117, |
|
"grad_norm": 2.579639640184937, |
|
"learning_rate": 1.9992838763280488e-05, |
|
"loss": 0.9118, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.0418020983336762, |
|
"grad_norm": 2.1450772300859655, |
|
"learning_rate": 1.9992737557895093e-05, |
|
"loss": 0.932, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.04188438592882123, |
|
"grad_norm": 2.4058977622816977, |
|
"learning_rate": 1.9992635642641605e-05, |
|
"loss": 0.9301, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.04196667352396626, |
|
"grad_norm": 2.4723871593300584, |
|
"learning_rate": 1.999253301752726e-05, |
|
"loss": 0.9362, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04204896111911129, |
|
"grad_norm": 2.7787980954607616, |
|
"learning_rate": 1.999242968255935e-05, |
|
"loss": 0.949, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.042131248714256324, |
|
"grad_norm": 2.7091957078534783, |
|
"learning_rate": 1.9992325637745214e-05, |
|
"loss": 0.8939, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.042213536309401355, |
|
"grad_norm": 3.104398485557938, |
|
"learning_rate": 1.9992220883092247e-05, |
|
"loss": 0.9201, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.04229582390454639, |
|
"grad_norm": 2.688893801232366, |
|
"learning_rate": 1.9992115418607886e-05, |
|
"loss": 0.9314, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.04237811149969142, |
|
"grad_norm": 0.6175757936794599, |
|
"learning_rate": 1.999200924429963e-05, |
|
"loss": 0.5823, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.042460399094836454, |
|
"grad_norm": 2.134638530502557, |
|
"learning_rate": 1.9991902360175017e-05, |
|
"loss": 0.8988, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.042542686689981485, |
|
"grad_norm": 2.660777130272323, |
|
"learning_rate": 1.9991794766241638e-05, |
|
"loss": 0.9058, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.042624974285126516, |
|
"grad_norm": 2.519959303045957, |
|
"learning_rate": 1.9991686462507137e-05, |
|
"loss": 0.9157, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.042707261880271546, |
|
"grad_norm": 0.5033254525320345, |
|
"learning_rate": 1.9991577448979213e-05, |
|
"loss": 0.5637, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.042789549475416584, |
|
"grad_norm": 2.3638963921206777, |
|
"learning_rate": 1.9991467725665604e-05, |
|
"loss": 0.9532, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.042871837070561615, |
|
"grad_norm": 2.760667379358993, |
|
"learning_rate": 1.9991357292574106e-05, |
|
"loss": 0.9194, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.042954124665706646, |
|
"grad_norm": 2.285449190484726, |
|
"learning_rate": 1.9991246149712564e-05, |
|
"loss": 0.854, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.04303641226085168, |
|
"grad_norm": 2.9222709070685315, |
|
"learning_rate": 1.9991134297088877e-05, |
|
"loss": 0.9534, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.04311869985599671, |
|
"grad_norm": 3.1630611007009355, |
|
"learning_rate": 1.9991021734710988e-05, |
|
"loss": 0.9505, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.04320098745114174, |
|
"grad_norm": 3.174869013367673, |
|
"learning_rate": 1.999090846258689e-05, |
|
"loss": 0.964, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.04328327504628677, |
|
"grad_norm": 2.4328576962151693, |
|
"learning_rate": 1.9990794480724634e-05, |
|
"loss": 0.9084, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.04336556264143181, |
|
"grad_norm": 0.5700103881605539, |
|
"learning_rate": 1.9990679789132317e-05, |
|
"loss": 0.5734, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.04344785023657684, |
|
"grad_norm": 2.392627489613796, |
|
"learning_rate": 1.9990564387818087e-05, |
|
"loss": 0.916, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.04353013783172187, |
|
"grad_norm": 3.2074775648239453, |
|
"learning_rate": 1.999044827679014e-05, |
|
"loss": 0.9095, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.0436124254268669, |
|
"grad_norm": 3.140601191667111, |
|
"learning_rate": 1.999033145605672e-05, |
|
"loss": 0.904, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04369471302201193, |
|
"grad_norm": 2.3743918081273505, |
|
"learning_rate": 1.9990213925626135e-05, |
|
"loss": 0.9173, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.04377700061715696, |
|
"grad_norm": 2.803625633325397, |
|
"learning_rate": 1.999009568550673e-05, |
|
"loss": 0.9425, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.043859288212302, |
|
"grad_norm": 2.624304052527756, |
|
"learning_rate": 1.9989976735706903e-05, |
|
"loss": 0.8778, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.04394157580744703, |
|
"grad_norm": 3.611007788459353, |
|
"learning_rate": 1.9989857076235105e-05, |
|
"loss": 0.9454, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.04402386340259206, |
|
"grad_norm": 3.0477796789876885, |
|
"learning_rate": 1.9989736707099836e-05, |
|
"loss": 0.9301, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.04410615099773709, |
|
"grad_norm": 3.661229035903915, |
|
"learning_rate": 1.998961562830965e-05, |
|
"loss": 0.9234, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.04418843859288212, |
|
"grad_norm": 3.014314493078093, |
|
"learning_rate": 1.9989493839873144e-05, |
|
"loss": 0.9205, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.04427072618802715, |
|
"grad_norm": 3.1607667446866348, |
|
"learning_rate": 1.998937134179897e-05, |
|
"loss": 0.9184, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.044353013783172184, |
|
"grad_norm": 0.5679302245778807, |
|
"learning_rate": 1.9989248134095835e-05, |
|
"loss": 0.5808, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.04443530137831722, |
|
"grad_norm": 3.4927267069905827, |
|
"learning_rate": 1.9989124216772486e-05, |
|
"loss": 0.9068, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04451758897346225, |
|
"grad_norm": 3.2792902354283524, |
|
"learning_rate": 1.9988999589837727e-05, |
|
"loss": 0.9441, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.04459987656860728, |
|
"grad_norm": 3.2813608886269465, |
|
"learning_rate": 1.9988874253300415e-05, |
|
"loss": 0.9135, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.044682164163752314, |
|
"grad_norm": 3.6532563430030387, |
|
"learning_rate": 1.9988748207169448e-05, |
|
"loss": 0.9124, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.044764451758897345, |
|
"grad_norm": 3.0411510483789708, |
|
"learning_rate": 1.9988621451453783e-05, |
|
"loss": 0.9437, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.044846739354042375, |
|
"grad_norm": 2.947067350806481, |
|
"learning_rate": 1.9988493986162426e-05, |
|
"loss": 0.9377, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.04492902694918741, |
|
"grad_norm": 3.733984375480931, |
|
"learning_rate": 1.9988365811304434e-05, |
|
"loss": 0.9302, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.045011314544332444, |
|
"grad_norm": 0.5973399530190582, |
|
"learning_rate": 1.99882369268889e-05, |
|
"loss": 0.5985, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.045093602139477475, |
|
"grad_norm": 3.1946558451893483, |
|
"learning_rate": 1.9988107332924997e-05, |
|
"loss": 0.9306, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.045175889734622506, |
|
"grad_norm": 3.0518182224655184, |
|
"learning_rate": 1.998797702942192e-05, |
|
"loss": 0.9238, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.045258177329767536, |
|
"grad_norm": 0.5186994011171457, |
|
"learning_rate": 1.9987846016388927e-05, |
|
"loss": 0.5534, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04534046492491257, |
|
"grad_norm": 2.9538180602678072, |
|
"learning_rate": 1.9987714293835326e-05, |
|
"loss": 0.9131, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.0454227525200576, |
|
"grad_norm": 3.583039419798021, |
|
"learning_rate": 1.9987581861770476e-05, |
|
"loss": 0.931, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.045505040115202636, |
|
"grad_norm": 3.872167117824797, |
|
"learning_rate": 1.9987448720203783e-05, |
|
"loss": 0.9149, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.045587327710347667, |
|
"grad_norm": 0.5153323660807152, |
|
"learning_rate": 1.9987314869144704e-05, |
|
"loss": 0.5707, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.0456696153054927, |
|
"grad_norm": 3.2458016621373162, |
|
"learning_rate": 1.9987180308602752e-05, |
|
"loss": 0.9481, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.04575190290063773, |
|
"grad_norm": 0.5131089745749331, |
|
"learning_rate": 1.998704503858748e-05, |
|
"loss": 0.6107, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.04583419049578276, |
|
"grad_norm": 3.826718669936501, |
|
"learning_rate": 1.99869090591085e-05, |
|
"loss": 0.9334, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.04591647809092779, |
|
"grad_norm": 2.808877894852513, |
|
"learning_rate": 1.9986772370175475e-05, |
|
"loss": 0.9313, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.04599876568607283, |
|
"grad_norm": 3.429756806838896, |
|
"learning_rate": 1.998663497179811e-05, |
|
"loss": 0.9041, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.04608105328121786, |
|
"grad_norm": 3.927553685701978, |
|
"learning_rate": 1.998649686398617e-05, |
|
"loss": 0.9229, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04616334087636289, |
|
"grad_norm": 4.358404357254217, |
|
"learning_rate": 1.9986358046749463e-05, |
|
"loss": 0.9453, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.04624562847150792, |
|
"grad_norm": 0.6974205247527027, |
|
"learning_rate": 1.998621852009785e-05, |
|
"loss": 0.582, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.04632791606665295, |
|
"grad_norm": 2.8790199811794213, |
|
"learning_rate": 1.9986078284041245e-05, |
|
"loss": 0.9073, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.04641020366179798, |
|
"grad_norm": 3.1507198941552343, |
|
"learning_rate": 1.998593733858961e-05, |
|
"loss": 0.9285, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.04649249125694301, |
|
"grad_norm": 3.3010925203438757, |
|
"learning_rate": 1.9985795683752955e-05, |
|
"loss": 0.8975, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.04657477885208805, |
|
"grad_norm": 2.4173724120050277, |
|
"learning_rate": 1.9985653319541345e-05, |
|
"loss": 0.9211, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.04665706644723308, |
|
"grad_norm": 3.219239778661617, |
|
"learning_rate": 1.9985510245964894e-05, |
|
"loss": 0.9414, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.04673935404237811, |
|
"grad_norm": 4.702680418398121, |
|
"learning_rate": 1.9985366463033763e-05, |
|
"loss": 0.8886, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.04682164163752314, |
|
"grad_norm": 2.946137626961066, |
|
"learning_rate": 1.9985221970758166e-05, |
|
"loss": 0.907, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.04690392923266817, |
|
"grad_norm": 3.1637086789258224, |
|
"learning_rate": 1.9985076769148373e-05, |
|
"loss": 0.9063, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.046986216827813204, |
|
"grad_norm": 2.7457117180469286, |
|
"learning_rate": 1.9984930858214695e-05, |
|
"loss": 0.9163, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.04706850442295824, |
|
"grad_norm": 2.8795617581547597, |
|
"learning_rate": 1.9984784237967495e-05, |
|
"loss": 0.9272, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.04715079201810327, |
|
"grad_norm": 3.539552457926088, |
|
"learning_rate": 1.998463690841719e-05, |
|
"loss": 0.9254, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.047233079613248304, |
|
"grad_norm": 2.590893854876316, |
|
"learning_rate": 1.998448886957425e-05, |
|
"loss": 0.9135, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.047315367208393334, |
|
"grad_norm": 3.385121747004568, |
|
"learning_rate": 1.9984340121449187e-05, |
|
"loss": 0.898, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.047397654803538365, |
|
"grad_norm": 2.8668381053066248, |
|
"learning_rate": 1.998419066405257e-05, |
|
"loss": 0.9111, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.047479942398683396, |
|
"grad_norm": 0.5561294337589316, |
|
"learning_rate": 1.9984040497395016e-05, |
|
"loss": 0.6026, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.047562229993828434, |
|
"grad_norm": 2.7790207529975683, |
|
"learning_rate": 1.9983889621487193e-05, |
|
"loss": 0.8813, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.047644517588973465, |
|
"grad_norm": 2.929493346002011, |
|
"learning_rate": 1.9983738036339818e-05, |
|
"loss": 0.934, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.047726805184118495, |
|
"grad_norm": 2.6432622003873294, |
|
"learning_rate": 1.9983585741963655e-05, |
|
"loss": 0.935, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.047809092779263526, |
|
"grad_norm": 2.343596103466015, |
|
"learning_rate": 1.998343273836953e-05, |
|
"loss": 0.8885, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.04789138037440856, |
|
"grad_norm": 2.6377392327317355, |
|
"learning_rate": 1.998327902556831e-05, |
|
"loss": 0.9195, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.04797366796955359, |
|
"grad_norm": 0.5734849677326599, |
|
"learning_rate": 1.9983124603570915e-05, |
|
"loss": 0.5804, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.04805595556469862, |
|
"grad_norm": 2.359098397716237, |
|
"learning_rate": 1.9982969472388313e-05, |
|
"loss": 0.9154, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.048138243159843656, |
|
"grad_norm": 3.07285660000184, |
|
"learning_rate": 1.9982813632031526e-05, |
|
"loss": 0.9293, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.04822053075498869, |
|
"grad_norm": 3.145177565014435, |
|
"learning_rate": 1.9982657082511624e-05, |
|
"loss": 0.909, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.04830281835013372, |
|
"grad_norm": 2.4460324686547, |
|
"learning_rate": 1.9982499823839726e-05, |
|
"loss": 0.9172, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.04838510594527875, |
|
"grad_norm": 2.7860695223687335, |
|
"learning_rate": 1.9982341856027006e-05, |
|
"loss": 0.8962, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.04846739354042378, |
|
"grad_norm": 2.5003193611135126, |
|
"learning_rate": 1.9982183179084683e-05, |
|
"loss": 0.9523, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.04854968113556881, |
|
"grad_norm": 0.5728078039718163, |
|
"learning_rate": 1.998202379302403e-05, |
|
"loss": 0.5939, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04863196873071385, |
|
"grad_norm": 2.513890686672487, |
|
"learning_rate": 1.9981863697856376e-05, |
|
"loss": 0.9027, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.04871425632585888, |
|
"grad_norm": 6.401109317568734, |
|
"learning_rate": 1.9981702893593086e-05, |
|
"loss": 0.9041, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.04879654392100391, |
|
"grad_norm": 0.526955304818451, |
|
"learning_rate": 1.9981541380245586e-05, |
|
"loss": 0.6109, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.04887883151614894, |
|
"grad_norm": 0.5280472746795982, |
|
"learning_rate": 1.9981379157825346e-05, |
|
"loss": 0.5801, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.04896111911129397, |
|
"grad_norm": 2.831289529507686, |
|
"learning_rate": 1.99812162263439e-05, |
|
"loss": 0.9296, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.049043406706439, |
|
"grad_norm": 2.5183731275746637, |
|
"learning_rate": 1.998105258581281e-05, |
|
"loss": 0.9373, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.04912569430158403, |
|
"grad_norm": 2.290556291606923, |
|
"learning_rate": 1.998088823624371e-05, |
|
"loss": 0.9339, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.04920798189672907, |
|
"grad_norm": 2.9827790643550065, |
|
"learning_rate": 1.998072317764827e-05, |
|
"loss": 0.9341, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.0492902694918741, |
|
"grad_norm": 3.9980040686222535, |
|
"learning_rate": 1.998055741003822e-05, |
|
"loss": 0.9428, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.04937255708701913, |
|
"grad_norm": 2.9421068715344125, |
|
"learning_rate": 1.998039093342533e-05, |
|
"loss": 0.9183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04945484468216416, |
|
"grad_norm": 2.3512621164999654, |
|
"learning_rate": 1.998022374782143e-05, |
|
"loss": 0.9139, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.049537132277309194, |
|
"grad_norm": 2.8922341692853863, |
|
"learning_rate": 1.9980055853238394e-05, |
|
"loss": 0.8847, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.049619419872454225, |
|
"grad_norm": 2.5544870335833916, |
|
"learning_rate": 1.9979887249688158e-05, |
|
"loss": 0.9322, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.04970170746759926, |
|
"grad_norm": 2.3713588179833427, |
|
"learning_rate": 1.9979717937182685e-05, |
|
"loss": 0.8953, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.04978399506274429, |
|
"grad_norm": 2.567195793905517, |
|
"learning_rate": 1.9979547915734014e-05, |
|
"loss": 0.9287, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.049866282657889324, |
|
"grad_norm": 2.116439796262553, |
|
"learning_rate": 1.997937718535422e-05, |
|
"loss": 0.9122, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.049948570253034355, |
|
"grad_norm": 2.6728583449200967, |
|
"learning_rate": 1.9979205746055426e-05, |
|
"loss": 0.9409, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.050030857848179386, |
|
"grad_norm": 2.9303321533796147, |
|
"learning_rate": 1.9979033597849817e-05, |
|
"loss": 0.877, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.05011314544332442, |
|
"grad_norm": 2.6453736009345103, |
|
"learning_rate": 1.9978860740749618e-05, |
|
"loss": 0.9264, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.05019543303846945, |
|
"grad_norm": 0.6463475109604742, |
|
"learning_rate": 1.9978687174767115e-05, |
|
"loss": 0.6037, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.050277720633614485, |
|
"grad_norm": 2.1568723876857514, |
|
"learning_rate": 1.9978512899914632e-05, |
|
"loss": 0.9291, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.050360008228759516, |
|
"grad_norm": 2.779974581309181, |
|
"learning_rate": 1.997833791620455e-05, |
|
"loss": 0.9487, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.05044229582390455, |
|
"grad_norm": 2.6541794961423726, |
|
"learning_rate": 1.9978162223649303e-05, |
|
"loss": 0.9314, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.05052458341904958, |
|
"grad_norm": 2.204822617972563, |
|
"learning_rate": 1.9977985822261367e-05, |
|
"loss": 0.9195, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.05060687101419461, |
|
"grad_norm": 2.528877153941993, |
|
"learning_rate": 1.9977808712053276e-05, |
|
"loss": 0.925, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.05068915860933964, |
|
"grad_norm": 2.89407673046398, |
|
"learning_rate": 1.9977630893037613e-05, |
|
"loss": 0.9164, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.05077144620448468, |
|
"grad_norm": 2.8147196835709924, |
|
"learning_rate": 1.9977452365227005e-05, |
|
"loss": 0.9109, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.05085373379962971, |
|
"grad_norm": 2.8624190313017697, |
|
"learning_rate": 1.997727312863414e-05, |
|
"loss": 0.9227, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.05093602139477474, |
|
"grad_norm": 2.6853591545801243, |
|
"learning_rate": 1.9977093183271746e-05, |
|
"loss": 0.9043, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.05101830898991977, |
|
"grad_norm": 2.847809177384018, |
|
"learning_rate": 1.997691252915261e-05, |
|
"loss": 0.8797, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.0511005965850648, |
|
"grad_norm": 2.5413962256979477, |
|
"learning_rate": 1.9976731166289565e-05, |
|
"loss": 0.888, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.05118288418020983, |
|
"grad_norm": 2.4434297876428768, |
|
"learning_rate": 1.997654909469549e-05, |
|
"loss": 0.9193, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.05126517177535486, |
|
"grad_norm": 2.554334961124947, |
|
"learning_rate": 1.9976366314383323e-05, |
|
"loss": 0.945, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.0513474593704999, |
|
"grad_norm": 3.0606359366025155, |
|
"learning_rate": 1.9976182825366052e-05, |
|
"loss": 0.9018, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.05142974696564493, |
|
"grad_norm": 2.7602463387503877, |
|
"learning_rate": 1.9975998627656704e-05, |
|
"loss": 0.9572, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.05151203456078996, |
|
"grad_norm": 2.645779738054759, |
|
"learning_rate": 1.997581372126837e-05, |
|
"loss": 0.8986, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.05159432215593499, |
|
"grad_norm": 2.3004786981907808, |
|
"learning_rate": 1.997562810621418e-05, |
|
"loss": 0.9378, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.05167660975108002, |
|
"grad_norm": 3.0529134410232954, |
|
"learning_rate": 1.9975441782507327e-05, |
|
"loss": 0.9374, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.051758897346225054, |
|
"grad_norm": 6.366982443959264, |
|
"learning_rate": 1.997525475016104e-05, |
|
"loss": 0.9572, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.05184118494137009, |
|
"grad_norm": 7.143057307651942, |
|
"learning_rate": 1.9975067009188608e-05, |
|
"loss": 0.9368, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05192347253651512, |
|
"grad_norm": 2.486114121904295, |
|
"learning_rate": 1.997487855960337e-05, |
|
"loss": 0.8618, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.05200576013166015, |
|
"grad_norm": 2.909503733964849, |
|
"learning_rate": 1.9974689401418712e-05, |
|
"loss": 0.8998, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.052088047726805184, |
|
"grad_norm": 2.506345699862428, |
|
"learning_rate": 1.9974499534648068e-05, |
|
"loss": 0.9119, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.052170335321950215, |
|
"grad_norm": 0.5966023669088316, |
|
"learning_rate": 1.9974308959304933e-05, |
|
"loss": 0.5656, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.052252622917095246, |
|
"grad_norm": 2.9205909740125784, |
|
"learning_rate": 1.997411767540284e-05, |
|
"loss": 0.9109, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.052334910512240276, |
|
"grad_norm": 2.2641759973862534, |
|
"learning_rate": 1.9973925682955378e-05, |
|
"loss": 0.9023, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.052417198107385314, |
|
"grad_norm": 2.4641130571954086, |
|
"learning_rate": 1.9973732981976188e-05, |
|
"loss": 0.909, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.052499485702530345, |
|
"grad_norm": 2.2247912270982195, |
|
"learning_rate": 1.9973539572478955e-05, |
|
"loss": 0.9111, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.052581773297675376, |
|
"grad_norm": 2.182850954981328, |
|
"learning_rate": 1.9973345454477422e-05, |
|
"loss": 0.885, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.05266406089282041, |
|
"grad_norm": 0.5616279149900174, |
|
"learning_rate": 1.997315062798538e-05, |
|
"loss": 0.5634, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05274634848796544, |
|
"grad_norm": 2.1709200144119287, |
|
"learning_rate": 1.9972955093016662e-05, |
|
"loss": 0.9021, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.05282863608311047, |
|
"grad_norm": 3.0243470611887853, |
|
"learning_rate": 1.9972758849585167e-05, |
|
"loss": 0.923, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.052910923678255506, |
|
"grad_norm": 0.5181983481216014, |
|
"learning_rate": 1.9972561897704832e-05, |
|
"loss": 0.589, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.05299321127340054, |
|
"grad_norm": 2.3618384003718904, |
|
"learning_rate": 1.997236423738965e-05, |
|
"loss": 0.8893, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.05307549886854557, |
|
"grad_norm": 2.83302899205139, |
|
"learning_rate": 1.997216586865366e-05, |
|
"loss": 0.9056, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.0531577864636906, |
|
"grad_norm": 2.1524435897397756, |
|
"learning_rate": 1.9971966791510952e-05, |
|
"loss": 0.8875, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.05324007405883563, |
|
"grad_norm": 0.5403616002875096, |
|
"learning_rate": 1.9971767005975676e-05, |
|
"loss": 0.5864, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.05332236165398066, |
|
"grad_norm": 3.032727501630103, |
|
"learning_rate": 1.9971566512062016e-05, |
|
"loss": 0.9269, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.0534046492491257, |
|
"grad_norm": 2.677613120586094, |
|
"learning_rate": 1.9971365309784222e-05, |
|
"loss": 0.9319, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.05348693684427073, |
|
"grad_norm": 2.7527601762070626, |
|
"learning_rate": 1.9971163399156577e-05, |
|
"loss": 0.911, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05356922443941576, |
|
"grad_norm": 2.456807133771137, |
|
"learning_rate": 1.9970960780193435e-05, |
|
"loss": 0.9274, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.05365151203456079, |
|
"grad_norm": 0.5512339745238304, |
|
"learning_rate": 1.9970757452909185e-05, |
|
"loss": 0.5999, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.05373379962970582, |
|
"grad_norm": 3.3078302086877454, |
|
"learning_rate": 1.997055341731827e-05, |
|
"loss": 0.9161, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.05381608722485085, |
|
"grad_norm": 1.9567891820560834, |
|
"learning_rate": 1.9970348673435187e-05, |
|
"loss": 0.8954, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.05389837481999588, |
|
"grad_norm": 2.4558167849951027, |
|
"learning_rate": 1.9970143221274477e-05, |
|
"loss": 0.9041, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.05398066241514092, |
|
"grad_norm": 2.6700615275845214, |
|
"learning_rate": 1.996993706085074e-05, |
|
"loss": 0.9406, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.05406295001028595, |
|
"grad_norm": 2.47054592661293, |
|
"learning_rate": 1.9969730192178618e-05, |
|
"loss": 0.9075, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.05414523760543098, |
|
"grad_norm": 2.527986443897195, |
|
"learning_rate": 1.9969522615272806e-05, |
|
"loss": 0.9012, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.05422752520057601, |
|
"grad_norm": 0.5565334590513972, |
|
"learning_rate": 1.9969314330148056e-05, |
|
"loss": 0.5587, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.054309812795721044, |
|
"grad_norm": 1.8601076711624556, |
|
"learning_rate": 1.9969105336819154e-05, |
|
"loss": 0.8991, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.054392100390866074, |
|
"grad_norm": 2.0210809868042356, |
|
"learning_rate": 1.9968895635300956e-05, |
|
"loss": 0.9302, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.05447438798601111, |
|
"grad_norm": 2.1871429796039363, |
|
"learning_rate": 1.9968685225608353e-05, |
|
"loss": 0.8719, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.05455667558115614, |
|
"grad_norm": 2.699275991596056, |
|
"learning_rate": 1.9968474107756295e-05, |
|
"loss": 0.9107, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.054638963176301174, |
|
"grad_norm": 2.921814293546767, |
|
"learning_rate": 1.996826228175978e-05, |
|
"loss": 0.9124, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.054721250771446205, |
|
"grad_norm": 2.9121454433336917, |
|
"learning_rate": 1.9968049747633848e-05, |
|
"loss": 0.8872, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.054803538366591235, |
|
"grad_norm": 4.665109966003875, |
|
"learning_rate": 1.996783650539361e-05, |
|
"loss": 0.9337, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.054885825961736266, |
|
"grad_norm": 2.2334882062761814, |
|
"learning_rate": 1.9967622555054204e-05, |
|
"loss": 0.9249, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.0549681135568813, |
|
"grad_norm": 1.8093225226331142, |
|
"learning_rate": 1.9967407896630837e-05, |
|
"loss": 0.8666, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.055050401152026335, |
|
"grad_norm": 0.5652676807003993, |
|
"learning_rate": 1.996719253013875e-05, |
|
"loss": 0.5961, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.055132688747171366, |
|
"grad_norm": 0.5100457321950321, |
|
"learning_rate": 1.9966976455593247e-05, |
|
"loss": 0.5618, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.055214976342316396, |
|
"grad_norm": 2.773850609378529, |
|
"learning_rate": 1.9966759673009677e-05, |
|
"loss": 0.9275, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.05529726393746143, |
|
"grad_norm": 2.5443256480658296, |
|
"learning_rate": 1.9966542182403437e-05, |
|
"loss": 0.9077, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.05537955153260646, |
|
"grad_norm": 3.282011580384134, |
|
"learning_rate": 1.9966323983789983e-05, |
|
"loss": 0.921, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.05546183912775149, |
|
"grad_norm": 2.2203588190464885, |
|
"learning_rate": 1.996610507718481e-05, |
|
"loss": 0.8988, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.05554412672289653, |
|
"grad_norm": 4.790143157081725, |
|
"learning_rate": 1.996588546260347e-05, |
|
"loss": 0.9526, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.05562641431804156, |
|
"grad_norm": 2.092143807841506, |
|
"learning_rate": 1.9965665140061565e-05, |
|
"loss": 0.915, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.05570870191318659, |
|
"grad_norm": 1.9784649465852888, |
|
"learning_rate": 1.9965444109574744e-05, |
|
"loss": 0.905, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.05579098950833162, |
|
"grad_norm": 2.7843501048163217, |
|
"learning_rate": 1.9965222371158718e-05, |
|
"loss": 0.8951, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.05587327710347665, |
|
"grad_norm": 2.6331805589786383, |
|
"learning_rate": 1.9964999924829224e-05, |
|
"loss": 0.8614, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.05595556469862168, |
|
"grad_norm": 0.7467735870885243, |
|
"learning_rate": 1.9964776770602078e-05, |
|
"loss": 0.6063, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05603785229376671, |
|
"grad_norm": 2.680536053721946, |
|
"learning_rate": 1.9964552908493123e-05, |
|
"loss": 0.8782, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.05612013988891175, |
|
"grad_norm": 3.49552823109986, |
|
"learning_rate": 1.9964328338518264e-05, |
|
"loss": 0.902, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.05620242748405678, |
|
"grad_norm": 2.120123047682193, |
|
"learning_rate": 1.996410306069346e-05, |
|
"loss": 0.9496, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.05628471507920181, |
|
"grad_norm": 1.937156037107827, |
|
"learning_rate": 1.9963877075034706e-05, |
|
"loss": 0.8875, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.05636700267434684, |
|
"grad_norm": 2.4742509534066754, |
|
"learning_rate": 1.9963650381558063e-05, |
|
"loss": 0.9192, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.05644929026949187, |
|
"grad_norm": 2.3426169694208903, |
|
"learning_rate": 1.996342298027963e-05, |
|
"loss": 0.9481, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.0565315778646369, |
|
"grad_norm": 2.1543307158741434, |
|
"learning_rate": 1.9963194871215557e-05, |
|
"loss": 0.8948, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.05661386545978194, |
|
"grad_norm": 1.7721734117310426, |
|
"learning_rate": 1.9962966054382062e-05, |
|
"loss": 0.8769, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.05669615305492697, |
|
"grad_norm": 2.637184520870366, |
|
"learning_rate": 1.9962736529795388e-05, |
|
"loss": 0.9305, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.056778440650072, |
|
"grad_norm": 2.5552424968357306, |
|
"learning_rate": 1.9962506297471846e-05, |
|
"loss": 0.9011, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05686072824521703, |
|
"grad_norm": 2.1091093097631797, |
|
"learning_rate": 1.9962275357427787e-05, |
|
"loss": 0.9153, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.056943015840362064, |
|
"grad_norm": 3.8893843496883775, |
|
"learning_rate": 1.996204370967962e-05, |
|
"loss": 0.9516, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.057025303435507095, |
|
"grad_norm": 0.6989567675386245, |
|
"learning_rate": 1.9961811354243798e-05, |
|
"loss": 0.6088, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.057107591030652126, |
|
"grad_norm": 3.0703220705587326, |
|
"learning_rate": 1.9961578291136834e-05, |
|
"loss": 0.9468, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.057189878625797164, |
|
"grad_norm": 0.5452905698296876, |
|
"learning_rate": 1.9961344520375276e-05, |
|
"loss": 0.5795, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.057272166220942194, |
|
"grad_norm": 3.477621910759164, |
|
"learning_rate": 1.9961110041975732e-05, |
|
"loss": 0.9586, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.057354453816087225, |
|
"grad_norm": 3.5385882928206454, |
|
"learning_rate": 1.9960874855954863e-05, |
|
"loss": 0.9508, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.057436741411232256, |
|
"grad_norm": 2.6972731084205437, |
|
"learning_rate": 1.996063896232938e-05, |
|
"loss": 0.9313, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.05751902900637729, |
|
"grad_norm": 0.6344603977192381, |
|
"learning_rate": 1.9960402361116026e-05, |
|
"loss": 0.6044, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.05760131660152232, |
|
"grad_norm": 5.571545453742246, |
|
"learning_rate": 1.996016505233162e-05, |
|
"loss": 0.92, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.057683604196667355, |
|
"grad_norm": 2.859612009759652, |
|
"learning_rate": 1.9959927035993017e-05, |
|
"loss": 0.897, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.057765891791812386, |
|
"grad_norm": 2.426187536557682, |
|
"learning_rate": 1.9959688312117128e-05, |
|
"loss": 0.9305, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.05784817938695742, |
|
"grad_norm": 2.7388965530788, |
|
"learning_rate": 1.995944888072091e-05, |
|
"loss": 0.9145, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.05793046698210245, |
|
"grad_norm": 2.776291815110774, |
|
"learning_rate": 1.995920874182137e-05, |
|
"loss": 0.9075, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.05801275457724748, |
|
"grad_norm": 2.575679639237728, |
|
"learning_rate": 1.995896789543557e-05, |
|
"loss": 0.9045, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.05809504217239251, |
|
"grad_norm": 3.5403132152741263, |
|
"learning_rate": 1.9958726341580615e-05, |
|
"loss": 0.913, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.05817732976753754, |
|
"grad_norm": 2.58072580176139, |
|
"learning_rate": 1.995848408027367e-05, |
|
"loss": 0.9229, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.05825961736268258, |
|
"grad_norm": 2.5124996774654473, |
|
"learning_rate": 1.9958241111531942e-05, |
|
"loss": 0.9126, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.05834190495782761, |
|
"grad_norm": 2.36119565147592, |
|
"learning_rate": 1.995799743537269e-05, |
|
"loss": 0.9066, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.05842419255297264, |
|
"grad_norm": 3.2376572469679847, |
|
"learning_rate": 1.9957753051813228e-05, |
|
"loss": 0.9107, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.05850648014811767, |
|
"grad_norm": 0.5718002254539629, |
|
"learning_rate": 1.9957507960870908e-05, |
|
"loss": 0.5838, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.0585887677432627, |
|
"grad_norm": 2.9835296928097765, |
|
"learning_rate": 1.9957262162563155e-05, |
|
"loss": 0.9062, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.05867105533840773, |
|
"grad_norm": 2.312335655498833, |
|
"learning_rate": 1.9957015656907417e-05, |
|
"loss": 0.9331, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.05875334293355277, |
|
"grad_norm": 2.3792417930038168, |
|
"learning_rate": 1.9956768443921214e-05, |
|
"loss": 0.9371, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.0588356305286978, |
|
"grad_norm": 3.0747711781753955, |
|
"learning_rate": 1.99565205236221e-05, |
|
"loss": 0.9245, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.05891791812384283, |
|
"grad_norm": 2.469147337654409, |
|
"learning_rate": 1.9956271896027696e-05, |
|
"loss": 0.9053, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.05900020571898786, |
|
"grad_norm": 4.677348829502867, |
|
"learning_rate": 1.9956022561155655e-05, |
|
"loss": 0.9316, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.05908249331413289, |
|
"grad_norm": 2.574073344258724, |
|
"learning_rate": 1.9955772519023694e-05, |
|
"loss": 0.9144, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.059164780909277924, |
|
"grad_norm": 0.6010291838312377, |
|
"learning_rate": 1.995552176964958e-05, |
|
"loss": 0.5969, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.05924706850442296, |
|
"grad_norm": 0.48362592184616704, |
|
"learning_rate": 1.9955270313051115e-05, |
|
"loss": 0.6105, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05932935609956799, |
|
"grad_norm": 4.6846130266410935, |
|
"learning_rate": 1.995501814924617e-05, |
|
"loss": 0.9146, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.05941164369471302, |
|
"grad_norm": 2.577204170673208, |
|
"learning_rate": 1.9954765278252656e-05, |
|
"loss": 0.9073, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.059493931289858054, |
|
"grad_norm": 4.7923802267754985, |
|
"learning_rate": 1.995451170008854e-05, |
|
"loss": 0.9192, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.059576218885003085, |
|
"grad_norm": 3.637556402050712, |
|
"learning_rate": 1.995425741477183e-05, |
|
"loss": 0.8916, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.059658506480148116, |
|
"grad_norm": 3.318312481516906, |
|
"learning_rate": 1.9954002422320593e-05, |
|
"loss": 0.8979, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.05974079407529315, |
|
"grad_norm": 2.2896767162285476, |
|
"learning_rate": 1.9953746722752944e-05, |
|
"loss": 0.9078, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.059823081670438184, |
|
"grad_norm": 2.4261610228532433, |
|
"learning_rate": 1.9953490316087045e-05, |
|
"loss": 0.9094, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.059905369265583215, |
|
"grad_norm": 3.5742603087267533, |
|
"learning_rate": 1.9953233202341115e-05, |
|
"loss": 0.9668, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.059987656860728246, |
|
"grad_norm": 3.646866686252275, |
|
"learning_rate": 1.995297538153341e-05, |
|
"loss": 0.9081, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.06006994445587328, |
|
"grad_norm": 3.5756298093016134, |
|
"learning_rate": 1.9952716853682258e-05, |
|
"loss": 0.932, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06015223205101831, |
|
"grad_norm": 2.461737210935374, |
|
"learning_rate": 1.9952457618806016e-05, |
|
"loss": 0.9161, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.06023451964616334, |
|
"grad_norm": 2.9435688364135038, |
|
"learning_rate": 1.99521976769231e-05, |
|
"loss": 0.8791, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.060316807241308376, |
|
"grad_norm": 3.752079579941048, |
|
"learning_rate": 1.995193702805198e-05, |
|
"loss": 0.8864, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.06039909483645341, |
|
"grad_norm": 4.53396790098707, |
|
"learning_rate": 1.9951675672211163e-05, |
|
"loss": 0.8929, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.06048138243159844, |
|
"grad_norm": 4.961620647630342, |
|
"learning_rate": 1.9951413609419225e-05, |
|
"loss": 0.8536, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.06056367002674347, |
|
"grad_norm": 3.891304133200799, |
|
"learning_rate": 1.995115083969478e-05, |
|
"loss": 0.8944, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.0606459576218885, |
|
"grad_norm": 2.712319861053012, |
|
"learning_rate": 1.9950887363056495e-05, |
|
"loss": 0.9206, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.06072824521703353, |
|
"grad_norm": 4.223019111124196, |
|
"learning_rate": 1.9950623179523085e-05, |
|
"loss": 0.9025, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.06081053281217856, |
|
"grad_norm": 5.016232013409377, |
|
"learning_rate": 1.9950358289113317e-05, |
|
"loss": 0.8815, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.0608928204073236, |
|
"grad_norm": 2.6897434242049694, |
|
"learning_rate": 1.995009269184601e-05, |
|
"loss": 0.8836, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06097510800246863, |
|
"grad_norm": 0.7568433896575619, |
|
"learning_rate": 1.994982638774003e-05, |
|
"loss": 0.5993, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.06105739559761366, |
|
"grad_norm": 2.553452324246678, |
|
"learning_rate": 1.9949559376814296e-05, |
|
"loss": 0.8986, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.06113968319275869, |
|
"grad_norm": 0.5018812785768227, |
|
"learning_rate": 1.9949291659087776e-05, |
|
"loss": 0.5597, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.06122197078790372, |
|
"grad_norm": 2.4064235706469, |
|
"learning_rate": 1.994902323457949e-05, |
|
"loss": 0.8943, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.06130425838304875, |
|
"grad_norm": 2.295948111702661, |
|
"learning_rate": 1.9948754103308504e-05, |
|
"loss": 0.8668, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.06138654597819379, |
|
"grad_norm": 0.6531820015601002, |
|
"learning_rate": 1.9948484265293934e-05, |
|
"loss": 0.5944, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.06146883357333882, |
|
"grad_norm": 2.488686897667554, |
|
"learning_rate": 1.9948213720554955e-05, |
|
"loss": 0.8939, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.06155112116848385, |
|
"grad_norm": 2.2478829073807867, |
|
"learning_rate": 1.994794246911078e-05, |
|
"loss": 0.878, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.06163340876362888, |
|
"grad_norm": 3.21297658438237, |
|
"learning_rate": 1.9947670510980686e-05, |
|
"loss": 0.9367, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.061715696358773914, |
|
"grad_norm": 2.5032219143064296, |
|
"learning_rate": 1.9947397846183986e-05, |
|
"loss": 0.909, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.061797983953918945, |
|
"grad_norm": 2.3821398027611367, |
|
"learning_rate": 1.9947124474740052e-05, |
|
"loss": 0.8767, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.061880271549063975, |
|
"grad_norm": 4.029427101966951, |
|
"learning_rate": 1.99468503966683e-05, |
|
"loss": 0.8618, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.06196255914420901, |
|
"grad_norm": 2.404778806152705, |
|
"learning_rate": 1.9946575611988207e-05, |
|
"loss": 0.9047, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.062044846739354044, |
|
"grad_norm": 2.962612526189809, |
|
"learning_rate": 1.9946300120719287e-05, |
|
"loss": 0.889, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.062127134334499075, |
|
"grad_norm": 2.5437765511188695, |
|
"learning_rate": 1.994602392288112e-05, |
|
"loss": 0.9399, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.062209421929644106, |
|
"grad_norm": 0.5539735241167393, |
|
"learning_rate": 1.9945747018493314e-05, |
|
"loss": 0.5963, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.062291709524789136, |
|
"grad_norm": 3.1779858985642817, |
|
"learning_rate": 1.9945469407575543e-05, |
|
"loss": 0.876, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.06237399711993417, |
|
"grad_norm": 2.687485842671492, |
|
"learning_rate": 1.9945191090147537e-05, |
|
"loss": 0.9022, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.062456284715079205, |
|
"grad_norm": 2.9422463927653766, |
|
"learning_rate": 1.9944912066229058e-05, |
|
"loss": 0.8956, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.06253857231022424, |
|
"grad_norm": 4.157936413648122, |
|
"learning_rate": 1.9944632335839927e-05, |
|
"loss": 0.9138, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06262085990536927, |
|
"grad_norm": 0.48567249965915693, |
|
"learning_rate": 1.9944351899000026e-05, |
|
"loss": 0.5563, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.0627031475005143, |
|
"grad_norm": 2.7821820465506, |
|
"learning_rate": 1.9944070755729266e-05, |
|
"loss": 0.9122, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.06278543509565933, |
|
"grad_norm": 2.65823773191475, |
|
"learning_rate": 1.9943788906047624e-05, |
|
"loss": 0.9009, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.06286772269080436, |
|
"grad_norm": 0.4745158162176376, |
|
"learning_rate": 1.9943506349975118e-05, |
|
"loss": 0.5845, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.06295001028594939, |
|
"grad_norm": 4.304541123505603, |
|
"learning_rate": 1.9943223087531824e-05, |
|
"loss": 0.911, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.06303229788109442, |
|
"grad_norm": 2.599121308286042, |
|
"learning_rate": 1.9942939118737866e-05, |
|
"loss": 0.9082, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.06311458547623945, |
|
"grad_norm": 2.661380985142305, |
|
"learning_rate": 1.9942654443613413e-05, |
|
"loss": 0.889, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.06319687307138448, |
|
"grad_norm": 2.7289869422777406, |
|
"learning_rate": 1.994236906217869e-05, |
|
"loss": 0.8807, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.06327916066652953, |
|
"grad_norm": 3.552184676009908, |
|
"learning_rate": 1.9942082974453968e-05, |
|
"loss": 0.8869, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.06336144826167456, |
|
"grad_norm": 3.3116779659066222, |
|
"learning_rate": 1.994179618045957e-05, |
|
"loss": 0.886, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06344373585681959, |
|
"grad_norm": 2.733151926112565, |
|
"learning_rate": 1.9941508680215874e-05, |
|
"loss": 0.878, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.06352602345196462, |
|
"grad_norm": 3.689575278866226, |
|
"learning_rate": 1.9941220473743297e-05, |
|
"loss": 0.9012, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.06360831104710965, |
|
"grad_norm": 3.6509278934675344, |
|
"learning_rate": 1.994093156106232e-05, |
|
"loss": 0.8859, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.06369059864225468, |
|
"grad_norm": 3.4408763078150373, |
|
"learning_rate": 1.9940641942193462e-05, |
|
"loss": 0.9895, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.06377288623739971, |
|
"grad_norm": 3.356367722166113, |
|
"learning_rate": 1.9940351617157298e-05, |
|
"loss": 0.9321, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.06385517383254474, |
|
"grad_norm": 2.6685489053310905, |
|
"learning_rate": 1.994006058597445e-05, |
|
"loss": 0.871, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.06393746142768977, |
|
"grad_norm": 2.1000398415565447, |
|
"learning_rate": 1.99397688486656e-05, |
|
"loss": 0.8799, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.0640197490228348, |
|
"grad_norm": 2.1292877692214462, |
|
"learning_rate": 1.9939476405251464e-05, |
|
"loss": 0.8955, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.06410203661797984, |
|
"grad_norm": 3.4132241841166073, |
|
"learning_rate": 1.9939183255752817e-05, |
|
"loss": 0.8757, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.06418432421312487, |
|
"grad_norm": 2.62487277122737, |
|
"learning_rate": 1.9938889400190494e-05, |
|
"loss": 0.8884, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.0642666118082699, |
|
"grad_norm": 2.044302329571613, |
|
"learning_rate": 1.993859483858536e-05, |
|
"loss": 0.9023, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.06434889940341494, |
|
"grad_norm": 0.5567547220538414, |
|
"learning_rate": 1.993829957095834e-05, |
|
"loss": 0.5694, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.06443118699855997, |
|
"grad_norm": 0.48731474493235843, |
|
"learning_rate": 1.9938003597330415e-05, |
|
"loss": 0.5764, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.064513474593705, |
|
"grad_norm": 2.335128235917664, |
|
"learning_rate": 1.9937706917722607e-05, |
|
"loss": 0.9091, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.06459576218885003, |
|
"grad_norm": 2.6840226763995383, |
|
"learning_rate": 1.9937409532155992e-05, |
|
"loss": 0.8881, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.06467804978399506, |
|
"grad_norm": 2.3949102024541653, |
|
"learning_rate": 1.99371114406517e-05, |
|
"loss": 0.9183, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.0647603373791401, |
|
"grad_norm": 2.6216703824274488, |
|
"learning_rate": 1.99368126432309e-05, |
|
"loss": 0.9207, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.06484262497428513, |
|
"grad_norm": 2.614435269135524, |
|
"learning_rate": 1.993651313991482e-05, |
|
"loss": 0.9145, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.06492491256943016, |
|
"grad_norm": 1.9122678315195296, |
|
"learning_rate": 1.9936212930724742e-05, |
|
"loss": 0.8829, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.06500720016457519, |
|
"grad_norm": 0.5913835221535177, |
|
"learning_rate": 1.9935912015681984e-05, |
|
"loss": 0.6145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06508948775972022, |
|
"grad_norm": 2.528199419410872, |
|
"learning_rate": 1.993561039480793e-05, |
|
"loss": 0.8655, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.06517177535486525, |
|
"grad_norm": 3.3798538121747326, |
|
"learning_rate": 1.9935308068124e-05, |
|
"loss": 0.9251, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.06525406295001028, |
|
"grad_norm": 2.6588327121370194, |
|
"learning_rate": 1.9935005035651676e-05, |
|
"loss": 0.8983, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.06533635054515531, |
|
"grad_norm": 0.5232567113259947, |
|
"learning_rate": 1.9934701297412482e-05, |
|
"loss": 0.578, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.06541863814030036, |
|
"grad_norm": 4.752300485944965, |
|
"learning_rate": 1.9934396853427998e-05, |
|
"loss": 0.8953, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.06550092573544539, |
|
"grad_norm": 2.2269507955655987, |
|
"learning_rate": 1.9934091703719846e-05, |
|
"loss": 0.9245, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.06558321333059042, |
|
"grad_norm": 3.122445969674065, |
|
"learning_rate": 1.9933785848309708e-05, |
|
"loss": 0.8914, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.06566550092573545, |
|
"grad_norm": 3.1204724551293426, |
|
"learning_rate": 1.9933479287219312e-05, |
|
"loss": 0.9287, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.06574778852088048, |
|
"grad_norm": 14.479758337139925, |
|
"learning_rate": 1.9933172020470433e-05, |
|
"loss": 0.8677, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.06583007611602551, |
|
"grad_norm": 2.1224285416282953, |
|
"learning_rate": 1.99328640480849e-05, |
|
"loss": 0.8755, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06591236371117054, |
|
"grad_norm": 2.487164087508179, |
|
"learning_rate": 1.9932555370084588e-05, |
|
"loss": 0.8775, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.06599465130631557, |
|
"grad_norm": 0.5728404010402629, |
|
"learning_rate": 1.9932245986491425e-05, |
|
"loss": 0.5477, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.0660769389014606, |
|
"grad_norm": 3.245446623126787, |
|
"learning_rate": 1.9931935897327396e-05, |
|
"loss": 0.9005, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.06615922649660563, |
|
"grad_norm": 2.5198170754823237, |
|
"learning_rate": 1.9931625102614524e-05, |
|
"loss": 0.9251, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.06624151409175066, |
|
"grad_norm": 2.7124091417439447, |
|
"learning_rate": 1.9931313602374886e-05, |
|
"loss": 0.9043, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.0663238016868957, |
|
"grad_norm": 2.295917945326921, |
|
"learning_rate": 1.9931001396630613e-05, |
|
"loss": 0.9037, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.06640608928204073, |
|
"grad_norm": 2.5595180677086176, |
|
"learning_rate": 1.9930688485403885e-05, |
|
"loss": 0.8916, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.06648837687718577, |
|
"grad_norm": 2.54401264532517, |
|
"learning_rate": 1.993037486871693e-05, |
|
"loss": 0.8865, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.0665706644723308, |
|
"grad_norm": 2.7644346282703567, |
|
"learning_rate": 1.993006054659202e-05, |
|
"loss": 0.875, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.06665295206747583, |
|
"grad_norm": 2.145314542653547, |
|
"learning_rate": 1.9929745519051497e-05, |
|
"loss": 0.9358, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06673523966262086, |
|
"grad_norm": 3.2713117109960583, |
|
"learning_rate": 1.9929429786117724e-05, |
|
"loss": 0.8777, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.0668175272577659, |
|
"grad_norm": 0.5829653015669467, |
|
"learning_rate": 1.9929113347813145e-05, |
|
"loss": 0.5366, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.06689981485291092, |
|
"grad_norm": 2.4233464969419516, |
|
"learning_rate": 1.992879620416023e-05, |
|
"loss": 0.9099, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.06698210244805596, |
|
"grad_norm": 2.7021068296091624, |
|
"learning_rate": 1.9928478355181512e-05, |
|
"loss": 0.9092, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.06706439004320099, |
|
"grad_norm": 2.522776219516862, |
|
"learning_rate": 1.992815980089957e-05, |
|
"loss": 0.9024, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.06714667763834602, |
|
"grad_norm": 2.232284370603574, |
|
"learning_rate": 1.9927840541337037e-05, |
|
"loss": 0.9233, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.06722896523349105, |
|
"grad_norm": 2.9343145896014255, |
|
"learning_rate": 1.9927520576516587e-05, |
|
"loss": 0.9312, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.06731125282863608, |
|
"grad_norm": 3.3222486630048764, |
|
"learning_rate": 1.9927199906460947e-05, |
|
"loss": 0.8681, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.06739354042378111, |
|
"grad_norm": 2.1225744897957153, |
|
"learning_rate": 1.9926878531192908e-05, |
|
"loss": 0.8916, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.06747582801892614, |
|
"grad_norm": 5.166258547080567, |
|
"learning_rate": 1.992655645073529e-05, |
|
"loss": 0.9153, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06755811561407118, |
|
"grad_norm": 3.2639889220707077, |
|
"learning_rate": 1.992623366511098e-05, |
|
"loss": 0.8715, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.06764040320921622, |
|
"grad_norm": 4.714497016717951, |
|
"learning_rate": 1.9925910174342907e-05, |
|
"loss": 0.8723, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.06772269080436125, |
|
"grad_norm": 2.5352280280058315, |
|
"learning_rate": 1.9925585978454043e-05, |
|
"loss": 0.9045, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.06780497839950628, |
|
"grad_norm": 3.485579632575649, |
|
"learning_rate": 1.992526107746743e-05, |
|
"loss": 0.8797, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.06788726599465131, |
|
"grad_norm": 12.454695730191421, |
|
"learning_rate": 1.992493547140614e-05, |
|
"loss": 0.8755, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.06796955358979634, |
|
"grad_norm": 0.5679287848373274, |
|
"learning_rate": 1.9924609160293308e-05, |
|
"loss": 0.5737, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.06805184118494137, |
|
"grad_norm": 6.733588252523935, |
|
"learning_rate": 1.9924282144152115e-05, |
|
"loss": 0.8607, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.0681341287800864, |
|
"grad_norm": 2.8353728427421965, |
|
"learning_rate": 1.9923954423005786e-05, |
|
"loss": 0.8658, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.06821641637523143, |
|
"grad_norm": 2.226675047912921, |
|
"learning_rate": 1.9923625996877607e-05, |
|
"loss": 0.8908, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.06829870397037646, |
|
"grad_norm": 2.090011013197403, |
|
"learning_rate": 1.9923296865790907e-05, |
|
"loss": 0.9027, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06838099156552149, |
|
"grad_norm": 2.4269097740027687, |
|
"learning_rate": 1.992296702976907e-05, |
|
"loss": 0.8743, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.06846327916066652, |
|
"grad_norm": 2.4454075613373174, |
|
"learning_rate": 1.9922636488835528e-05, |
|
"loss": 0.9188, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.06854556675581157, |
|
"grad_norm": 2.708156376904729, |
|
"learning_rate": 1.992230524301375e-05, |
|
"loss": 0.8753, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.0686278543509566, |
|
"grad_norm": 6.9289687760917955, |
|
"learning_rate": 1.9921973292327285e-05, |
|
"loss": 0.8714, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.06871014194610163, |
|
"grad_norm": 2.833475838520833, |
|
"learning_rate": 1.9921640636799697e-05, |
|
"loss": 0.878, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.06879242954124666, |
|
"grad_norm": 0.6390100760660502, |
|
"learning_rate": 1.992130727645463e-05, |
|
"loss": 0.5892, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.06887471713639169, |
|
"grad_norm": 3.503075844449775, |
|
"learning_rate": 1.992097321131576e-05, |
|
"loss": 0.9134, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.06895700473153672, |
|
"grad_norm": 2.928003367939948, |
|
"learning_rate": 1.992063844140682e-05, |
|
"loss": 0.916, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.06903929232668175, |
|
"grad_norm": 2.79325002366026, |
|
"learning_rate": 1.992030296675159e-05, |
|
"loss": 0.8767, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.06912157992182678, |
|
"grad_norm": 2.312184411585912, |
|
"learning_rate": 1.9919966787373902e-05, |
|
"loss": 0.9053, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06920386751697181, |
|
"grad_norm": 2.9138317208293594, |
|
"learning_rate": 1.991962990329764e-05, |
|
"loss": 0.9005, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.06928615511211685, |
|
"grad_norm": 2.418947503313838, |
|
"learning_rate": 1.991929231454673e-05, |
|
"loss": 0.8876, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.06936844270726188, |
|
"grad_norm": 2.746227734046784, |
|
"learning_rate": 1.9918954021145162e-05, |
|
"loss": 0.9174, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.06945073030240691, |
|
"grad_norm": 4.054877897574317, |
|
"learning_rate": 1.991861502311696e-05, |
|
"loss": 0.8785, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.06953301789755194, |
|
"grad_norm": 3.3645447414769856, |
|
"learning_rate": 1.9918275320486212e-05, |
|
"loss": 0.8885, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.06961530549269698, |
|
"grad_norm": 0.6257651466469342, |
|
"learning_rate": 1.9917934913277047e-05, |
|
"loss": 0.5679, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.06969759308784201, |
|
"grad_norm": 2.9579632903454987, |
|
"learning_rate": 1.9917593801513645e-05, |
|
"loss": 0.8892, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.06977988068298704, |
|
"grad_norm": 2.3255674692633703, |
|
"learning_rate": 1.991725198522024e-05, |
|
"loss": 0.8969, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.06986216827813208, |
|
"grad_norm": 1.8812338541653777, |
|
"learning_rate": 1.9916909464421118e-05, |
|
"loss": 0.84, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.0699444558732771, |
|
"grad_norm": 4.348093261520783, |
|
"learning_rate": 1.9916566239140605e-05, |
|
"loss": 0.9035, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07002674346842214, |
|
"grad_norm": 2.2375985456191003, |
|
"learning_rate": 1.9916222309403085e-05, |
|
"loss": 0.8754, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.07010903106356717, |
|
"grad_norm": 3.613200403801302, |
|
"learning_rate": 1.9915877675232992e-05, |
|
"loss": 0.8815, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.0701913186587122, |
|
"grad_norm": 3.839543987455212, |
|
"learning_rate": 1.9915532336654807e-05, |
|
"loss": 0.9072, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.07027360625385723, |
|
"grad_norm": 2.105567560984786, |
|
"learning_rate": 1.991518629369306e-05, |
|
"loss": 0.896, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.07035589384900226, |
|
"grad_norm": 2.267537355899574, |
|
"learning_rate": 1.9914839546372336e-05, |
|
"loss": 0.9158, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.07043818144414729, |
|
"grad_norm": 3.589047414435187, |
|
"learning_rate": 1.991449209471727e-05, |
|
"loss": 0.8734, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.07052046903929232, |
|
"grad_norm": 3.1819343869570536, |
|
"learning_rate": 1.991414393875254e-05, |
|
"loss": 0.9089, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.07060275663443735, |
|
"grad_norm": 2.5055069972264503, |
|
"learning_rate": 1.991379507850288e-05, |
|
"loss": 0.8681, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.0706850442295824, |
|
"grad_norm": 2.545062208600291, |
|
"learning_rate": 1.991344551399307e-05, |
|
"loss": 0.8835, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.07076733182472743, |
|
"grad_norm": 2.8423181256983487, |
|
"learning_rate": 1.9913095245247948e-05, |
|
"loss": 0.8855, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07084961941987246, |
|
"grad_norm": 2.623939420394984, |
|
"learning_rate": 1.9912744272292392e-05, |
|
"loss": 0.8912, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.07093190701501749, |
|
"grad_norm": 2.456776383887346, |
|
"learning_rate": 1.9912392595151336e-05, |
|
"loss": 0.9026, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.07101419461016252, |
|
"grad_norm": 2.7531225878969177, |
|
"learning_rate": 1.9912040213849762e-05, |
|
"loss": 0.8875, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.07109648220530755, |
|
"grad_norm": 4.481796954208249, |
|
"learning_rate": 1.9911687128412708e-05, |
|
"loss": 0.8636, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.07117876980045258, |
|
"grad_norm": 2.545397332779262, |
|
"learning_rate": 1.9911333338865245e-05, |
|
"loss": 0.8803, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.07126105739559761, |
|
"grad_norm": 3.045980428767302, |
|
"learning_rate": 1.9910978845232517e-05, |
|
"loss": 0.9035, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.07134334499074264, |
|
"grad_norm": 3.6871914250355715, |
|
"learning_rate": 1.9910623647539702e-05, |
|
"loss": 0.8666, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.07142563258588767, |
|
"grad_norm": 2.116550202268351, |
|
"learning_rate": 1.991026774581203e-05, |
|
"loss": 0.9031, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.0715079201810327, |
|
"grad_norm": 2.532009330642646, |
|
"learning_rate": 1.9909911140074788e-05, |
|
"loss": 0.8661, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.07159020777617774, |
|
"grad_norm": 3.33485917673071, |
|
"learning_rate": 1.9909553830353308e-05, |
|
"loss": 0.8776, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07167249537132277, |
|
"grad_norm": 2.3439342371747167, |
|
"learning_rate": 1.990919581667297e-05, |
|
"loss": 0.9151, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.07175478296646781, |
|
"grad_norm": 2.488600787006511, |
|
"learning_rate": 1.9908837099059212e-05, |
|
"loss": 0.9165, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.07183707056161284, |
|
"grad_norm": 3.95670742389146, |
|
"learning_rate": 1.990847767753751e-05, |
|
"loss": 0.8659, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.07191935815675787, |
|
"grad_norm": 0.5947750160477462, |
|
"learning_rate": 1.99081175521334e-05, |
|
"loss": 0.5886, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.0720016457519029, |
|
"grad_norm": 2.033586754058639, |
|
"learning_rate": 1.9907756722872465e-05, |
|
"loss": 0.8897, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.07208393334704793, |
|
"grad_norm": 3.346298659721499, |
|
"learning_rate": 1.9907395189780335e-05, |
|
"loss": 0.902, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.07216622094219297, |
|
"grad_norm": 3.004056249927372, |
|
"learning_rate": 1.9907032952882703e-05, |
|
"loss": 0.8715, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.072248508537338, |
|
"grad_norm": 5.4098932917643285, |
|
"learning_rate": 1.9906670012205286e-05, |
|
"loss": 0.8866, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.07233079613248303, |
|
"grad_norm": 6.828654192266096, |
|
"learning_rate": 1.990630636777388e-05, |
|
"loss": 0.8689, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.07241308372762806, |
|
"grad_norm": 2.6337207605941737, |
|
"learning_rate": 1.9905942019614312e-05, |
|
"loss": 0.8647, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07249537132277309, |
|
"grad_norm": 0.5235737963953581, |
|
"learning_rate": 1.990557696775246e-05, |
|
"loss": 0.5661, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.07257765891791812, |
|
"grad_norm": 11.548238836629363, |
|
"learning_rate": 1.9905211212214266e-05, |
|
"loss": 0.9294, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.07265994651306315, |
|
"grad_norm": 5.489164212385315, |
|
"learning_rate": 1.990484475302571e-05, |
|
"loss": 0.8685, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.07274223410820818, |
|
"grad_norm": 7.88390924258145, |
|
"learning_rate": 1.990447759021282e-05, |
|
"loss": 0.874, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.07282452170335323, |
|
"grad_norm": 4.299200684634295, |
|
"learning_rate": 1.9904109723801684e-05, |
|
"loss": 0.9146, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.07290680929849826, |
|
"grad_norm": 6.21170690266594, |
|
"learning_rate": 1.990374115381843e-05, |
|
"loss": 0.8728, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.07298909689364329, |
|
"grad_norm": 4.563438990093578, |
|
"learning_rate": 1.9903371880289247e-05, |
|
"loss": 0.8747, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.07307138448878832, |
|
"grad_norm": 3.6273703961737187, |
|
"learning_rate": 1.990300190324036e-05, |
|
"loss": 0.9008, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.07315367208393335, |
|
"grad_norm": 7.441233530871766, |
|
"learning_rate": 1.9902631222698057e-05, |
|
"loss": 0.9141, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.07323595967907838, |
|
"grad_norm": 4.82833921873659, |
|
"learning_rate": 1.990225983868867e-05, |
|
"loss": 0.9339, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.07331824727422341, |
|
"grad_norm": 5.887738980648113, |
|
"learning_rate": 1.9901887751238577e-05, |
|
"loss": 0.8799, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.07340053486936844, |
|
"grad_norm": 2.5245499693701072, |
|
"learning_rate": 1.9901514960374217e-05, |
|
"loss": 0.8835, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.07348282246451347, |
|
"grad_norm": 6.763974106441189, |
|
"learning_rate": 1.990114146612207e-05, |
|
"loss": 0.891, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.0735651100596585, |
|
"grad_norm": 2.8844071869365835, |
|
"learning_rate": 1.9900767268508666e-05, |
|
"loss": 0.9097, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.07364739765480353, |
|
"grad_norm": 5.440132687337712, |
|
"learning_rate": 1.9900392367560588e-05, |
|
"loss": 0.8831, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.07372968524994856, |
|
"grad_norm": 3.745407109325051, |
|
"learning_rate": 1.9900016763304472e-05, |
|
"loss": 0.8805, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.0738119728450936, |
|
"grad_norm": 4.288740968099518, |
|
"learning_rate": 1.9899640455766997e-05, |
|
"loss": 0.8891, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.07389426044023864, |
|
"grad_norm": 2.755838421562454, |
|
"learning_rate": 1.9899263444974894e-05, |
|
"loss": 0.8973, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.07397654803538367, |
|
"grad_norm": 2.63866374184814, |
|
"learning_rate": 1.9898885730954948e-05, |
|
"loss": 0.8418, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.0740588356305287, |
|
"grad_norm": 3.0901321494386598, |
|
"learning_rate": 1.9898507313733995e-05, |
|
"loss": 0.8614, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07414112322567373, |
|
"grad_norm": 2.754917360078824, |
|
"learning_rate": 1.9898128193338907e-05, |
|
"loss": 0.8964, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.07422341082081876, |
|
"grad_norm": 2.4717700343085163, |
|
"learning_rate": 1.9897748369796627e-05, |
|
"loss": 0.8793, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.0743056984159638, |
|
"grad_norm": 2.2819538240312585, |
|
"learning_rate": 1.989736784313413e-05, |
|
"loss": 0.9086, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.07438798601110883, |
|
"grad_norm": 2.7031870546344385, |
|
"learning_rate": 1.989698661337845e-05, |
|
"loss": 0.8601, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.07447027360625386, |
|
"grad_norm": 2.2788277737039757, |
|
"learning_rate": 1.9896604680556664e-05, |
|
"loss": 0.8464, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.07455256120139889, |
|
"grad_norm": 2.0567769102378954, |
|
"learning_rate": 1.9896222044695914e-05, |
|
"loss": 0.8807, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.07463484879654392, |
|
"grad_norm": 2.384203325674513, |
|
"learning_rate": 1.9895838705823377e-05, |
|
"loss": 0.8923, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.07471713639168895, |
|
"grad_norm": 2.0967277384590535, |
|
"learning_rate": 1.989545466396628e-05, |
|
"loss": 0.8793, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.07479942398683398, |
|
"grad_norm": 9.442852725541027, |
|
"learning_rate": 1.9895069919151915e-05, |
|
"loss": 0.8965, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.07488171158197901, |
|
"grad_norm": 5.109761027664979, |
|
"learning_rate": 1.9894684471407605e-05, |
|
"loss": 0.8983, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07496399917712405, |
|
"grad_norm": 2.2367018687313185, |
|
"learning_rate": 1.9894298320760733e-05, |
|
"loss": 0.8879, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.07504628677226909, |
|
"grad_norm": 2.6873708972425656, |
|
"learning_rate": 1.989391146723873e-05, |
|
"loss": 0.8975, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.07512857436741412, |
|
"grad_norm": 0.5656242706848698, |
|
"learning_rate": 1.9893523910869085e-05, |
|
"loss": 0.617, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.07521086196255915, |
|
"grad_norm": 3.9316911134297814, |
|
"learning_rate": 1.989313565167932e-05, |
|
"loss": 0.9385, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.07529314955770418, |
|
"grad_norm": 2.783913423475105, |
|
"learning_rate": 1.9892746689697024e-05, |
|
"loss": 0.898, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.07537543715284921, |
|
"grad_norm": 4.235687618463353, |
|
"learning_rate": 1.989235702494982e-05, |
|
"loss": 0.8539, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.07545772474799424, |
|
"grad_norm": 2.387819568149409, |
|
"learning_rate": 1.9891966657465397e-05, |
|
"loss": 0.8369, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.07554001234313927, |
|
"grad_norm": 3.6947231383398424, |
|
"learning_rate": 1.989157558727148e-05, |
|
"loss": 0.8834, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.0756222999382843, |
|
"grad_norm": 2.604963394831731, |
|
"learning_rate": 1.989118381439585e-05, |
|
"loss": 0.9019, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.07570458753342933, |
|
"grad_norm": 0.5332477363950743, |
|
"learning_rate": 1.9890791338866344e-05, |
|
"loss": 0.5771, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07578687512857436, |
|
"grad_norm": 3.2104258542562953, |
|
"learning_rate": 1.9890398160710837e-05, |
|
"loss": 0.9337, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.0758691627237194, |
|
"grad_norm": 0.48633325822320617, |
|
"learning_rate": 1.9890004279957266e-05, |
|
"loss": 0.5602, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.07595145031886442, |
|
"grad_norm": 12.835475358323716, |
|
"learning_rate": 1.9889609696633606e-05, |
|
"loss": 0.8553, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.07603373791400947, |
|
"grad_norm": 3.2124511867282037, |
|
"learning_rate": 1.9889214410767887e-05, |
|
"loss": 0.8674, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.0761160255091545, |
|
"grad_norm": 2.904116877033008, |
|
"learning_rate": 1.9888818422388193e-05, |
|
"loss": 0.8747, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.07619831310429953, |
|
"grad_norm": 3.157871788078832, |
|
"learning_rate": 1.9888421731522656e-05, |
|
"loss": 0.8891, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.07628060069944456, |
|
"grad_norm": 2.3718730999123547, |
|
"learning_rate": 1.9888024338199448e-05, |
|
"loss": 0.8993, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.07636288829458959, |
|
"grad_norm": 2.4565769064213723, |
|
"learning_rate": 1.988762624244681e-05, |
|
"loss": 0.9013, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.07644517588973462, |
|
"grad_norm": 2.540968098318489, |
|
"learning_rate": 1.988722744429301e-05, |
|
"loss": 0.8633, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.07652746348487965, |
|
"grad_norm": 3.56518007003656, |
|
"learning_rate": 1.988682794376639e-05, |
|
"loss": 0.8882, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07660975108002469, |
|
"grad_norm": 2.176182910474906, |
|
"learning_rate": 1.9886427740895325e-05, |
|
"loss": 0.9149, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.07669203867516972, |
|
"grad_norm": 0.5807290241092793, |
|
"learning_rate": 1.9886026835708242e-05, |
|
"loss": 0.5897, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.07677432627031475, |
|
"grad_norm": 0.5568253540494434, |
|
"learning_rate": 1.9885625228233624e-05, |
|
"loss": 0.5944, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.07685661386545978, |
|
"grad_norm": 0.46307351633355415, |
|
"learning_rate": 1.9885222918499998e-05, |
|
"loss": 0.5687, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.07693890146060481, |
|
"grad_norm": 2.21686936101954, |
|
"learning_rate": 1.9884819906535946e-05, |
|
"loss": 0.899, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.07702118905574984, |
|
"grad_norm": 2.7051990886793758, |
|
"learning_rate": 1.9884416192370096e-05, |
|
"loss": 0.9015, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.07710347665089488, |
|
"grad_norm": 2.1375647901334385, |
|
"learning_rate": 1.988401177603113e-05, |
|
"loss": 0.9001, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.07718576424603991, |
|
"grad_norm": 4.132265546672556, |
|
"learning_rate": 1.988360665754777e-05, |
|
"loss": 0.8908, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.07726805184118495, |
|
"grad_norm": 2.1359019957192533, |
|
"learning_rate": 1.9883200836948803e-05, |
|
"loss": 0.8717, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.07735033943632998, |
|
"grad_norm": 3.9513646854514386, |
|
"learning_rate": 1.9882794314263053e-05, |
|
"loss": 0.8718, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07743262703147501, |
|
"grad_norm": 2.321609974282721, |
|
"learning_rate": 1.9882387089519398e-05, |
|
"loss": 0.869, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.07751491462662004, |
|
"grad_norm": 3.70309268916697, |
|
"learning_rate": 1.9881979162746772e-05, |
|
"loss": 0.8649, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.07759720222176507, |
|
"grad_norm": 3.361767416529052, |
|
"learning_rate": 1.9881570533974148e-05, |
|
"loss": 0.8683, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.0776794898169101, |
|
"grad_norm": 3.4179325921845036, |
|
"learning_rate": 1.988116120323056e-05, |
|
"loss": 0.8963, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.07776177741205513, |
|
"grad_norm": 3.021751145368183, |
|
"learning_rate": 1.988075117054508e-05, |
|
"loss": 0.8746, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.07784406500720016, |
|
"grad_norm": 3.5878829514900974, |
|
"learning_rate": 1.9880340435946837e-05, |
|
"loss": 0.8516, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.07792635260234519, |
|
"grad_norm": 1.920072678794743, |
|
"learning_rate": 1.9879928999465016e-05, |
|
"loss": 0.8937, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.07800864019749022, |
|
"grad_norm": 2.2091268186489796, |
|
"learning_rate": 1.9879516861128835e-05, |
|
"loss": 0.8475, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.07809092779263525, |
|
"grad_norm": 2.2168445139505644, |
|
"learning_rate": 1.9879104020967577e-05, |
|
"loss": 0.8633, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.0781732153877803, |
|
"grad_norm": 1.0323698606460356, |
|
"learning_rate": 1.9878690479010568e-05, |
|
"loss": 0.6111, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07825550298292533, |
|
"grad_norm": 2.682420816107399, |
|
"learning_rate": 1.987827623528719e-05, |
|
"loss": 0.9341, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.07833779057807036, |
|
"grad_norm": 0.6240540448167275, |
|
"learning_rate": 1.987786128982686e-05, |
|
"loss": 0.5523, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.07842007817321539, |
|
"grad_norm": 3.6752862094905905, |
|
"learning_rate": 1.9877445642659066e-05, |
|
"loss": 0.9273, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.07850236576836042, |
|
"grad_norm": 2.3734201750601858, |
|
"learning_rate": 1.987702929381333e-05, |
|
"loss": 0.8919, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.07858465336350545, |
|
"grad_norm": 0.7387548503010232, |
|
"learning_rate": 1.9876612243319228e-05, |
|
"loss": 0.5746, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.07866694095865048, |
|
"grad_norm": 0.6959735516945202, |
|
"learning_rate": 1.9876194491206388e-05, |
|
"loss": 0.5751, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.07874922855379551, |
|
"grad_norm": 2.1882974936345394, |
|
"learning_rate": 1.9875776037504482e-05, |
|
"loss": 0.9006, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.07883151614894054, |
|
"grad_norm": 2.341847998608011, |
|
"learning_rate": 1.9875356882243245e-05, |
|
"loss": 0.9041, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.07891380374408558, |
|
"grad_norm": 2.1628210206575433, |
|
"learning_rate": 1.9874937025452445e-05, |
|
"loss": 0.8883, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.0789960913392306, |
|
"grad_norm": 2.8510221399462483, |
|
"learning_rate": 1.9874516467161914e-05, |
|
"loss": 0.9231, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07907837893437564, |
|
"grad_norm": 4.694838855869676, |
|
"learning_rate": 1.9874095207401526e-05, |
|
"loss": 0.9156, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.07916066652952067, |
|
"grad_norm": 2.877307386668155, |
|
"learning_rate": 1.98736732462012e-05, |
|
"loss": 0.8686, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.07924295412466571, |
|
"grad_norm": 2.581259841624273, |
|
"learning_rate": 1.9873250583590923e-05, |
|
"loss": 0.9125, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.07932524171981074, |
|
"grad_norm": 2.3158798477006037, |
|
"learning_rate": 1.9872827219600716e-05, |
|
"loss": 0.8926, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.07940752931495577, |
|
"grad_norm": 3.0098712265326784, |
|
"learning_rate": 1.987240315426065e-05, |
|
"loss": 0.8758, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.0794898169101008, |
|
"grad_norm": 3.1422180864323233, |
|
"learning_rate": 1.987197838760085e-05, |
|
"loss": 0.8908, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.07957210450524584, |
|
"grad_norm": 0.9645131727703571, |
|
"learning_rate": 1.9871552919651494e-05, |
|
"loss": 0.6045, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.07965439210039087, |
|
"grad_norm": 3.56520313826412, |
|
"learning_rate": 1.9871126750442807e-05, |
|
"loss": 0.8696, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.0797366796955359, |
|
"grad_norm": 2.0059409411059113, |
|
"learning_rate": 1.9870699880005063e-05, |
|
"loss": 0.8799, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.07981896729068093, |
|
"grad_norm": 4.983123742682501, |
|
"learning_rate": 1.9870272308368584e-05, |
|
"loss": 0.8693, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07990125488582596, |
|
"grad_norm": 2.1182309366583474, |
|
"learning_rate": 1.9869844035563747e-05, |
|
"loss": 0.8649, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.07998354248097099, |
|
"grad_norm": 2.157976641839583, |
|
"learning_rate": 1.986941506162097e-05, |
|
"loss": 0.8844, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.08006583007611602, |
|
"grad_norm": 3.1179516322271117, |
|
"learning_rate": 1.9868985386570734e-05, |
|
"loss": 0.8702, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.08014811767126105, |
|
"grad_norm": 2.1804704549093246, |
|
"learning_rate": 1.986855501044356e-05, |
|
"loss": 0.8963, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.08023040526640608, |
|
"grad_norm": 2.825665735780858, |
|
"learning_rate": 1.986812393327002e-05, |
|
"loss": 0.9028, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.08031269286155113, |
|
"grad_norm": 2.7064578154820276, |
|
"learning_rate": 1.9867692155080736e-05, |
|
"loss": 0.8922, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.08039498045669616, |
|
"grad_norm": 4.940848988099329, |
|
"learning_rate": 1.9867259675906383e-05, |
|
"loss": 0.9096, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.08047726805184119, |
|
"grad_norm": 3.7159663449631943, |
|
"learning_rate": 1.9866826495777683e-05, |
|
"loss": 0.8946, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.08055955564698622, |
|
"grad_norm": 4.235722900766384, |
|
"learning_rate": 1.9866392614725408e-05, |
|
"loss": 0.8844, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.08064184324213125, |
|
"grad_norm": 2.5725805077545796, |
|
"learning_rate": 1.9865958032780383e-05, |
|
"loss": 0.8849, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08072413083727628, |
|
"grad_norm": 3.2900229009140367, |
|
"learning_rate": 1.986552274997348e-05, |
|
"loss": 0.8712, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.08080641843242131, |
|
"grad_norm": 2.7018112393037206, |
|
"learning_rate": 1.986508676633561e-05, |
|
"loss": 0.881, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.08088870602756634, |
|
"grad_norm": 3.2565064868257356, |
|
"learning_rate": 1.986465008189776e-05, |
|
"loss": 0.8741, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.08097099362271137, |
|
"grad_norm": 2.977427479800942, |
|
"learning_rate": 1.986421269669094e-05, |
|
"loss": 0.864, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.0810532812178564, |
|
"grad_norm": 2.8391838913702734, |
|
"learning_rate": 1.986377461074623e-05, |
|
"loss": 0.8777, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.08113556881300144, |
|
"grad_norm": 2.228144074432828, |
|
"learning_rate": 1.9863335824094742e-05, |
|
"loss": 0.8873, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.08121785640814647, |
|
"grad_norm": 2.6153835393886444, |
|
"learning_rate": 1.9862896336767654e-05, |
|
"loss": 0.8565, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.08130014400329151, |
|
"grad_norm": 2.469488378896095, |
|
"learning_rate": 1.9862456148796182e-05, |
|
"loss": 0.9062, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.08138243159843654, |
|
"grad_norm": 0.9008951474609029, |
|
"learning_rate": 1.98620152602116e-05, |
|
"loss": 0.5855, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.08146471919358157, |
|
"grad_norm": 3.1010964992276335, |
|
"learning_rate": 1.986157367104522e-05, |
|
"loss": 0.8901, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.0815470067887266, |
|
"grad_norm": 2.745575020455269, |
|
"learning_rate": 1.9861131381328422e-05, |
|
"loss": 0.8992, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.08162929438387163, |
|
"grad_norm": 2.319333762749616, |
|
"learning_rate": 1.9860688391092623e-05, |
|
"loss": 0.8489, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.08171158197901666, |
|
"grad_norm": 1.8701951574677815, |
|
"learning_rate": 1.9860244700369288e-05, |
|
"loss": 0.8895, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.0817938695741617, |
|
"grad_norm": 2.4973895580746928, |
|
"learning_rate": 1.985980030918994e-05, |
|
"loss": 0.8414, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.08187615716930673, |
|
"grad_norm": 2.542292639884159, |
|
"learning_rate": 1.9859355217586144e-05, |
|
"loss": 0.8865, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.08195844476445176, |
|
"grad_norm": 0.5992255264191748, |
|
"learning_rate": 1.9858909425589524e-05, |
|
"loss": 0.5575, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.08204073235959679, |
|
"grad_norm": 2.143472686925439, |
|
"learning_rate": 1.9858462933231742e-05, |
|
"loss": 0.8543, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.08212301995474182, |
|
"grad_norm": 2.49083696229216, |
|
"learning_rate": 1.9858015740544524e-05, |
|
"loss": 0.8961, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.08220530754988685, |
|
"grad_norm": 5.032363107017064, |
|
"learning_rate": 1.985756784755963e-05, |
|
"loss": 0.869, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.08228759514503188, |
|
"grad_norm": 3.456646347683982, |
|
"learning_rate": 1.9857119254308885e-05, |
|
"loss": 0.868, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08236988274017693, |
|
"grad_norm": 3.7630419410589755, |
|
"learning_rate": 1.9856669960824147e-05, |
|
"loss": 0.9249, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.08245217033532196, |
|
"grad_norm": 3.1625549709552994, |
|
"learning_rate": 1.985621996713734e-05, |
|
"loss": 0.8869, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.08253445793046699, |
|
"grad_norm": 3.881507636381793, |
|
"learning_rate": 1.985576927328043e-05, |
|
"loss": 0.888, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.08261674552561202, |
|
"grad_norm": 2.544247409259161, |
|
"learning_rate": 1.9855317879285434e-05, |
|
"loss": 0.8715, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.08269903312075705, |
|
"grad_norm": 2.5279916413903583, |
|
"learning_rate": 1.9854865785184417e-05, |
|
"loss": 0.8849, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.08278132071590208, |
|
"grad_norm": 3.4196695037594576, |
|
"learning_rate": 1.9854412991009494e-05, |
|
"loss": 0.8364, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.08286360831104711, |
|
"grad_norm": 2.759961086631554, |
|
"learning_rate": 1.985395949679283e-05, |
|
"loss": 0.854, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.08294589590619214, |
|
"grad_norm": 0.5731316878529051, |
|
"learning_rate": 1.9853505302566646e-05, |
|
"loss": 0.6152, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.08302818350133717, |
|
"grad_norm": 2.9549671685361525, |
|
"learning_rate": 1.98530504083632e-05, |
|
"loss": 0.861, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.0831104710964822, |
|
"grad_norm": 2.3193711696281025, |
|
"learning_rate": 1.9852594814214812e-05, |
|
"loss": 0.865, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.08319275869162723, |
|
"grad_norm": 3.0076758009209636, |
|
"learning_rate": 1.9852138520153846e-05, |
|
"loss": 0.8852, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.08327504628677226, |
|
"grad_norm": 2.732008977686221, |
|
"learning_rate": 1.9851681526212716e-05, |
|
"loss": 0.8928, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.0833573338819173, |
|
"grad_norm": 2.37950207279815, |
|
"learning_rate": 1.9851223832423886e-05, |
|
"loss": 0.8617, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.08343962147706234, |
|
"grad_norm": 2.464424002675186, |
|
"learning_rate": 1.985076543881987e-05, |
|
"loss": 0.8625, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.08352190907220737, |
|
"grad_norm": 2.9080302916718015, |
|
"learning_rate": 1.985030634543323e-05, |
|
"loss": 0.8832, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.0836041966673524, |
|
"grad_norm": 2.6287476224799655, |
|
"learning_rate": 1.984984655229658e-05, |
|
"loss": 0.8728, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.08368648426249743, |
|
"grad_norm": 2.5936175763493052, |
|
"learning_rate": 1.9849386059442585e-05, |
|
"loss": 0.8678, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.08376877185764246, |
|
"grad_norm": 2.3604963235792904, |
|
"learning_rate": 1.9848924866903955e-05, |
|
"loss": 0.8783, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.0838510594527875, |
|
"grad_norm": 0.5341112663835049, |
|
"learning_rate": 1.984846297471345e-05, |
|
"loss": 0.605, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.08393334704793252, |
|
"grad_norm": 2.9860218730439057, |
|
"learning_rate": 1.984800038290389e-05, |
|
"loss": 0.8525, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08401563464307756, |
|
"grad_norm": 2.4630212214875025, |
|
"learning_rate": 1.9847537091508134e-05, |
|
"loss": 0.8825, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.08409792223822259, |
|
"grad_norm": 2.424908485494412, |
|
"learning_rate": 1.984707310055909e-05, |
|
"loss": 0.891, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.08418020983336762, |
|
"grad_norm": 2.886480910540036, |
|
"learning_rate": 1.984660841008972e-05, |
|
"loss": 0.8935, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.08426249742851265, |
|
"grad_norm": 2.4246756718684384, |
|
"learning_rate": 1.9846143020133035e-05, |
|
"loss": 0.8679, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.08434478502365768, |
|
"grad_norm": 4.020038177987053, |
|
"learning_rate": 1.98456769307221e-05, |
|
"loss": 0.8191, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.08442707261880271, |
|
"grad_norm": 2.6823999549769795, |
|
"learning_rate": 1.9845210141890018e-05, |
|
"loss": 0.8618, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.08450936021394775, |
|
"grad_norm": 2.2350487266641035, |
|
"learning_rate": 1.9844742653669953e-05, |
|
"loss": 0.8595, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.08459164780909278, |
|
"grad_norm": 4.977761117586025, |
|
"learning_rate": 1.9844274466095117e-05, |
|
"loss": 0.8516, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.08467393540423782, |
|
"grad_norm": 3.31805191100729, |
|
"learning_rate": 1.9843805579198766e-05, |
|
"loss": 0.8636, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.08475622299938285, |
|
"grad_norm": 2.5881873279624648, |
|
"learning_rate": 1.9843335993014206e-05, |
|
"loss": 0.8667, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.08483851059452788, |
|
"grad_norm": 3.9560157884462, |
|
"learning_rate": 1.98428657075748e-05, |
|
"loss": 0.8799, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.08492079818967291, |
|
"grad_norm": 2.5965271671259753, |
|
"learning_rate": 1.984239472291396e-05, |
|
"loss": 0.8714, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.08500308578481794, |
|
"grad_norm": 2.9384162786300094, |
|
"learning_rate": 1.9841923039065136e-05, |
|
"loss": 0.8784, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.08508537337996297, |
|
"grad_norm": 4.575841979886102, |
|
"learning_rate": 1.984145065606184e-05, |
|
"loss": 0.871, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.085167660975108, |
|
"grad_norm": 2.6762798398130205, |
|
"learning_rate": 1.984097757393763e-05, |
|
"loss": 0.8884, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.08524994857025303, |
|
"grad_norm": 2.3317749715867757, |
|
"learning_rate": 1.9840503792726107e-05, |
|
"loss": 0.8582, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.08533223616539806, |
|
"grad_norm": 2.5192408862448925, |
|
"learning_rate": 1.9840029312460936e-05, |
|
"loss": 0.8987, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.08541452376054309, |
|
"grad_norm": 3.0314447963476954, |
|
"learning_rate": 1.9839554133175815e-05, |
|
"loss": 0.9115, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.08549681135568812, |
|
"grad_norm": 2.718611923577393, |
|
"learning_rate": 1.983907825490451e-05, |
|
"loss": 0.8768, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.08557909895083317, |
|
"grad_norm": 3.2506331598038063, |
|
"learning_rate": 1.9838601677680818e-05, |
|
"loss": 0.8892, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.0856613865459782, |
|
"grad_norm": 2.8785960552339844, |
|
"learning_rate": 1.9838124401538596e-05, |
|
"loss": 0.8762, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.08574367414112323, |
|
"grad_norm": 3.255205364224761, |
|
"learning_rate": 1.9837646426511755e-05, |
|
"loss": 0.8878, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.08582596173626826, |
|
"grad_norm": 2.152447959926313, |
|
"learning_rate": 1.9837167752634243e-05, |
|
"loss": 0.8939, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.08590824933141329, |
|
"grad_norm": 6.038167525170103, |
|
"learning_rate": 1.983668837994006e-05, |
|
"loss": 0.854, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.08599053692655832, |
|
"grad_norm": 2.4872882270608296, |
|
"learning_rate": 1.983620830846327e-05, |
|
"loss": 0.865, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.08607282452170335, |
|
"grad_norm": 5.0878964623293905, |
|
"learning_rate": 1.9835727538237977e-05, |
|
"loss": 0.8848, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.08615511211684838, |
|
"grad_norm": 0.5466809522376739, |
|
"learning_rate": 1.9835246069298325e-05, |
|
"loss": 0.5879, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.08623739971199341, |
|
"grad_norm": 2.8930059060138134, |
|
"learning_rate": 1.9834763901678523e-05, |
|
"loss": 0.9032, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.08631968730713845, |
|
"grad_norm": 3.481150201855255, |
|
"learning_rate": 1.983428103541282e-05, |
|
"loss": 0.895, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.08640197490228348, |
|
"grad_norm": 2.2668611618771806, |
|
"learning_rate": 1.983379747053552e-05, |
|
"loss": 0.8841, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08648426249742851, |
|
"grad_norm": 0.5012767267519984, |
|
"learning_rate": 1.9833313207080976e-05, |
|
"loss": 0.5584, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.08656655009257354, |
|
"grad_norm": 4.03230401593853, |
|
"learning_rate": 1.983282824508359e-05, |
|
"loss": 0.8722, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.08664883768771858, |
|
"grad_norm": 3.2238027639613662, |
|
"learning_rate": 1.9832342584577808e-05, |
|
"loss": 0.9061, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.08673112528286361, |
|
"grad_norm": 2.5875473888993827, |
|
"learning_rate": 1.9831856225598134e-05, |
|
"loss": 0.8655, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.08681341287800864, |
|
"grad_norm": 2.9531227295823435, |
|
"learning_rate": 1.9831369168179116e-05, |
|
"loss": 0.9014, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.08689570047315368, |
|
"grad_norm": 3.2403950768604273, |
|
"learning_rate": 1.9830881412355356e-05, |
|
"loss": 0.8802, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.0869779880682987, |
|
"grad_norm": 2.6421330385224406, |
|
"learning_rate": 1.9830392958161505e-05, |
|
"loss": 0.8624, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.08706027566344374, |
|
"grad_norm": 2.796247945415367, |
|
"learning_rate": 1.9829903805632257e-05, |
|
"loss": 0.8465, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.08714256325858877, |
|
"grad_norm": 0.5356691167104551, |
|
"learning_rate": 1.982941395480236e-05, |
|
"loss": 0.5749, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.0872248508537338, |
|
"grad_norm": 2.543782162970702, |
|
"learning_rate": 1.9828923405706622e-05, |
|
"loss": 0.8651, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08730713844887883, |
|
"grad_norm": 5.052374438346327, |
|
"learning_rate": 1.982843215837988e-05, |
|
"loss": 0.8556, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.08738942604402386, |
|
"grad_norm": 2.709282429422679, |
|
"learning_rate": 1.9827940212857038e-05, |
|
"loss": 0.8739, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.08747171363916889, |
|
"grad_norm": 12.014153200069254, |
|
"learning_rate": 1.982744756917304e-05, |
|
"loss": 0.8685, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.08755400123431392, |
|
"grad_norm": 4.7874082941622875, |
|
"learning_rate": 1.9826954227362883e-05, |
|
"loss": 0.8968, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.08763628882945895, |
|
"grad_norm": 3.094799934600602, |
|
"learning_rate": 1.9826460187461616e-05, |
|
"loss": 0.8678, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.087718576424604, |
|
"grad_norm": 2.2422659009449664, |
|
"learning_rate": 1.982596544950433e-05, |
|
"loss": 0.8764, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.08780086401974903, |
|
"grad_norm": 3.436687255418153, |
|
"learning_rate": 1.982547001352617e-05, |
|
"loss": 0.8516, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.08788315161489406, |
|
"grad_norm": 0.4947838359746663, |
|
"learning_rate": 1.982497387956234e-05, |
|
"loss": 0.5591, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.08796543921003909, |
|
"grad_norm": 2.6289534390817098, |
|
"learning_rate": 1.9824477047648073e-05, |
|
"loss": 0.8481, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.08804772680518412, |
|
"grad_norm": 0.4837575812403313, |
|
"learning_rate": 1.9823979517818672e-05, |
|
"loss": 0.5778, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.08813001440032915, |
|
"grad_norm": 3.538024856422455, |
|
"learning_rate": 1.9823481290109478e-05, |
|
"loss": 0.8619, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.08821230199547418, |
|
"grad_norm": 4.321407175482124, |
|
"learning_rate": 1.982298236455588e-05, |
|
"loss": 0.8846, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.08829458959061921, |
|
"grad_norm": 3.616450253072054, |
|
"learning_rate": 1.9822482741193324e-05, |
|
"loss": 0.8856, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.08837687718576424, |
|
"grad_norm": 4.473435045577941, |
|
"learning_rate": 1.9821982420057308e-05, |
|
"loss": 0.8608, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.08845916478090927, |
|
"grad_norm": 0.5344599795616546, |
|
"learning_rate": 1.9821481401183364e-05, |
|
"loss": 0.5741, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.0885414523760543, |
|
"grad_norm": 3.608389298386541, |
|
"learning_rate": 1.982097968460709e-05, |
|
"loss": 0.8832, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.08862373997119934, |
|
"grad_norm": 4.223422665021111, |
|
"learning_rate": 1.9820477270364123e-05, |
|
"loss": 0.8854, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.08870602756634437, |
|
"grad_norm": 3.236757188788279, |
|
"learning_rate": 1.981997415849016e-05, |
|
"loss": 0.8727, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.08878831516148941, |
|
"grad_norm": 0.5297374533084104, |
|
"learning_rate": 1.9819470349020936e-05, |
|
"loss": 0.5883, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.08887060275663444, |
|
"grad_norm": 2.8725890412006656, |
|
"learning_rate": 1.9818965841992243e-05, |
|
"loss": 0.8719, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08895289035177947, |
|
"grad_norm": 0.4917914943060142, |
|
"learning_rate": 1.9818460637439917e-05, |
|
"loss": 0.5497, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.0890351779469245, |
|
"grad_norm": 3.666129989863918, |
|
"learning_rate": 1.9817954735399853e-05, |
|
"loss": 0.855, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.08911746554206953, |
|
"grad_norm": 3.667558282780085, |
|
"learning_rate": 1.9817448135907984e-05, |
|
"loss": 0.8618, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.08919975313721457, |
|
"grad_norm": 2.8134358753083597, |
|
"learning_rate": 1.9816940839000303e-05, |
|
"loss": 0.8639, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.0892820407323596, |
|
"grad_norm": 3.8554001706730907, |
|
"learning_rate": 1.981643284471284e-05, |
|
"loss": 0.8449, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.08936432832750463, |
|
"grad_norm": 3.767364747903415, |
|
"learning_rate": 1.981592415308169e-05, |
|
"loss": 0.8549, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.08944661592264966, |
|
"grad_norm": 2.8398571302805453, |
|
"learning_rate": 1.9815414764142986e-05, |
|
"loss": 0.8735, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.08952890351779469, |
|
"grad_norm": 2.980261363247237, |
|
"learning_rate": 1.9814904677932912e-05, |
|
"loss": 0.8725, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.08961119111293972, |
|
"grad_norm": 3.7219107197197916, |
|
"learning_rate": 1.9814393894487713e-05, |
|
"loss": 0.9151, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.08969347870808475, |
|
"grad_norm": 4.035211371174713, |
|
"learning_rate": 1.981388241384366e-05, |
|
"loss": 0.8825, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08977576630322978, |
|
"grad_norm": 3.053085785512212, |
|
"learning_rate": 1.9813370236037098e-05, |
|
"loss": 0.8497, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.08985805389837483, |
|
"grad_norm": 0.5368604454434628, |
|
"learning_rate": 1.981285736110441e-05, |
|
"loss": 0.5812, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.08994034149351986, |
|
"grad_norm": 4.355844807027429, |
|
"learning_rate": 1.981234378908203e-05, |
|
"loss": 0.8887, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.09002262908866489, |
|
"grad_norm": 2.649968557975437, |
|
"learning_rate": 1.9811829520006433e-05, |
|
"loss": 0.8415, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.09010491668380992, |
|
"grad_norm": 3.4417587859008214, |
|
"learning_rate": 1.9811314553914166e-05, |
|
"loss": 0.8685, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.09018720427895495, |
|
"grad_norm": 0.48295286929932113, |
|
"learning_rate": 1.98107988908418e-05, |
|
"loss": 0.5608, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.09026949187409998, |
|
"grad_norm": 4.948234702126818, |
|
"learning_rate": 1.981028253082597e-05, |
|
"loss": 0.8638, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.09035177946924501, |
|
"grad_norm": 2.8257336957776733, |
|
"learning_rate": 1.9809765473903362e-05, |
|
"loss": 0.8402, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.09043406706439004, |
|
"grad_norm": 0.48328014205289604, |
|
"learning_rate": 1.98092477201107e-05, |
|
"loss": 0.5797, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.09051635465953507, |
|
"grad_norm": 3.1346349138814418, |
|
"learning_rate": 1.980872926948477e-05, |
|
"loss": 0.8675, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0905986422546801, |
|
"grad_norm": 2.707381646623277, |
|
"learning_rate": 1.9808210122062396e-05, |
|
"loss": 0.8588, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.09068092984982513, |
|
"grad_norm": 0.4754150829561111, |
|
"learning_rate": 1.9807690277880464e-05, |
|
"loss": 0.5962, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.09076321744497017, |
|
"grad_norm": 3.2149488041323946, |
|
"learning_rate": 1.98071697369759e-05, |
|
"loss": 0.849, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.0908455050401152, |
|
"grad_norm": 3.1468421046064887, |
|
"learning_rate": 1.9806648499385678e-05, |
|
"loss": 0.8525, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.09092779263526024, |
|
"grad_norm": 3.011551334891878, |
|
"learning_rate": 1.9806126565146835e-05, |
|
"loss": 0.862, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.09101008023040527, |
|
"grad_norm": 3.7542041127163235, |
|
"learning_rate": 1.980560393429644e-05, |
|
"loss": 0.878, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.0910923678255503, |
|
"grad_norm": 3.924675309445745, |
|
"learning_rate": 1.9805080606871625e-05, |
|
"loss": 0.8932, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.09117465542069533, |
|
"grad_norm": 3.149434195229172, |
|
"learning_rate": 1.980455658290956e-05, |
|
"loss": 0.8968, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.09125694301584036, |
|
"grad_norm": 0.4528941005660691, |
|
"learning_rate": 1.9804031862447483e-05, |
|
"loss": 0.5658, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.0913392306109854, |
|
"grad_norm": 3.2710296854560688, |
|
"learning_rate": 1.9803506445522658e-05, |
|
"loss": 0.8739, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.09142151820613043, |
|
"grad_norm": 0.48322757491755364, |
|
"learning_rate": 1.9802980332172415e-05, |
|
"loss": 0.592, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.09150380580127546, |
|
"grad_norm": 3.600092282955291, |
|
"learning_rate": 1.9802453522434123e-05, |
|
"loss": 0.8524, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.09158609339642049, |
|
"grad_norm": 3.7142303319750773, |
|
"learning_rate": 1.980192601634521e-05, |
|
"loss": 0.8811, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.09166838099156552, |
|
"grad_norm": 3.133621188104266, |
|
"learning_rate": 1.9801397813943156e-05, |
|
"loss": 0.8937, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.09175066858671055, |
|
"grad_norm": 5.265940334189566, |
|
"learning_rate": 1.980086891526547e-05, |
|
"loss": 0.8761, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.09183295618185558, |
|
"grad_norm": 0.5062751751465183, |
|
"learning_rate": 1.9800339320349732e-05, |
|
"loss": 0.5516, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.09191524377700061, |
|
"grad_norm": 3.772473804543901, |
|
"learning_rate": 1.9799809029233558e-05, |
|
"loss": 0.8375, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.09199753137214566, |
|
"grad_norm": 3.8490743801526803, |
|
"learning_rate": 1.9799278041954628e-05, |
|
"loss": 0.877, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.09207981896729069, |
|
"grad_norm": 3.5820410192444174, |
|
"learning_rate": 1.9798746358550656e-05, |
|
"loss": 0.8833, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.09216210656243572, |
|
"grad_norm": 8.839295550642253, |
|
"learning_rate": 1.9798213979059412e-05, |
|
"loss": 0.8553, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09224439415758075, |
|
"grad_norm": 3.7706882959014205, |
|
"learning_rate": 1.979768090351872e-05, |
|
"loss": 0.8564, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.09232668175272578, |
|
"grad_norm": 4.312690219016083, |
|
"learning_rate": 1.9797147131966445e-05, |
|
"loss": 0.8605, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.09240896934787081, |
|
"grad_norm": 6.342821693734463, |
|
"learning_rate": 1.9796612664440503e-05, |
|
"loss": 0.8863, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.09249125694301584, |
|
"grad_norm": 3.480039566309057, |
|
"learning_rate": 1.979607750097887e-05, |
|
"loss": 0.8676, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.09257354453816087, |
|
"grad_norm": 0.5209974485249531, |
|
"learning_rate": 1.9795541641619552e-05, |
|
"loss": 0.6128, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.0926558321333059, |
|
"grad_norm": 3.0644541451290106, |
|
"learning_rate": 1.9795005086400623e-05, |
|
"loss": 0.8596, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.09273811972845093, |
|
"grad_norm": 4.0339545836639585, |
|
"learning_rate": 1.9794467835360198e-05, |
|
"loss": 0.8956, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.09282040732359596, |
|
"grad_norm": 3.606396064787203, |
|
"learning_rate": 1.9793929888536443e-05, |
|
"loss": 0.8446, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.092902694918741, |
|
"grad_norm": 3.266963278351553, |
|
"learning_rate": 1.979339124596757e-05, |
|
"loss": 0.8804, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.09298498251388602, |
|
"grad_norm": 4.171351560316691, |
|
"learning_rate": 1.9792851907691847e-05, |
|
"loss": 0.8764, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.09306727010903107, |
|
"grad_norm": 3.1333885189366066, |
|
"learning_rate": 1.9792311873747584e-05, |
|
"loss": 0.8882, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.0931495577041761, |
|
"grad_norm": 4.115748009743592, |
|
"learning_rate": 1.9791771144173146e-05, |
|
"loss": 0.8693, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.09323184529932113, |
|
"grad_norm": 4.248749716560056, |
|
"learning_rate": 1.9791229719006947e-05, |
|
"loss": 0.866, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.09331413289446616, |
|
"grad_norm": 0.5602770220421947, |
|
"learning_rate": 1.979068759828745e-05, |
|
"loss": 0.5729, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.09339642048961119, |
|
"grad_norm": 3.208526975104471, |
|
"learning_rate": 1.979014478205316e-05, |
|
"loss": 0.8447, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.09347870808475622, |
|
"grad_norm": 3.837179354794119, |
|
"learning_rate": 1.978960127034264e-05, |
|
"loss": 0.8395, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.09356099567990125, |
|
"grad_norm": 4.22608442690413, |
|
"learning_rate": 1.9789057063194505e-05, |
|
"loss": 0.8345, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.09364328327504629, |
|
"grad_norm": 4.512917248957414, |
|
"learning_rate": 1.978851216064741e-05, |
|
"loss": 0.8755, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.09372557087019132, |
|
"grad_norm": 4.485181370046995, |
|
"learning_rate": 1.978796656274007e-05, |
|
"loss": 0.9001, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.09380785846533635, |
|
"grad_norm": 4.311526149543538, |
|
"learning_rate": 1.978742026951123e-05, |
|
"loss": 0.8147, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09389014606048138, |
|
"grad_norm": 3.400869370992463, |
|
"learning_rate": 1.9786873280999716e-05, |
|
"loss": 0.8458, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.09397243365562641, |
|
"grad_norm": 3.484007931145798, |
|
"learning_rate": 1.978632559724437e-05, |
|
"loss": 0.8396, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.09405472125077145, |
|
"grad_norm": 5.974225023368629, |
|
"learning_rate": 1.9785777218284107e-05, |
|
"loss": 0.8544, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.09413700884591648, |
|
"grad_norm": 4.758176933846711, |
|
"learning_rate": 1.978522814415788e-05, |
|
"loss": 0.8738, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.09421929644106151, |
|
"grad_norm": 4.054376339470337, |
|
"learning_rate": 1.9784678374904694e-05, |
|
"loss": 0.8647, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.09430158403620655, |
|
"grad_norm": 3.254256033254886, |
|
"learning_rate": 1.9784127910563606e-05, |
|
"loss": 0.8353, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.09438387163135158, |
|
"grad_norm": 0.5816738083728531, |
|
"learning_rate": 1.978357675117372e-05, |
|
"loss": 0.5812, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.09446615922649661, |
|
"grad_norm": 0.49793035339456754, |
|
"learning_rate": 1.9783024896774187e-05, |
|
"loss": 0.5791, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.09454844682164164, |
|
"grad_norm": 4.179537892792988, |
|
"learning_rate": 1.9782472347404206e-05, |
|
"loss": 0.8907, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.09463073441678667, |
|
"grad_norm": 4.067029184300302, |
|
"learning_rate": 1.978191910310304e-05, |
|
"loss": 0.8541, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.0947130220119317, |
|
"grad_norm": 4.248345665782451, |
|
"learning_rate": 1.9781365163909984e-05, |
|
"loss": 0.8632, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.09479530960707673, |
|
"grad_norm": 6.439138971096778, |
|
"learning_rate": 1.978081052986439e-05, |
|
"loss": 0.8629, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.09487759720222176, |
|
"grad_norm": 6.71298685938902, |
|
"learning_rate": 1.9780255201005656e-05, |
|
"loss": 0.8549, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.09495988479736679, |
|
"grad_norm": 3.967437431624442, |
|
"learning_rate": 1.9779699177373236e-05, |
|
"loss": 0.8732, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.09504217239251182, |
|
"grad_norm": 0.8392360999561069, |
|
"learning_rate": 1.9779142459006626e-05, |
|
"loss": 0.5872, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.09512445998765687, |
|
"grad_norm": 4.657178845971167, |
|
"learning_rate": 1.9778585045945374e-05, |
|
"loss": 0.8495, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.0952067475828019, |
|
"grad_norm": 4.123727952348605, |
|
"learning_rate": 1.977802693822908e-05, |
|
"loss": 0.9142, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.09528903517794693, |
|
"grad_norm": 0.5860758553236142, |
|
"learning_rate": 1.9777468135897387e-05, |
|
"loss": 0.5549, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.09537132277309196, |
|
"grad_norm": 0.5401053295003246, |
|
"learning_rate": 1.9776908638989996e-05, |
|
"loss": 0.5801, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.09545361036823699, |
|
"grad_norm": 0.5496816005625466, |
|
"learning_rate": 1.9776348447546653e-05, |
|
"loss": 0.5839, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09553589796338202, |
|
"grad_norm": 6.020685438337091, |
|
"learning_rate": 1.977578756160715e-05, |
|
"loss": 0.866, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.09561818555852705, |
|
"grad_norm": 2.792057637957128, |
|
"learning_rate": 1.9775225981211333e-05, |
|
"loss": 0.8638, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.09570047315367208, |
|
"grad_norm": 0.5553177375677683, |
|
"learning_rate": 1.9774663706399092e-05, |
|
"loss": 0.5612, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.09578276074881711, |
|
"grad_norm": 5.245834669495098, |
|
"learning_rate": 1.9774100737210376e-05, |
|
"loss": 0.8688, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.09586504834396214, |
|
"grad_norm": 3.5768926302294344, |
|
"learning_rate": 1.977353707368518e-05, |
|
"loss": 0.897, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.09594733593910718, |
|
"grad_norm": 3.381007087662086, |
|
"learning_rate": 1.9772972715863534e-05, |
|
"loss": 0.8956, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.0960296235342522, |
|
"grad_norm": 4.24711216964703, |
|
"learning_rate": 1.9772407663785538e-05, |
|
"loss": 0.8546, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.09611191112939724, |
|
"grad_norm": 0.5978826180005935, |
|
"learning_rate": 1.977184191749133e-05, |
|
"loss": 0.5658, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.09619419872454228, |
|
"grad_norm": 5.6864731543708285, |
|
"learning_rate": 1.9771275477021102e-05, |
|
"loss": 0.8573, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.09627648631968731, |
|
"grad_norm": 0.5306016735606011, |
|
"learning_rate": 1.9770708342415087e-05, |
|
"loss": 0.5443, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.09635877391483234, |
|
"grad_norm": 3.4108513712835733, |
|
"learning_rate": 1.9770140513713582e-05, |
|
"loss": 0.9162, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.09644106150997737, |
|
"grad_norm": 3.0240876250486775, |
|
"learning_rate": 1.976957199095692e-05, |
|
"loss": 0.8959, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.0965233491051224, |
|
"grad_norm": 4.329264160111276, |
|
"learning_rate": 1.9769002774185483e-05, |
|
"loss": 0.8581, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.09660563670026744, |
|
"grad_norm": 2.8538371301611045, |
|
"learning_rate": 1.9768432863439714e-05, |
|
"loss": 0.8472, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.09668792429541247, |
|
"grad_norm": 4.192529144078922, |
|
"learning_rate": 1.97678622587601e-05, |
|
"loss": 0.8697, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.0967702118905575, |
|
"grad_norm": 3.729038589656874, |
|
"learning_rate": 1.976729096018717e-05, |
|
"loss": 0.8319, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.09685249948570253, |
|
"grad_norm": 0.6437788103093597, |
|
"learning_rate": 1.976671896776151e-05, |
|
"loss": 0.5736, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.09693478708084756, |
|
"grad_norm": 3.9035454070115017, |
|
"learning_rate": 1.9766146281523753e-05, |
|
"loss": 0.8874, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.09701707467599259, |
|
"grad_norm": 3.819713897204886, |
|
"learning_rate": 1.9765572901514583e-05, |
|
"loss": 0.8422, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.09709936227113762, |
|
"grad_norm": 5.277006488684462, |
|
"learning_rate": 1.9764998827774734e-05, |
|
"loss": 0.8849, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09718164986628265, |
|
"grad_norm": 5.189466257849834, |
|
"learning_rate": 1.9764424060344988e-05, |
|
"loss": 0.8612, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.0972639374614277, |
|
"grad_norm": 3.4415909778873743, |
|
"learning_rate": 1.9763848599266168e-05, |
|
"loss": 0.8649, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.09734622505657273, |
|
"grad_norm": 3.5762421871051, |
|
"learning_rate": 1.976327244457916e-05, |
|
"loss": 0.8643, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.09742851265171776, |
|
"grad_norm": 2.9475630534612116, |
|
"learning_rate": 1.976269559632489e-05, |
|
"loss": 0.8756, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.09751080024686279, |
|
"grad_norm": 2.865959286407617, |
|
"learning_rate": 1.976211805454434e-05, |
|
"loss": 0.8317, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.09759308784200782, |
|
"grad_norm": 0.5278838170529865, |
|
"learning_rate": 1.976153981927853e-05, |
|
"loss": 0.5707, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.09767537543715285, |
|
"grad_norm": 0.5151202226322995, |
|
"learning_rate": 1.976096089056855e-05, |
|
"loss": 0.5589, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.09775766303229788, |
|
"grad_norm": 5.474549135950859, |
|
"learning_rate": 1.9760381268455515e-05, |
|
"loss": 0.8707, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.09783995062744291, |
|
"grad_norm": 2.886942130305931, |
|
"learning_rate": 1.9759800952980604e-05, |
|
"loss": 0.8764, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.09792223822258794, |
|
"grad_norm": 3.5448856849038015, |
|
"learning_rate": 1.9759219944185045e-05, |
|
"loss": 0.8546, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.09800452581773297, |
|
"grad_norm": 2.3163053463145022, |
|
"learning_rate": 1.9758638242110105e-05, |
|
"loss": 0.827, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.098086813412878, |
|
"grad_norm": 3.2678753876711903, |
|
"learning_rate": 1.9758055846797113e-05, |
|
"loss": 0.8456, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.09816910100802304, |
|
"grad_norm": 4.046087494412628, |
|
"learning_rate": 1.9757472758287437e-05, |
|
"loss": 0.8565, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.09825138860316807, |
|
"grad_norm": 5.312871548189173, |
|
"learning_rate": 1.9756888976622504e-05, |
|
"loss": 0.8316, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.09833367619831311, |
|
"grad_norm": 3.5965506794172035, |
|
"learning_rate": 1.9756304501843782e-05, |
|
"loss": 0.8479, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.09841596379345814, |
|
"grad_norm": 4.869038156703397, |
|
"learning_rate": 1.975571933399279e-05, |
|
"loss": 0.8957, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.09849825138860317, |
|
"grad_norm": 5.073504198475643, |
|
"learning_rate": 1.9755133473111097e-05, |
|
"loss": 0.8748, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.0985805389837482, |
|
"grad_norm": 4.129896753535656, |
|
"learning_rate": 1.9754546919240325e-05, |
|
"loss": 0.8624, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.09866282657889323, |
|
"grad_norm": 0.75499109894716, |
|
"learning_rate": 1.975395967242214e-05, |
|
"loss": 0.5753, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.09874511417403826, |
|
"grad_norm": 4.926214741317277, |
|
"learning_rate": 1.9753371732698255e-05, |
|
"loss": 0.8514, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0988274017691833, |
|
"grad_norm": 4.113995566064139, |
|
"learning_rate": 1.9752783100110443e-05, |
|
"loss": 0.8735, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.09890968936432833, |
|
"grad_norm": 0.5883860438611207, |
|
"learning_rate": 1.975219377470052e-05, |
|
"loss": 0.6035, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.09899197695947336, |
|
"grad_norm": 3.3466076308514863, |
|
"learning_rate": 1.9751603756510344e-05, |
|
"loss": 0.8769, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.09907426455461839, |
|
"grad_norm": 0.47595350765066086, |
|
"learning_rate": 1.9751013045581835e-05, |
|
"loss": 0.5663, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.09915655214976342, |
|
"grad_norm": 3.4049170080353615, |
|
"learning_rate": 1.975042164195695e-05, |
|
"loss": 0.8363, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.09923883974490845, |
|
"grad_norm": 3.7661200169302327, |
|
"learning_rate": 1.974982954567771e-05, |
|
"loss": 0.8437, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.09932112734005348, |
|
"grad_norm": 3.6094210284619286, |
|
"learning_rate": 1.9749236756786167e-05, |
|
"loss": 0.861, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.09940341493519853, |
|
"grad_norm": 3.145969814243711, |
|
"learning_rate": 1.9748643275324438e-05, |
|
"loss": 0.8454, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.09948570253034356, |
|
"grad_norm": 3.6067880218861568, |
|
"learning_rate": 1.9748049101334684e-05, |
|
"loss": 0.8682, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.09956799012548859, |
|
"grad_norm": 3.0185050449291984, |
|
"learning_rate": 1.974745423485911e-05, |
|
"loss": 0.8708, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09965027772063362, |
|
"grad_norm": 3.128449103884966, |
|
"learning_rate": 1.9746858675939974e-05, |
|
"loss": 0.8594, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.09973256531577865, |
|
"grad_norm": 0.6028578588325906, |
|
"learning_rate": 1.9746262424619585e-05, |
|
"loss": 0.6006, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.09981485291092368, |
|
"grad_norm": 0.5378805528352323, |
|
"learning_rate": 1.9745665480940304e-05, |
|
"loss": 0.5702, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.09989714050606871, |
|
"grad_norm": 2.9709104250769025, |
|
"learning_rate": 1.974506784494453e-05, |
|
"loss": 0.8769, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.09997942810121374, |
|
"grad_norm": 3.5710834059738983, |
|
"learning_rate": 1.974446951667472e-05, |
|
"loss": 0.8524, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.10006171569635877, |
|
"grad_norm": 3.564453597862319, |
|
"learning_rate": 1.9743870496173385e-05, |
|
"loss": 0.8602, |
|
"step": 1216 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 12152, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 608, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2834347550703616.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |