{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1221,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002457002457002457,
      "grad_norm": 4.019534111022949,
      "learning_rate": 5.405405405405406e-07,
      "loss": 1.1042,
      "step": 1
    },
    {
      "epoch": 0.004914004914004914,
      "grad_norm": 3.4502384662628174,
      "learning_rate": 1.0810810810810812e-06,
      "loss": 1.1217,
      "step": 2
    },
    {
      "epoch": 0.007371007371007371,
      "grad_norm": 3.351306438446045,
      "learning_rate": 1.6216216216216219e-06,
      "loss": 1.0932,
      "step": 3
    },
    {
      "epoch": 0.009828009828009828,
      "grad_norm": 3.862949848175049,
      "learning_rate": 2.1621621621621623e-06,
      "loss": 1.0964,
      "step": 4
    },
    {
      "epoch": 0.012285012285012284,
      "grad_norm": 3.430417776107788,
      "learning_rate": 2.702702702702703e-06,
      "loss": 1.1157,
      "step": 5
    },
    {
      "epoch": 0.014742014742014743,
      "grad_norm": 3.059828758239746,
      "learning_rate": 3.2432432432432437e-06,
      "loss": 1.1316,
      "step": 6
    },
    {
      "epoch": 0.0171990171990172,
      "grad_norm": 2.8164730072021484,
      "learning_rate": 3.7837837837837844e-06,
      "loss": 1.1034,
      "step": 7
    },
    {
      "epoch": 0.019656019656019656,
      "grad_norm": 2.527447462081909,
      "learning_rate": 4.324324324324325e-06,
      "loss": 1.0492,
      "step": 8
    },
    {
      "epoch": 0.022113022113022112,
      "grad_norm": 2.58087158203125,
      "learning_rate": 4.864864864864866e-06,
      "loss": 1.0547,
      "step": 9
    },
    {
      "epoch": 0.02457002457002457,
      "grad_norm": 2.0490829944610596,
      "learning_rate": 5.405405405405406e-06,
      "loss": 1.0195,
      "step": 10
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 1.5082110166549683,
      "learning_rate": 5.945945945945947e-06,
      "loss": 0.9986,
      "step": 11
    },
    {
      "epoch": 0.029484029484029485,
      "grad_norm": 1.6830556392669678,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.9964,
      "step": 12
    },
    {
      "epoch": 0.03194103194103194,
      "grad_norm": 1.7410277128219604,
      "learning_rate": 7.027027027027028e-06,
      "loss": 0.9768,
      "step": 13
    },
    {
      "epoch": 0.0343980343980344,
      "grad_norm": 1.6225413084030151,
      "learning_rate": 7.567567567567569e-06,
      "loss": 0.9261,
      "step": 14
    },
    {
      "epoch": 0.036855036855036855,
      "grad_norm": 1.786592960357666,
      "learning_rate": 8.108108108108109e-06,
      "loss": 1.0012,
      "step": 15
    },
    {
      "epoch": 0.03931203931203931,
      "grad_norm": 1.6199265718460083,
      "learning_rate": 8.64864864864865e-06,
      "loss": 1.0118,
      "step": 16
    },
    {
      "epoch": 0.04176904176904177,
      "grad_norm": 1.5307223796844482,
      "learning_rate": 9.189189189189191e-06,
      "loss": 1.0147,
      "step": 17
    },
    {
      "epoch": 0.044226044226044224,
      "grad_norm": 1.498974084854126,
      "learning_rate": 9.729729729729732e-06,
      "loss": 0.9955,
      "step": 18
    },
    {
      "epoch": 0.04668304668304668,
      "grad_norm": 1.3995916843414307,
      "learning_rate": 1.027027027027027e-05,
      "loss": 0.9256,
      "step": 19
    },
    {
      "epoch": 0.04914004914004914,
      "grad_norm": 1.4726876020431519,
      "learning_rate": 1.0810810810810812e-05,
      "loss": 0.974,
      "step": 20
    },
    {
      "epoch": 0.051597051597051594,
      "grad_norm": 1.3012052774429321,
      "learning_rate": 1.1351351351351352e-05,
      "loss": 0.9271,
      "step": 21
    },
    {
      "epoch": 0.05405405405405406,
      "grad_norm": 1.274909257888794,
      "learning_rate": 1.1891891891891894e-05,
      "loss": 0.9561,
      "step": 22
    },
    {
      "epoch": 0.056511056511056514,
      "grad_norm": 1.120010495185852,
      "learning_rate": 1.2432432432432433e-05,
      "loss": 0.939,
      "step": 23
    },
    {
      "epoch": 0.05896805896805897,
      "grad_norm": 1.1698671579360962,
      "learning_rate": 1.2972972972972975e-05,
      "loss": 1.001,
      "step": 24
    },
    {
      "epoch": 0.06142506142506143,
      "grad_norm": 1.1809970140457153,
      "learning_rate": 1.3513513513513515e-05,
      "loss": 0.9752,
      "step": 25
    },
    {
      "epoch": 0.06388206388206388,
      "grad_norm": 1.2462128400802612,
      "learning_rate": 1.4054054054054055e-05,
      "loss": 0.922,
      "step": 26
    },
    {
      "epoch": 0.06633906633906633,
      "grad_norm": 1.2176518440246582,
      "learning_rate": 1.4594594594594596e-05,
      "loss": 1.0068,
      "step": 27
    },
    {
      "epoch": 0.0687960687960688,
      "grad_norm": 1.1821041107177734,
      "learning_rate": 1.5135135135135138e-05,
      "loss": 0.9086,
      "step": 28
    },
    {
      "epoch": 0.07125307125307126,
      "grad_norm": 1.2778708934783936,
      "learning_rate": 1.5675675675675676e-05,
      "loss": 0.9738,
      "step": 29
    },
    {
      "epoch": 0.07371007371007371,
      "grad_norm": 1.3080499172210693,
      "learning_rate": 1.6216216216216218e-05,
      "loss": 0.9731,
      "step": 30
    },
    {
      "epoch": 0.07616707616707617,
      "grad_norm": 1.1985275745391846,
      "learning_rate": 1.6756756756756757e-05,
      "loss": 0.9423,
      "step": 31
    },
    {
      "epoch": 0.07862407862407862,
      "grad_norm": 1.2243951559066772,
      "learning_rate": 1.72972972972973e-05,
      "loss": 0.9437,
      "step": 32
    },
    {
      "epoch": 0.08108108108108109,
      "grad_norm": 1.4167068004608154,
      "learning_rate": 1.783783783783784e-05,
      "loss": 0.9431,
      "step": 33
    },
    {
      "epoch": 0.08353808353808354,
      "grad_norm": 1.3067026138305664,
      "learning_rate": 1.8378378378378383e-05,
      "loss": 0.9741,
      "step": 34
    },
    {
      "epoch": 0.085995085995086,
      "grad_norm": 1.2219585180282593,
      "learning_rate": 1.891891891891892e-05,
      "loss": 0.9488,
      "step": 35
    },
    {
      "epoch": 0.08845208845208845,
      "grad_norm": 1.4997390508651733,
      "learning_rate": 1.9459459459459463e-05,
      "loss": 0.9776,
      "step": 36
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 1.211965560913086,
      "learning_rate": 2e-05,
      "loss": 0.9315,
      "step": 37
    },
    {
      "epoch": 0.09336609336609336,
      "grad_norm": 1.2097049951553345,
      "learning_rate": 1.9999964798101195e-05,
      "loss": 1.0067,
      "step": 38
    },
    {
      "epoch": 0.09582309582309582,
      "grad_norm": 1.3059693574905396,
      "learning_rate": 1.9999859192652612e-05,
      "loss": 0.9853,
      "step": 39
    },
    {
      "epoch": 0.09828009828009827,
      "grad_norm": 1.125588297843933,
      "learning_rate": 1.9999683184397752e-05,
      "loss": 0.8909,
      "step": 40
    },
    {
      "epoch": 0.10073710073710074,
      "grad_norm": 1.353481650352478,
      "learning_rate": 1.9999436774575783e-05,
      "loss": 0.9858,
      "step": 41
    },
    {
      "epoch": 0.10319410319410319,
      "grad_norm": 1.2949520349502563,
      "learning_rate": 1.999911996492152e-05,
      "loss": 0.9285,
      "step": 42
    },
    {
      "epoch": 0.10565110565110565,
      "grad_norm": 1.2486616373062134,
      "learning_rate": 1.9998732757665428e-05,
      "loss": 0.982,
      "step": 43
    },
    {
      "epoch": 0.10810810810810811,
      "grad_norm": 1.268140435218811,
      "learning_rate": 1.9998275155533585e-05,
      "loss": 0.9903,
      "step": 44
    },
    {
      "epoch": 0.11056511056511056,
      "grad_norm": 1.1526011228561401,
      "learning_rate": 1.9997747161747696e-05,
      "loss": 0.9467,
      "step": 45
    },
    {
      "epoch": 0.11302211302211303,
      "grad_norm": 1.2035915851593018,
      "learning_rate": 1.9997148780025028e-05,
      "loss": 0.9644,
      "step": 46
    },
    {
      "epoch": 0.11547911547911548,
      "grad_norm": 1.179955005645752,
      "learning_rate": 1.9996480014578422e-05,
      "loss": 0.9554,
      "step": 47
    },
    {
      "epoch": 0.11793611793611794,
      "grad_norm": 1.2358040809631348,
      "learning_rate": 1.9995740870116233e-05,
      "loss": 1.0244,
      "step": 48
    },
    {
      "epoch": 0.12039312039312039,
      "grad_norm": 1.2099424600601196,
      "learning_rate": 1.9994931351842327e-05,
      "loss": 0.9799,
      "step": 49
    },
    {
      "epoch": 0.12285012285012285,
      "grad_norm": 1.2106077671051025,
      "learning_rate": 1.9994051465456014e-05,
      "loss": 0.9805,
      "step": 50
    },
    {
      "epoch": 0.12530712530712532,
      "grad_norm": 1.1614030599594116,
      "learning_rate": 1.999310121715203e-05,
      "loss": 0.9502,
      "step": 51
    },
    {
      "epoch": 0.12776412776412777,
      "grad_norm": 1.3291958570480347,
      "learning_rate": 1.9992080613620486e-05,
      "loss": 0.9936,
      "step": 52
    },
    {
      "epoch": 0.13022113022113022,
      "grad_norm": 1.2553635835647583,
      "learning_rate": 1.999098966204682e-05,
      "loss": 0.9781,
      "step": 53
    },
    {
      "epoch": 0.13267813267813267,
      "grad_norm": 1.2526620626449585,
      "learning_rate": 1.9989828370111737e-05,
      "loss": 0.9853,
      "step": 54
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 1.15430748462677,
      "learning_rate": 1.998859674599118e-05,
      "loss": 0.9315,
      "step": 55
    },
    {
      "epoch": 0.1375921375921376,
      "grad_norm": 1.278533935546875,
      "learning_rate": 1.998729479835625e-05,
      "loss": 0.9431,
      "step": 56
    },
    {
      "epoch": 0.14004914004914004,
      "grad_norm": 1.1361323595046997,
      "learning_rate": 1.998592253637315e-05,
      "loss": 0.9736,
      "step": 57
    },
    {
      "epoch": 0.14250614250614252,
      "grad_norm": 1.0846306085586548,
      "learning_rate": 1.998447996970313e-05,
      "loss": 0.9886,
      "step": 58
    },
    {
      "epoch": 0.14496314496314497,
      "grad_norm": 1.1292674541473389,
      "learning_rate": 1.99829671085024e-05,
      "loss": 0.9776,
      "step": 59
    },
    {
      "epoch": 0.14742014742014742,
      "grad_norm": 1.276936650276184,
      "learning_rate": 1.9981383963422086e-05,
      "loss": 1.036,
      "step": 60
    },
    {
      "epoch": 0.14987714987714987,
      "grad_norm": 1.232346534729004,
      "learning_rate": 1.9979730545608128e-05,
      "loss": 0.9879,
      "step": 61
    },
    {
      "epoch": 0.15233415233415235,
      "grad_norm": 1.2383723258972168,
      "learning_rate": 1.9978006866701212e-05,
      "loss": 1.0056,
      "step": 62
    },
    {
      "epoch": 0.1547911547911548,
      "grad_norm": 1.160910725593567,
      "learning_rate": 1.9976212938836692e-05,
      "loss": 0.9159,
      "step": 63
    },
    {
      "epoch": 0.15724815724815724,
      "grad_norm": 1.2346571683883667,
      "learning_rate": 1.9974348774644503e-05,
      "loss": 0.9623,
      "step": 64
    },
    {
      "epoch": 0.1597051597051597,
      "grad_norm": 1.127328634262085,
      "learning_rate": 1.9972414387249074e-05,
      "loss": 0.9398,
      "step": 65
    },
    {
      "epoch": 0.16216216216216217,
      "grad_norm": 1.113250732421875,
      "learning_rate": 1.9970409790269216e-05,
      "loss": 1.0158,
      "step": 66
    },
    {
      "epoch": 0.16461916461916462,
      "grad_norm": 1.197217345237732,
      "learning_rate": 1.9968334997818062e-05,
      "loss": 0.9438,
      "step": 67
    },
    {
      "epoch": 0.16707616707616707,
      "grad_norm": 1.1856095790863037,
      "learning_rate": 1.996619002450294e-05,
      "loss": 0.9531,
      "step": 68
    },
    {
      "epoch": 0.16953316953316952,
      "grad_norm": 1.1724814176559448,
      "learning_rate": 1.9963974885425267e-05,
      "loss": 0.9764,
      "step": 69
    },
    {
      "epoch": 0.171990171990172,
      "grad_norm": 1.1636273860931396,
      "learning_rate": 1.996168959618047e-05,
      "loss": 0.9479,
      "step": 70
    },
    {
      "epoch": 0.17444717444717445,
      "grad_norm": 1.2033659219741821,
      "learning_rate": 1.9959334172857852e-05,
      "loss": 0.9896,
      "step": 71
    },
    {
      "epoch": 0.1769041769041769,
      "grad_norm": 1.2620456218719482,
      "learning_rate": 1.9956908632040492e-05,
      "loss": 0.9773,
      "step": 72
    },
    {
      "epoch": 0.17936117936117937,
      "grad_norm": 1.1564908027648926,
      "learning_rate": 1.9954412990805107e-05,
      "loss": 0.9545,
      "step": 73
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 1.0925333499908447,
      "learning_rate": 1.995184726672197e-05,
      "loss": 0.9443,
      "step": 74
    },
    {
      "epoch": 0.18427518427518427,
      "grad_norm": 1.0715115070343018,
      "learning_rate": 1.9949211477854747e-05,
      "loss": 0.9111,
      "step": 75
    },
    {
      "epoch": 0.18673218673218672,
      "grad_norm": 1.1949642896652222,
      "learning_rate": 1.9946505642760398e-05,
      "loss": 0.988,
      "step": 76
    },
    {
      "epoch": 0.1891891891891892,
      "grad_norm": 1.209015130996704,
      "learning_rate": 1.994372978048903e-05,
      "loss": 0.9326,
      "step": 77
    },
    {
      "epoch": 0.19164619164619165,
      "grad_norm": 1.2035118341445923,
      "learning_rate": 1.9940883910583757e-05,
      "loss": 0.9723,
      "step": 78
    },
    {
      "epoch": 0.1941031941031941,
      "grad_norm": 1.2777043581008911,
      "learning_rate": 1.993796805308059e-05,
      "loss": 0.9982,
      "step": 79
    },
    {
      "epoch": 0.19656019656019655,
      "grad_norm": 1.1283223628997803,
      "learning_rate": 1.9934982228508278e-05,
      "loss": 0.9934,
      "step": 80
    },
    {
      "epoch": 0.19901719901719903,
      "grad_norm": 1.2025024890899658,
      "learning_rate": 1.9931926457888155e-05,
      "loss": 0.9628,
      "step": 81
    },
    {
      "epoch": 0.20147420147420148,
      "grad_norm": 1.0961697101593018,
      "learning_rate": 1.9928800762734007e-05,
      "loss": 0.9896,
      "step": 82
    },
    {
      "epoch": 0.20393120393120392,
      "grad_norm": 1.0925512313842773,
      "learning_rate": 1.9925605165051917e-05,
      "loss": 0.9538,
      "step": 83
    },
    {
      "epoch": 0.20638820638820637,
      "grad_norm": 1.1571182012557983,
      "learning_rate": 1.9922339687340102e-05,
      "loss": 0.9515,
      "step": 84
    },
    {
      "epoch": 0.20884520884520885,
      "grad_norm": 1.1376878023147583,
      "learning_rate": 1.9919004352588768e-05,
      "loss": 0.9172,
      "step": 85
    },
    {
      "epoch": 0.2113022113022113,
      "grad_norm": 1.1190766096115112,
      "learning_rate": 1.9915599184279943e-05,
      "loss": 0.9436,
      "step": 86
    },
    {
      "epoch": 0.21375921375921375,
      "grad_norm": 1.1635316610336304,
      "learning_rate": 1.9912124206387297e-05,
      "loss": 0.9493,
      "step": 87
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 1.1531853675842285,
      "learning_rate": 1.9908579443375995e-05,
      "loss": 0.972,
      "step": 88
    },
    {
      "epoch": 0.21867321867321868,
      "grad_norm": 1.0905935764312744,
      "learning_rate": 1.990496492020252e-05,
      "loss": 0.9686,
      "step": 89
    },
    {
      "epoch": 0.22113022113022113,
      "grad_norm": 1.1130902767181396,
      "learning_rate": 1.9901280662314483e-05,
      "loss": 0.9579,
      "step": 90
    },
    {
      "epoch": 0.22358722358722358,
      "grad_norm": 1.2269314527511597,
      "learning_rate": 1.9897526695650458e-05,
      "loss": 1.0352,
      "step": 91
    },
    {
      "epoch": 0.22604422604422605,
      "grad_norm": 1.1804312467575073,
      "learning_rate": 1.9893703046639806e-05,
      "loss": 0.9444,
      "step": 92
    },
    {
      "epoch": 0.2285012285012285,
      "grad_norm": 1.222614049911499,
      "learning_rate": 1.9889809742202454e-05,
      "loss": 0.9486,
      "step": 93
    },
    {
      "epoch": 0.23095823095823095,
      "grad_norm": 1.2290183305740356,
      "learning_rate": 1.9885846809748754e-05,
      "loss": 1.0318,
      "step": 94
    },
    {
      "epoch": 0.2334152334152334,
      "grad_norm": 1.0695152282714844,
      "learning_rate": 1.9881814277179248e-05,
      "loss": 1.0019,
      "step": 95
    },
    {
      "epoch": 0.23587223587223588,
      "grad_norm": 1.105061650276184,
      "learning_rate": 1.9877712172884504e-05,
      "loss": 0.983,
      "step": 96
    },
    {
      "epoch": 0.23832923832923833,
      "grad_norm": 1.1585967540740967,
      "learning_rate": 1.9873540525744888e-05,
      "loss": 0.9417,
      "step": 97
    },
    {
      "epoch": 0.24078624078624078,
      "grad_norm": 1.1335004568099976,
      "learning_rate": 1.9869299365130384e-05,
      "loss": 0.952,
      "step": 98
    },
    {
      "epoch": 0.24324324324324326,
      "grad_norm": 1.181249737739563,
      "learning_rate": 1.9864988720900367e-05,
      "loss": 0.9524,
      "step": 99
    },
    {
      "epoch": 0.2457002457002457,
      "grad_norm": 1.2462314367294312,
      "learning_rate": 1.986060862340342e-05,
      "loss": 0.9499,
      "step": 100
    },
    {
      "epoch": 0.24815724815724816,
      "grad_norm": 1.15605628490448,
      "learning_rate": 1.9856159103477085e-05,
      "loss": 0.9569,
      "step": 101
    },
    {
      "epoch": 0.25061425061425063,
      "grad_norm": 1.0611865520477295,
      "learning_rate": 1.9851640192447675e-05,
      "loss": 0.8781,
      "step": 102
    },
    {
      "epoch": 0.25307125307125306,
      "grad_norm": 1.0584359169006348,
      "learning_rate": 1.984705192213004e-05,
      "loss": 0.9487,
      "step": 103
    },
    {
      "epoch": 0.25552825552825553,
      "grad_norm": 1.1824793815612793,
      "learning_rate": 1.9842394324827342e-05,
      "loss": 0.97,
      "step": 104
    },
    {
      "epoch": 0.257985257985258,
      "grad_norm": 1.1818736791610718,
      "learning_rate": 1.983766743333084e-05,
      "loss": 0.9366,
      "step": 105
    },
    {
      "epoch": 0.26044226044226043,
      "grad_norm": 1.1190263032913208,
      "learning_rate": 1.9832871280919638e-05,
      "loss": 0.91,
      "step": 106
    },
    {
      "epoch": 0.2628992628992629,
      "grad_norm": 1.2692769765853882,
      "learning_rate": 1.9828005901360476e-05,
      "loss": 1.0086,
      "step": 107
    },
    {
      "epoch": 0.26535626535626533,
      "grad_norm": 1.1339046955108643,
      "learning_rate": 1.982307132890747e-05,
      "loss": 0.9537,
      "step": 108
    },
    {
      "epoch": 0.2678132678132678,
      "grad_norm": 1.2185297012329102,
      "learning_rate": 1.9818067598301894e-05,
      "loss": 0.9532,
      "step": 109
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 1.0758349895477295,
      "learning_rate": 1.9812994744771898e-05,
      "loss": 0.9056,
      "step": 110
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 1.1684318780899048,
      "learning_rate": 1.9807852804032306e-05,
      "loss": 0.9466,
      "step": 111
    },
    {
      "epoch": 0.2751842751842752,
      "grad_norm": 1.2133638858795166,
      "learning_rate": 1.980264181228433e-05,
      "loss": 1.0147,
      "step": 112
    },
    {
      "epoch": 0.27764127764127766,
      "grad_norm": 1.2245734930038452,
      "learning_rate": 1.9797361806215335e-05,
      "loss": 0.9824,
      "step": 113
    },
    {
      "epoch": 0.2800982800982801,
      "grad_norm": 1.23886239528656,
      "learning_rate": 1.979201282299856e-05,
      "loss": 0.9882,
      "step": 114
    },
    {
      "epoch": 0.28255528255528256,
      "grad_norm": 1.0840333700180054,
      "learning_rate": 1.978659490029289e-05,
      "loss": 0.952,
      "step": 115
    },
    {
      "epoch": 0.28501228501228504,
      "grad_norm": 1.2387017011642456,
      "learning_rate": 1.9781108076242548e-05,
      "loss": 0.9989,
      "step": 116
    },
    {
      "epoch": 0.28746928746928746,
      "grad_norm": 1.1117432117462158,
      "learning_rate": 1.9775552389476865e-05,
      "loss": 0.9092,
      "step": 117
    },
    {
      "epoch": 0.28992628992628994,
      "grad_norm": 1.1947935819625854,
      "learning_rate": 1.9769927879109982e-05,
      "loss": 0.9392,
      "step": 118
    },
    {
      "epoch": 0.29238329238329236,
      "grad_norm": 1.2482883930206299,
      "learning_rate": 1.9764234584740592e-05,
      "loss": 0.9591,
      "step": 119
    },
    {
      "epoch": 0.29484029484029484,
      "grad_norm": 1.0863734483718872,
      "learning_rate": 1.9758472546451645e-05,
      "loss": 0.9346,
      "step": 120
    },
    {
      "epoch": 0.2972972972972973,
      "grad_norm": 1.2650574445724487,
      "learning_rate": 1.9752641804810083e-05,
      "loss": 1.009,
      "step": 121
    },
    {
      "epoch": 0.29975429975429974,
      "grad_norm": 1.0649502277374268,
      "learning_rate": 1.974674240086654e-05,
      "loss": 0.9568,
      "step": 122
    },
    {
      "epoch": 0.3022113022113022,
      "grad_norm": 1.0599509477615356,
      "learning_rate": 1.974077437615506e-05,
      "loss": 0.9663,
      "step": 123
    },
    {
      "epoch": 0.3046683046683047,
      "grad_norm": 1.253313660621643,
      "learning_rate": 1.97347377726928e-05,
      "loss": 0.9606,
      "step": 124
    },
    {
      "epoch": 0.3071253071253071,
      "grad_norm": 1.143733263015747,
      "learning_rate": 1.9728632632979746e-05,
      "loss": 0.9633,
      "step": 125
    },
    {
      "epoch": 0.3095823095823096,
      "grad_norm": 1.154212236404419,
      "learning_rate": 1.9722458999998398e-05,
      "loss": 1.0065,
      "step": 126
    },
    {
      "epoch": 0.31203931203931207,
      "grad_norm": 1.0114328861236572,
      "learning_rate": 1.971621691721348e-05,
      "loss": 0.9483,
      "step": 127
    },
    {
      "epoch": 0.3144963144963145,
      "grad_norm": 1.1100865602493286,
      "learning_rate": 1.9709906428571616e-05,
      "loss": 0.9425,
      "step": 128
    },
    {
      "epoch": 0.31695331695331697,
      "grad_norm": 1.141754388809204,
      "learning_rate": 1.9703527578501052e-05,
      "loss": 0.9952,
      "step": 129
    },
    {
      "epoch": 0.3194103194103194,
      "grad_norm": 1.172715187072754,
      "learning_rate": 1.9697080411911313e-05,
      "loss": 1.0016,
      "step": 130
    },
    {
      "epoch": 0.32186732186732187,
      "grad_norm": 1.298166036605835,
      "learning_rate": 1.9690564974192893e-05,
      "loss": 0.9802,
      "step": 131
    },
    {
      "epoch": 0.32432432432432434,
      "grad_norm": 1.15988028049469,
      "learning_rate": 1.968398131121696e-05,
      "loss": 0.9493,
      "step": 132
    },
    {
      "epoch": 0.32678132678132676,
      "grad_norm": 1.1662455797195435,
      "learning_rate": 1.967732946933499e-05,
      "loss": 0.9291,
      "step": 133
    },
    {
      "epoch": 0.32923832923832924,
      "grad_norm": 1.1036912202835083,
      "learning_rate": 1.9670609495378484e-05,
      "loss": 1.0128,
      "step": 134
    },
    {
      "epoch": 0.3316953316953317,
      "grad_norm": 1.0609047412872314,
      "learning_rate": 1.9663821436658607e-05,
      "loss": 0.9378,
      "step": 135
    },
    {
      "epoch": 0.33415233415233414,
      "grad_norm": 1.1160823106765747,
      "learning_rate": 1.9656965340965872e-05,
      "loss": 0.9166,
      "step": 136
    },
    {
      "epoch": 0.3366093366093366,
      "grad_norm": 1.2418513298034668,
      "learning_rate": 1.9650041256569792e-05,
      "loss": 0.9673,
      "step": 137
    },
    {
      "epoch": 0.33906633906633904,
      "grad_norm": 1.1111809015274048,
      "learning_rate": 1.9643049232218554e-05,
      "loss": 0.959,
      "step": 138
    },
    {
      "epoch": 0.3415233415233415,
      "grad_norm": 1.1744064092636108,
      "learning_rate": 1.9635989317138666e-05,
      "loss": 0.957,
      "step": 139
    },
    {
      "epoch": 0.343980343980344,
      "grad_norm": 1.1628965139389038,
      "learning_rate": 1.962886156103461e-05,
      "loss": 0.9292,
      "step": 140
    },
    {
      "epoch": 0.3464373464373464,
      "grad_norm": 1.0930556058883667,
      "learning_rate": 1.9621666014088495e-05,
      "loss": 0.9468,
      "step": 141
    },
    {
      "epoch": 0.3488943488943489,
      "grad_norm": 1.2241873741149902,
      "learning_rate": 1.9614402726959703e-05,
      "loss": 0.9632,
      "step": 142
    },
    {
      "epoch": 0.35135135135135137,
      "grad_norm": 1.1155918836593628,
      "learning_rate": 1.960707175078454e-05,
      "loss": 0.9729,
      "step": 143
    },
    {
      "epoch": 0.3538083538083538,
      "grad_norm": 1.1242003440856934,
      "learning_rate": 1.9599673137175855e-05,
      "loss": 0.9955,
      "step": 144
    },
    {
      "epoch": 0.35626535626535627,
      "grad_norm": 1.0511232614517212,
      "learning_rate": 1.9592206938222703e-05,
      "loss": 0.9269,
      "step": 145
    },
    {
      "epoch": 0.35872235872235875,
      "grad_norm": 1.162567138671875,
      "learning_rate": 1.9584673206489955e-05,
      "loss": 0.9695,
      "step": 146
    },
    {
      "epoch": 0.36117936117936117,
      "grad_norm": 1.0482780933380127,
      "learning_rate": 1.9577071995017945e-05,
      "loss": 0.9019,
      "step": 147
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 1.086195468902588,
      "learning_rate": 1.956940335732209e-05,
      "loss": 0.9473,
      "step": 148
    },
    {
      "epoch": 0.36609336609336607,
      "grad_norm": 1.1613413095474243,
      "learning_rate": 1.956166734739251e-05,
      "loss": 0.9044,
      "step": 149
    },
    {
      "epoch": 0.36855036855036855,
      "grad_norm": 1.0689467191696167,
      "learning_rate": 1.9553864019693652e-05,
      "loss": 0.9168,
      "step": 150
    },
    {
      "epoch": 0.371007371007371,
      "grad_norm": 1.0362517833709717,
      "learning_rate": 1.9545993429163913e-05,
      "loss": 0.9087,
      "step": 151
    },
    {
      "epoch": 0.37346437346437344,
      "grad_norm": 1.3585790395736694,
      "learning_rate": 1.9538055631215233e-05,
      "loss": 0.9635,
      "step": 152
    },
    {
      "epoch": 0.3759213759213759,
      "grad_norm": 1.1443356275558472,
      "learning_rate": 1.953005068173272e-05,
      "loss": 0.9576,
      "step": 153
    },
    {
      "epoch": 0.3783783783783784,
      "grad_norm": 1.1501580476760864,
      "learning_rate": 1.952197863707427e-05,
      "loss": 0.9189,
      "step": 154
    },
    {
      "epoch": 0.3808353808353808,
      "grad_norm": 1.143819808959961,
      "learning_rate": 1.9513839554070132e-05,
      "loss": 0.9878,
      "step": 155
    },
    {
      "epoch": 0.3832923832923833,
      "grad_norm": 1.1135509014129639,
      "learning_rate": 1.9505633490022545e-05,
      "loss": 0.9122,
      "step": 156
    },
    {
      "epoch": 0.3857493857493858,
      "grad_norm": 1.051954746246338,
      "learning_rate": 1.949736050270532e-05,
      "loss": 0.9742,
      "step": 157
    },
    {
      "epoch": 0.3882063882063882,
      "grad_norm": 1.259486198425293,
      "learning_rate": 1.9489020650363427e-05,
      "loss": 0.9365,
      "step": 158
    },
    {
      "epoch": 0.3906633906633907,
      "grad_norm": 1.1075799465179443,
      "learning_rate": 1.948061399171259e-05,
      "loss": 0.971,
      "step": 159
    },
    {
      "epoch": 0.3931203931203931,
      "grad_norm": 1.145005226135254,
      "learning_rate": 1.9472140585938882e-05,
      "loss": 0.9839,
      "step": 160
    },
    {
      "epoch": 0.3955773955773956,
      "grad_norm": 1.0776715278625488,
      "learning_rate": 1.9463600492698297e-05,
      "loss": 0.9238,
      "step": 161
    },
    {
      "epoch": 0.39803439803439805,
      "grad_norm": 1.10464346408844,
      "learning_rate": 1.9454993772116336e-05,
      "loss": 0.9642,
      "step": 162
    },
    {
      "epoch": 0.4004914004914005,
      "grad_norm": 1.1394087076187134,
      "learning_rate": 1.9446320484787576e-05,
      "loss": 0.9942,
      "step": 163
    },
    {
      "epoch": 0.40294840294840295,
      "grad_norm": 1.2384252548217773,
      "learning_rate": 1.943758069177526e-05,
      "loss": 0.9503,
      "step": 164
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 1.1317917108535767,
      "learning_rate": 1.9428774454610845e-05,
      "loss": 0.9185,
      "step": 165
    },
    {
      "epoch": 0.40786240786240785,
      "grad_norm": 1.1217604875564575,
      "learning_rate": 1.9419901835293585e-05,
      "loss": 1.0281,
      "step": 166
    },
    {
      "epoch": 0.4103194103194103,
      "grad_norm": 1.1591936349868774,
      "learning_rate": 1.9410962896290092e-05,
      "loss": 0.989,
      "step": 167
    },
    {
      "epoch": 0.41277641277641275,
      "grad_norm": 1.1651326417922974,
      "learning_rate": 1.940195770053389e-05,
      "loss": 0.9724,
      "step": 168
    },
    {
      "epoch": 0.4152334152334152,
      "grad_norm": 1.0306227207183838,
      "learning_rate": 1.9392886311424975e-05,
      "loss": 0.8945,
      "step": 169
    },
    {
      "epoch": 0.4176904176904177,
      "grad_norm": 1.0212548971176147,
      "learning_rate": 1.9383748792829374e-05,
      "loss": 0.9647,
      "step": 170
    },
    {
      "epoch": 0.4201474201474201,
      "grad_norm": 1.0641008615493774,
      "learning_rate": 1.9374545209078687e-05,
      "loss": 0.9535,
      "step": 171
    },
    {
      "epoch": 0.4226044226044226,
      "grad_norm": 1.0543537139892578,
      "learning_rate": 1.936527562496964e-05,
      "loss": 0.9291,
      "step": 172
    },
    {
      "epoch": 0.4250614250614251,
      "grad_norm": 1.0414016246795654,
      "learning_rate": 1.9355940105763622e-05,
      "loss": 0.9358,
      "step": 173
    },
    {
      "epoch": 0.4275184275184275,
      "grad_norm": 1.0907888412475586,
      "learning_rate": 1.934653871718624e-05,
      "loss": 0.8968,
      "step": 174
    },
    {
      "epoch": 0.42997542997543,
      "grad_norm": 1.0105867385864258,
      "learning_rate": 1.933707152542683e-05,
      "loss": 0.915,
      "step": 175
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 1.1251968145370483,
      "learning_rate": 1.932753859713803e-05,
      "loss": 0.9587,
      "step": 176
    },
    {
      "epoch": 0.4348894348894349,
      "grad_norm": 1.0517654418945312,
      "learning_rate": 1.9317939999435262e-05,
      "loss": 0.963,
      "step": 177
    },
    {
      "epoch": 0.43734643734643736,
      "grad_norm": 1.1260952949523926,
      "learning_rate": 1.930827579989631e-05,
      "loss": 0.9967,
      "step": 178
    },
    {
      "epoch": 0.4398034398034398,
      "grad_norm": 1.174589991569519,
      "learning_rate": 1.9298546066560802e-05,
      "loss": 0.9883,
      "step": 179
    },
    {
      "epoch": 0.44226044226044225,
      "grad_norm": 1.0986108779907227,
      "learning_rate": 1.928875086792976e-05,
      "loss": 0.9477,
      "step": 180
    },
    {
      "epoch": 0.44471744471744473,
      "grad_norm": 1.0878371000289917,
      "learning_rate": 1.9278890272965097e-05,
      "loss": 0.9822,
      "step": 181
    },
    {
      "epoch": 0.44717444717444715,
      "grad_norm": 1.1421008110046387,
      "learning_rate": 1.926896435108915e-05,
      "loss": 0.9644,
      "step": 182
    },
    {
      "epoch": 0.44963144963144963,
      "grad_norm": 1.1047364473342896,
      "learning_rate": 1.9258973172184176e-05,
      "loss": 0.9562,
      "step": 183
    },
    {
      "epoch": 0.4520884520884521,
      "grad_norm": 1.0693336725234985,
      "learning_rate": 1.924891680659187e-05,
      "loss": 0.9329,
      "step": 184
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 1.0154356956481934,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 0.9447,
      "step": 185
    },
    {
      "epoch": 0.457002457002457,
      "grad_norm": 1.0603218078613281,
      "learning_rate": 1.922860879900624e-05,
      "loss": 0.9828,
      "step": 186
    },
    {
      "epoch": 0.4594594594594595,
      "grad_norm": 1.0560318231582642,
      "learning_rate": 1.9218357299988998e-05,
      "loss": 0.9847,
      "step": 187
    },
    {
      "epoch": 0.4619164619164619,
      "grad_norm": 1.0979032516479492,
      "learning_rate": 1.920804090023559e-05,
      "loss": 0.9308,
      "step": 188
    },
    {
      "epoch": 0.4643734643734644,
      "grad_norm": 1.1306419372558594,
      "learning_rate": 1.9197659672377388e-05,
      "loss": 0.9425,
      "step": 189
    },
    {
      "epoch": 0.4668304668304668,
      "grad_norm": 1.0981731414794922,
      "learning_rate": 1.9187213689502177e-05,
      "loss": 0.9414,
      "step": 190
    },
    {
      "epoch": 0.4692874692874693,
      "grad_norm": 1.0769071578979492,
      "learning_rate": 1.9176703025153643e-05,
      "loss": 0.9608,
      "step": 191
    },
    {
      "epoch": 0.47174447174447176,
      "grad_norm": 1.1851149797439575,
      "learning_rate": 1.9166127753330856e-05,
      "loss": 0.9365,
      "step": 192
    },
    {
      "epoch": 0.4742014742014742,
      "grad_norm": 1.1022464036941528,
      "learning_rate": 1.915548794848775e-05,
      "loss": 0.9721,
      "step": 193
    },
    {
      "epoch": 0.47665847665847666,
      "grad_norm": 1.0086687803268433,
      "learning_rate": 1.914478368553258e-05,
      "loss": 0.9209,
      "step": 194
    },
    {
      "epoch": 0.47911547911547914,
      "grad_norm": 1.169425368309021,
      "learning_rate": 1.9134015039827433e-05,
      "loss": 0.9337,
      "step": 195
    },
    {
      "epoch": 0.48157248157248156,
      "grad_norm": 1.0467708110809326,
      "learning_rate": 1.9123182087187657e-05,
      "loss": 0.9864,
      "step": 196
    },
    {
      "epoch": 0.48402948402948404,
      "grad_norm": 1.0687167644500732,
      "learning_rate": 1.911228490388136e-05,
      "loss": 0.9307,
      "step": 197
    },
    {
      "epoch": 0.4864864864864865,
      "grad_norm": 1.1125750541687012,
      "learning_rate": 1.9101323566628842e-05,
      "loss": 0.9279,
      "step": 198
    },
    {
      "epoch": 0.48894348894348894,
      "grad_norm": 1.10071861743927,
      "learning_rate": 1.909029815260209e-05,
      "loss": 0.9786,
      "step": 199
    },
    {
      "epoch": 0.4914004914004914,
      "grad_norm": 1.141037940979004,
      "learning_rate": 1.9079208739424198e-05,
      "loss": 1.0065,
      "step": 200
    },
    {
      "epoch": 0.49385749385749383,
      "grad_norm": 1.0701899528503418,
      "learning_rate": 1.906805540516885e-05,
      "loss": 0.9552,
      "step": 201
    },
    {
      "epoch": 0.4963144963144963,
      "grad_norm": 1.155552864074707,
      "learning_rate": 1.905683822835975e-05,
      "loss": 0.9931,
      "step": 202
    },
    {
      "epoch": 0.4987714987714988,
      "grad_norm": 1.1882957220077515,
      "learning_rate": 1.904555728797009e-05,
      "loss": 0.9462,
      "step": 203
    },
    {
      "epoch": 0.5012285012285013,
      "grad_norm": 1.0740363597869873,
      "learning_rate": 1.903421266342197e-05,
      "loss": 0.9247,
      "step": 204
    },
    {
      "epoch": 0.5036855036855037,
      "grad_norm": 1.1630939245224,
      "learning_rate": 1.9022804434585854e-05,
      "loss": 0.9948,
      "step": 205
    },
    {
      "epoch": 0.5061425061425061,
      "grad_norm": 1.099727988243103,
      "learning_rate": 1.9011332681780007e-05,
      "loss": 0.9637,
      "step": 206
    },
    {
      "epoch": 0.5085995085995086,
      "grad_norm": 1.0455065965652466,
      "learning_rate": 1.8999797485769925e-05,
      "loss": 0.977,
      "step": 207
    },
    {
      "epoch": 0.5110565110565111,
      "grad_norm": 1.1362916231155396,
      "learning_rate": 1.898819892776777e-05,
      "loss": 0.9289,
      "step": 208
    },
    {
      "epoch": 0.5135135135135135,
      "grad_norm": 1.0821365118026733,
      "learning_rate": 1.8976537089431793e-05,
      "loss": 0.9782,
      "step": 209
    },
    {
      "epoch": 0.515970515970516,
      "grad_norm": 1.1401381492614746,
      "learning_rate": 1.8964812052865764e-05,
      "loss": 0.9612,
      "step": 210
    },
    {
      "epoch": 0.5184275184275184,
      "grad_norm": 1.192937970161438,
      "learning_rate": 1.8953023900618395e-05,
      "loss": 0.9762,
      "step": 211
    },
    {
      "epoch": 0.5208845208845209,
      "grad_norm": 1.0559264421463013,
      "learning_rate": 1.8941172715682756e-05,
      "loss": 0.8589,
      "step": 212
    },
    {
      "epoch": 0.5233415233415234,
      "grad_norm": 1.0663461685180664,
      "learning_rate": 1.8929258581495688e-05,
      "loss": 0.9301,
      "step": 213
    },
    {
      "epoch": 0.5257985257985258,
      "grad_norm": 1.107938289642334,
      "learning_rate": 1.8917281581937216e-05,
      "loss": 0.9668,
      "step": 214
    },
    {
      "epoch": 0.5282555282555282,
      "grad_norm": 1.1464968919754028,
      "learning_rate": 1.8905241801329972e-05,
      "loss": 0.9129,
      "step": 215
    },
    {
      "epoch": 0.5307125307125307,
      "grad_norm": 1.0721112489700317,
      "learning_rate": 1.889313932443858e-05,
      "loss": 0.951,
      "step": 216
    },
    {
      "epoch": 0.5331695331695332,
      "grad_norm": 0.9699763655662537,
      "learning_rate": 1.888097423646907e-05,
      "loss": 0.8962,
      "step": 217
    },
    {
      "epoch": 0.5356265356265356,
      "grad_norm": 1.1581181287765503,
      "learning_rate": 1.8868746623068292e-05,
      "loss": 0.9586,
      "step": 218
    },
    {
      "epoch": 0.538083538083538,
      "grad_norm": 1.0095258951187134,
      "learning_rate": 1.885645657032328e-05,
      "loss": 0.9373,
      "step": 219
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 1.034725546836853,
      "learning_rate": 1.884410416476067e-05,
      "loss": 0.9411,
      "step": 220
    },
    {
      "epoch": 0.542997542997543,
      "grad_norm": 1.0320111513137817,
      "learning_rate": 1.8831689493346095e-05,
      "loss": 0.9121,
      "step": 221
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 1.1234315633773804,
      "learning_rate": 1.881921264348355e-05,
      "loss": 1.0093,
      "step": 222
    },
    {
      "epoch": 0.547911547911548,
      "grad_norm": 1.0910882949829102,
      "learning_rate": 1.8806673703014805e-05,
      "loss": 0.9478,
      "step": 223
    },
    {
      "epoch": 0.5503685503685504,
      "grad_norm": 1.1484427452087402,
      "learning_rate": 1.8794072760218752e-05,
      "loss": 1.0299,
      "step": 224
    },
    {
      "epoch": 0.5528255528255528,
      "grad_norm": 1.1294251680374146,
      "learning_rate": 1.8781409903810823e-05,
      "loss": 0.9493,
      "step": 225
    },
    {
      "epoch": 0.5552825552825553,
      "grad_norm": 1.1251084804534912,
      "learning_rate": 1.876868522294233e-05,
      "loss": 0.9393,
      "step": 226
    },
    {
      "epoch": 0.5577395577395577,
      "grad_norm": 1.0826867818832397,
      "learning_rate": 1.8755898807199856e-05,
      "loss": 0.9591,
      "step": 227
    },
    {
      "epoch": 0.5601965601965602,
      "grad_norm": 1.1034644842147827,
      "learning_rate": 1.8743050746604635e-05,
      "loss": 0.9302,
      "step": 228
    },
    {
      "epoch": 0.5626535626535627,
      "grad_norm": 1.0749222040176392,
      "learning_rate": 1.8730141131611882e-05,
      "loss": 0.9186,
      "step": 229
    },
    {
      "epoch": 0.5651105651105651,
      "grad_norm": 1.0956974029541016,
      "learning_rate": 1.8717170053110198e-05,
      "loss": 0.9394,
      "step": 230
    },
    {
      "epoch": 0.5675675675675675,
      "grad_norm": 1.0449669361114502,
      "learning_rate": 1.870413760242089e-05,
      "loss": 0.9042,
      "step": 231
    },
    {
      "epoch": 0.5700245700245701,
      "grad_norm": 1.0706597566604614,
      "learning_rate": 1.869104387129737e-05,
      "loss": 0.9079,
      "step": 232
    },
    {
      "epoch": 0.5724815724815725,
      "grad_norm": 1.070791244506836,
      "learning_rate": 1.8677888951924473e-05,
      "loss": 0.9668,
      "step": 233
    },
    {
      "epoch": 0.5749385749385749,
      "grad_norm": 1.1020586490631104,
      "learning_rate": 1.8664672936917828e-05,
      "loss": 0.9391,
      "step": 234
    },
    {
      "epoch": 0.5773955773955773,
      "grad_norm": 1.073449969291687,
      "learning_rate": 1.8651395919323203e-05,
      "loss": 0.9479,
      "step": 235
    },
    {
      "epoch": 0.5798525798525799,
      "grad_norm": 1.1879843473434448,
      "learning_rate": 1.863805799261584e-05,
      "loss": 0.9958,
      "step": 236
    },
    {
      "epoch": 0.5823095823095823,
      "grad_norm": 1.049581527709961,
      "learning_rate": 1.8624659250699807e-05,
      "loss": 0.9722,
      "step": 237
    },
    {
      "epoch": 0.5847665847665847,
      "grad_norm": 1.0783240795135498,
      "learning_rate": 1.861119978790734e-05,
      "loss": 0.901,
      "step": 238
    },
    {
      "epoch": 0.5872235872235873,
      "grad_norm": 1.0940930843353271,
      "learning_rate": 1.8597679698998164e-05,
      "loss": 0.9154,
      "step": 239
    },
    {
      "epoch": 0.5896805896805897,
      "grad_norm": 1.0629860162734985,
      "learning_rate": 1.8584099079158842e-05,
      "loss": 0.9529,
      "step": 240
    },
    {
      "epoch": 0.5921375921375921,
      "grad_norm": 1.016574740409851,
      "learning_rate": 1.8570458024002094e-05,
      "loss": 0.9281,
      "step": 241
    },
    {
      "epoch": 0.5945945945945946,
      "grad_norm": 1.0727007389068604,
      "learning_rate": 1.855675662956613e-05,
      "loss": 0.9797,
      "step": 242
    },
    {
      "epoch": 0.597051597051597,
      "grad_norm": 1.02950119972229,
      "learning_rate": 1.854299499231397e-05,
      "loss": 0.9029,
      "step": 243
    },
    {
      "epoch": 0.5995085995085995,
      "grad_norm": 1.0800044536590576,
      "learning_rate": 1.852917320913276e-05,
      "loss": 0.911,
      "step": 244
    },
    {
      "epoch": 0.601965601965602,
      "grad_norm": 1.1379327774047852,
      "learning_rate": 1.8515291377333114e-05,
      "loss": 0.9316,
      "step": 245
    },
    {
      "epoch": 0.6044226044226044,
      "grad_norm": 1.001541256904602,
      "learning_rate": 1.8501349594648394e-05,
      "loss": 0.9213,
      "step": 246
    },
    {
      "epoch": 0.6068796068796068,
      "grad_norm": 1.0450564622879028,
      "learning_rate": 1.8487347959234042e-05,
      "loss": 0.9709,
      "step": 247
    },
    {
      "epoch": 0.6093366093366094,
      "grad_norm": 1.1812976598739624,
      "learning_rate": 1.847328656966689e-05,
      "loss": 1.0262,
      "step": 248
    },
    {
      "epoch": 0.6117936117936118,
      "grad_norm": 1.1157077550888062,
      "learning_rate": 1.8459165524944463e-05,
      "loss": 0.9947,
      "step": 249
    },
    {
      "epoch": 0.6142506142506142,
      "grad_norm": 1.0730394124984741,
      "learning_rate": 1.8444984924484278e-05,
      "loss": 0.9009,
      "step": 250
    },
    {
      "epoch": 0.6167076167076168,
      "grad_norm": 1.0620498657226562,
      "learning_rate": 1.8430744868123146e-05,
      "loss": 0.9692,
      "step": 251
    },
    {
      "epoch": 0.6191646191646192,
      "grad_norm": 1.0852974653244019,
      "learning_rate": 1.8416445456116473e-05,
      "loss": 0.9467,
      "step": 252
    },
    {
      "epoch": 0.6216216216216216,
      "grad_norm": 1.0586998462677002,
      "learning_rate": 1.8402086789137547e-05,
      "loss": 0.9297,
      "step": 253
    },
    {
      "epoch": 0.6240786240786241,
      "grad_norm": 1.0674982070922852,
      "learning_rate": 1.8387668968276836e-05,
      "loss": 0.8964,
      "step": 254
    },
    {
      "epoch": 0.6265356265356266,
      "grad_norm": 1.1577105522155762,
      "learning_rate": 1.8373192095041278e-05,
      "loss": 1.0358,
      "step": 255
    },
    {
      "epoch": 0.628992628992629,
      "grad_norm": 1.0243662595748901,
      "learning_rate": 1.8358656271353558e-05,
      "loss": 0.9246,
      "step": 256
    },
    {
      "epoch": 0.6314496314496314,
      "grad_norm": 1.0408570766448975,
      "learning_rate": 1.8344061599551397e-05,
      "loss": 0.9392,
      "step": 257
    },
    {
      "epoch": 0.6339066339066339,
      "grad_norm": 1.0425841808319092,
      "learning_rate": 1.832940818238682e-05,
      "loss": 0.9868,
      "step": 258
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 1.0140665769577026,
      "learning_rate": 1.8314696123025456e-05,
      "loss": 0.9199,
      "step": 259
    },
    {
      "epoch": 0.6388206388206388,
      "grad_norm": 1.1359258890151978,
      "learning_rate": 1.8299925525045782e-05,
      "loss": 0.9352,
      "step": 260
    },
    {
      "epoch": 0.6412776412776413,
      "grad_norm": 1.052809238433838,
      "learning_rate": 1.8285096492438424e-05,
      "loss": 0.9269,
      "step": 261
    },
    {
      "epoch": 0.6437346437346437,
      "grad_norm": 1.1689685583114624,
      "learning_rate": 1.8270209129605397e-05,
      "loss": 0.9481,
      "step": 262
    },
    {
      "epoch": 0.6461916461916462,
      "grad_norm": 1.1114914417266846,
      "learning_rate": 1.8255263541359397e-05,
      "loss": 0.9334,
      "step": 263
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 1.059890627861023,
      "learning_rate": 1.8240259832923035e-05,
      "loss": 0.9283,
      "step": 264
    },
    {
      "epoch": 0.6511056511056511,
      "grad_norm": 1.071092128753662,
      "learning_rate": 1.8225198109928116e-05,
      "loss": 0.9556,
      "step": 265
    },
    {
      "epoch": 0.6535626535626535,
      "grad_norm": 1.1584666967391968,
      "learning_rate": 1.8210078478414895e-05,
      "loss": 0.9638,
      "step": 266
    },
    {
      "epoch": 0.6560196560196561,
      "grad_norm": 1.0983332395553589,
      "learning_rate": 1.8194901044831313e-05,
      "loss": 1.0076,
      "step": 267
    },
    {
      "epoch": 0.6584766584766585,
      "grad_norm": 1.0883970260620117,
      "learning_rate": 1.817966591603227e-05,
      "loss": 0.9824,
      "step": 268
    },
    {
      "epoch": 0.6609336609336609,
      "grad_norm": 1.1348230838775635,
      "learning_rate": 1.8164373199278858e-05,
      "loss": 0.9559,
      "step": 269
    },
    {
      "epoch": 0.6633906633906634,
      "grad_norm": 1.0387697219848633,
      "learning_rate": 1.8149023002237612e-05,
      "loss": 0.9434,
      "step": 270
    },
    {
      "epoch": 0.6658476658476659,
      "grad_norm": 1.044997215270996,
      "learning_rate": 1.8133615432979742e-05,
      "loss": 0.9624,
      "step": 271
    },
    {
      "epoch": 0.6683046683046683,
      "grad_norm": 1.0559712648391724,
      "learning_rate": 1.8118150599980398e-05,
      "loss": 0.9228,
      "step": 272
    },
    {
      "epoch": 0.6707616707616708,
      "grad_norm": 1.036546230316162,
      "learning_rate": 1.8102628612117868e-05,
      "loss": 0.9468,
      "step": 273
    },
    {
      "epoch": 0.6732186732186732,
      "grad_norm": 1.0674127340316772,
      "learning_rate": 1.8087049578672847e-05,
      "loss": 0.9646,
      "step": 274
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 1.0553275346755981,
      "learning_rate": 1.8071413609327638e-05,
      "loss": 0.9321,
      "step": 275
    },
    {
      "epoch": 0.6781326781326781,
      "grad_norm": 0.9926275610923767,
      "learning_rate": 1.8055720814165415e-05,
      "loss": 0.9003,
      "step": 276
    },
    {
      "epoch": 0.6805896805896806,
      "grad_norm": 1.0728232860565186,
      "learning_rate": 1.8039971303669407e-05,
      "loss": 0.9242,
      "step": 277
    },
    {
      "epoch": 0.683046683046683,
      "grad_norm": 1.0888348817825317,
      "learning_rate": 1.8024165188722153e-05,
      "loss": 0.9561,
      "step": 278
    },
    {
      "epoch": 0.6855036855036855,
      "grad_norm": 1.090308427810669,
      "learning_rate": 1.80083025806047e-05,
      "loss": 0.9635,
      "step": 279
    },
    {
      "epoch": 0.687960687960688,
      "grad_norm": 1.0397887229919434,
      "learning_rate": 1.799238359099584e-05,
      "loss": 0.917,
      "step": 280
    },
    {
      "epoch": 0.6904176904176904,
      "grad_norm": 1.0251840353012085,
      "learning_rate": 1.79764083319713e-05,
      "loss": 0.9511,
      "step": 281
    },
    {
      "epoch": 0.6928746928746928,
      "grad_norm": 1.192077398300171,
      "learning_rate": 1.7960376916002974e-05,
      "loss": 0.9834,
      "step": 282
    },
    {
      "epoch": 0.6953316953316954,
      "grad_norm": 1.1752359867095947,
      "learning_rate": 1.7944289455958114e-05,
      "loss": 0.9448,
      "step": 283
    },
    {
      "epoch": 0.6977886977886978,
      "grad_norm": 1.0350326299667358,
      "learning_rate": 1.792814606509855e-05,
      "loss": 0.9827,
      "step": 284
    },
    {
      "epoch": 0.7002457002457002,
      "grad_norm": 1.0373021364212036,
      "learning_rate": 1.7911946857079886e-05,
      "loss": 0.979,
      "step": 285
    },
    {
      "epoch": 0.7027027027027027,
      "grad_norm": 1.0575249195098877,
      "learning_rate": 1.7895691945950696e-05,
      "loss": 0.9272,
      "step": 286
    },
    {
      "epoch": 0.7051597051597052,
      "grad_norm": 0.96884685754776,
      "learning_rate": 1.787938144615173e-05,
      "loss": 0.8975,
      "step": 287
    },
    {
      "epoch": 0.7076167076167076,
      "grad_norm": 0.9968435168266296,
      "learning_rate": 1.78630154725151e-05,
      "loss": 0.9359,
      "step": 288
    },
    {
      "epoch": 0.7100737100737101,
      "grad_norm": 1.1364377737045288,
      "learning_rate": 1.7846594140263475e-05,
      "loss": 0.9421,
      "step": 289
    },
    {
      "epoch": 0.7125307125307125,
      "grad_norm": 1.0569933652877808,
      "learning_rate": 1.783011756500927e-05,
      "loss": 0.9258,
      "step": 290
    },
    {
      "epoch": 0.714987714987715,
      "grad_norm": 1.023688793182373,
      "learning_rate": 1.7813585862753832e-05,
      "loss": 0.9086,
      "step": 291
    },
    {
      "epoch": 0.7174447174447175,
      "grad_norm": 1.0676270723342896,
      "learning_rate": 1.779699914988662e-05,
      "loss": 0.9039,
      "step": 292
    },
    {
      "epoch": 0.7199017199017199,
      "grad_norm": 1.1354315280914307,
      "learning_rate": 1.7780357543184396e-05,
      "loss": 0.9492,
      "step": 293
    },
    {
      "epoch": 0.7223587223587223,
      "grad_norm": 1.0893232822418213,
      "learning_rate": 1.776366115981039e-05,
      "loss": 0.9203,
      "step": 294
    },
    {
      "epoch": 0.7248157248157249,
      "grad_norm": 1.1178086996078491,
      "learning_rate": 1.7746910117313482e-05,
      "loss": 0.9977,
      "step": 295
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 1.0091534852981567,
      "learning_rate": 1.773010453362737e-05,
      "loss": 0.9312,
      "step": 296
    },
    {
      "epoch": 0.7297297297297297,
      "grad_norm": 1.0890346765518188,
      "learning_rate": 1.771324452706975e-05,
      "loss": 1.0266,
      "step": 297
    },
    {
      "epoch": 0.7321867321867321,
      "grad_norm": 0.989084005355835,
      "learning_rate": 1.7696330216341465e-05,
      "loss": 0.88,
      "step": 298
    },
    {
      "epoch": 0.7346437346437347,
      "grad_norm": 1.1306614875793457,
      "learning_rate": 1.767936172052569e-05,
      "loss": 0.9458,
      "step": 299
    },
    {
      "epoch": 0.7371007371007371,
      "grad_norm": 1.0816539525985718,
      "learning_rate": 1.7662339159087077e-05,
      "loss": 0.9342,
      "step": 300
    },
    {
      "epoch": 0.7395577395577395,
      "grad_norm": 1.1475470066070557,
      "learning_rate": 1.7645262651870926e-05,
      "loss": 0.9888,
      "step": 301
    },
    {
      "epoch": 0.742014742014742,
      "grad_norm": 1.0547964572906494,
      "learning_rate": 1.762813231910233e-05,
      "loss": 0.873,
      "step": 302
    },
    {
      "epoch": 0.7444717444717445,
      "grad_norm": 1.0881352424621582,
      "learning_rate": 1.761094828138534e-05,
      "loss": 0.9252,
      "step": 303
    },
    {
      "epoch": 0.7469287469287469,
      "grad_norm": 1.0736604928970337,
      "learning_rate": 1.7593710659702105e-05,
      "loss": 0.9348,
      "step": 304
    },
    {
      "epoch": 0.7493857493857494,
      "grad_norm": 1.092803716659546,
      "learning_rate": 1.7576419575412028e-05,
      "loss": 0.94,
      "step": 305
    },
    {
      "epoch": 0.7518427518427518,
      "grad_norm": 1.0088696479797363,
      "learning_rate": 1.7559075150250913e-05,
      "loss": 0.9635,
      "step": 306
    },
    {
      "epoch": 0.7542997542997543,
      "grad_norm": 1.0789767503738403,
      "learning_rate": 1.754167750633009e-05,
      "loss": 0.9647,
      "step": 307
    },
    {
      "epoch": 0.7567567567567568,
      "grad_norm": 0.9914807081222534,
      "learning_rate": 1.7524226766135587e-05,
      "loss": 0.9332,
      "step": 308
    },
    {
      "epoch": 0.7592137592137592,
      "grad_norm": 1.0111716985702515,
      "learning_rate": 1.7506723052527243e-05,
      "loss": 0.9126,
      "step": 309
    },
    {
      "epoch": 0.7616707616707616,
      "grad_norm": 0.9699746370315552,
      "learning_rate": 1.7489166488737847e-05,
      "loss": 0.9494,
      "step": 310
    },
    {
      "epoch": 0.7641277641277642,
      "grad_norm": 1.048949956893921,
      "learning_rate": 1.7471557198372277e-05,
      "loss": 0.9702,
      "step": 311
    },
    {
      "epoch": 0.7665847665847666,
      "grad_norm": 1.0581941604614258,
      "learning_rate": 1.7453895305406615e-05,
      "loss": 0.9911,
      "step": 312
    },
    {
      "epoch": 0.769041769041769,
      "grad_norm": 1.0768938064575195,
      "learning_rate": 1.7436180934187307e-05,
      "loss": 0.9492,
      "step": 313
    },
    {
      "epoch": 0.7714987714987716,
      "grad_norm": 1.0591368675231934,
      "learning_rate": 1.741841420943025e-05,
      "loss": 0.9585,
      "step": 314
    },
    {
      "epoch": 0.773955773955774,
      "grad_norm": 1.0444432497024536,
      "learning_rate": 1.740059525621993e-05,
      "loss": 0.9736,
      "step": 315
    },
    {
      "epoch": 0.7764127764127764,
      "grad_norm": 1.032731056213379,
      "learning_rate": 1.7382724200008546e-05,
      "loss": 0.9236,
      "step": 316
    },
    {
      "epoch": 0.7788697788697788,
      "grad_norm": 1.0368685722351074,
      "learning_rate": 1.7364801166615124e-05,
      "loss": 0.9678,
      "step": 317
    },
    {
      "epoch": 0.7813267813267813,
      "grad_norm": 0.9975135922431946,
      "learning_rate": 1.734682628222462e-05,
      "loss": 0.9506,
      "step": 318
    },
    {
      "epoch": 0.7837837837837838,
      "grad_norm": 0.969009280204773,
      "learning_rate": 1.7328799673387053e-05,
      "loss": 0.9284,
      "step": 319
    },
    {
      "epoch": 0.7862407862407862,
      "grad_norm": 1.0193045139312744,
      "learning_rate": 1.7310721467016587e-05,
      "loss": 0.9434,
      "step": 320
    },
    {
      "epoch": 0.7886977886977887,
      "grad_norm": 1.0702950954437256,
      "learning_rate": 1.7292591790390668e-05,
      "loss": 0.9494,
      "step": 321
    },
    {
      "epoch": 0.7911547911547911,
      "grad_norm": 1.0544315576553345,
      "learning_rate": 1.7274410771149094e-05,
      "loss": 0.905,
      "step": 322
    },
    {
      "epoch": 0.7936117936117936,
      "grad_norm": 1.1505653858184814,
      "learning_rate": 1.725617853729316e-05,
      "loss": 0.9587,
      "step": 323
    },
    {
      "epoch": 0.7960687960687961,
      "grad_norm": 0.9675195813179016,
      "learning_rate": 1.7237895217184702e-05,
      "loss": 0.9715,
      "step": 324
    },
    {
      "epoch": 0.7985257985257985,
      "grad_norm": 1.0955339670181274,
      "learning_rate": 1.7219560939545246e-05,
      "loss": 0.9342,
      "step": 325
    },
    {
      "epoch": 0.800982800982801,
      "grad_norm": 1.127308964729309,
      "learning_rate": 1.7201175833455066e-05,
      "loss": 0.945,
      "step": 326
    },
    {
      "epoch": 0.8034398034398035,
      "grad_norm": 1.0063378810882568,
      "learning_rate": 1.718274002835229e-05,
      "loss": 0.9328,
      "step": 327
    },
    {
      "epoch": 0.8058968058968059,
      "grad_norm": 1.0212886333465576,
      "learning_rate": 1.7164253654031986e-05,
      "loss": 0.926,
      "step": 328
    },
    {
      "epoch": 0.8083538083538083,
      "grad_norm": 0.9954712986946106,
      "learning_rate": 1.7145716840645253e-05,
      "loss": 0.9266,
      "step": 329
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.9671973586082458,
      "learning_rate": 1.7127129718698298e-05,
      "loss": 0.975,
      "step": 330
    },
    {
      "epoch": 0.8132678132678133,
      "grad_norm": 1.116471767425537,
      "learning_rate": 1.710849241905151e-05,
      "loss": 0.9474,
      "step": 331
    },
    {
      "epoch": 0.8157248157248157,
      "grad_norm": 1.031991720199585,
      "learning_rate": 1.7089805072918567e-05,
      "loss": 0.9674,
      "step": 332
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 1.0887236595153809,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.9402,
      "step": 333
    },
    {
      "epoch": 0.8206388206388207,
      "grad_norm": 1.0192559957504272,
      "learning_rate": 1.7052280767809672e-05,
      "loss": 0.9201,
      "step": 334
    },
    {
      "epoch": 0.8230958230958231,
      "grad_norm": 1.0986623764038086,
      "learning_rate": 1.7033444073019077e-05,
      "loss": 0.9507,
      "step": 335
    },
    {
      "epoch": 0.8255528255528255,
      "grad_norm": 1.0037617683410645,
      "learning_rate": 1.7014557860111184e-05,
      "loss": 0.9653,
      "step": 336
    },
    {
      "epoch": 0.828009828009828,
      "grad_norm": 1.0202786922454834,
      "learning_rate": 1.6995622262052093e-05,
      "loss": 0.9412,
      "step": 337
    },
    {
      "epoch": 0.8304668304668305,
      "grad_norm": 0.9347285628318787,
      "learning_rate": 1.697663741215561e-05,
      "loss": 0.9528,
      "step": 338
    },
    {
      "epoch": 0.8329238329238329,
      "grad_norm": 1.0408426523208618,
      "learning_rate": 1.6957603444082295e-05,
      "loss": 0.9172,
      "step": 339
    },
    {
      "epoch": 0.8353808353808354,
      "grad_norm": 1.1628843545913696,
      "learning_rate": 1.6938520491838502e-05,
      "loss": 1.0025,
      "step": 340
    },
    {
      "epoch": 0.8378378378378378,
      "grad_norm": 0.9961422681808472,
      "learning_rate": 1.6919388689775463e-05,
      "loss": 0.9578,
      "step": 341
    },
    {
      "epoch": 0.8402948402948403,
      "grad_norm": 1.0189507007598877,
      "learning_rate": 1.6900208172588333e-05,
      "loss": 0.9201,
      "step": 342
    },
    {
      "epoch": 0.8427518427518428,
      "grad_norm": 1.0783741474151611,
      "learning_rate": 1.6880979075315238e-05,
      "loss": 0.8905,
      "step": 343
    },
    {
      "epoch": 0.8452088452088452,
      "grad_norm": 1.1078404188156128,
      "learning_rate": 1.6861701533336322e-05,
      "loss": 0.9699,
      "step": 344
    },
    {
      "epoch": 0.8476658476658476,
      "grad_norm": 1.016022801399231,
      "learning_rate": 1.6842375682372803e-05,
      "loss": 0.9316,
      "step": 345
    },
    {
      "epoch": 0.8501228501228502,
      "grad_norm": 1.083304762840271,
      "learning_rate": 1.6823001658486013e-05,
      "loss": 0.9267,
      "step": 346
    },
    {
      "epoch": 0.8525798525798526,
      "grad_norm": 1.6159943342208862,
      "learning_rate": 1.6803579598076434e-05,
      "loss": 1.0386,
      "step": 347
    },
    {
      "epoch": 0.855036855036855,
      "grad_norm": 1.1118335723876953,
      "learning_rate": 1.678410963788275e-05,
      "loss": 0.9803,
      "step": 348
    },
    {
      "epoch": 0.8574938574938575,
      "grad_norm": 0.9951636791229248,
      "learning_rate": 1.676459191498087e-05,
      "loss": 0.9072,
      "step": 349
    },
    {
      "epoch": 0.85995085995086,
      "grad_norm": 1.0147863626480103,
      "learning_rate": 1.674502656678298e-05,
      "loss": 0.9751,
      "step": 350
    },
    {
      "epoch": 0.8624078624078624,
      "grad_norm": 1.0647886991500854,
      "learning_rate": 1.6725413731036562e-05,
      "loss": 0.8859,
      "step": 351
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 1.0200670957565308,
      "learning_rate": 1.6705753545823423e-05,
      "loss": 0.9253,
      "step": 352
    },
    {
      "epoch": 0.8673218673218673,
      "grad_norm": 1.0413808822631836,
      "learning_rate": 1.6686046149558736e-05,
      "loss": 0.9261,
      "step": 353
    },
    {
      "epoch": 0.8697788697788698,
      "grad_norm": 1.060482382774353,
      "learning_rate": 1.6666291680990056e-05,
      "loss": 0.9291,
      "step": 354
    },
    {
      "epoch": 0.8722358722358723,
      "grad_norm": 1.052126169204712,
      "learning_rate": 1.6646490279196344e-05,
      "loss": 0.9597,
      "step": 355
    },
    {
      "epoch": 0.8746928746928747,
      "grad_norm": 1.0208313465118408,
      "learning_rate": 1.6626642083586986e-05,
      "loss": 0.9335,
      "step": 356
    },
    {
      "epoch": 0.8771498771498771,
      "grad_norm": 1.148834466934204,
      "learning_rate": 1.6606747233900816e-05,
      "loss": 0.9782,
      "step": 357
    },
    {
      "epoch": 0.8796068796068796,
      "grad_norm": 1.0331612825393677,
      "learning_rate": 1.6586805870205135e-05,
      "loss": 0.8924,
      "step": 358
    },
    {
      "epoch": 0.8820638820638821,
      "grad_norm": 1.1008647680282593,
      "learning_rate": 1.656681813289471e-05,
      "loss": 0.9563,
      "step": 359
    },
    {
      "epoch": 0.8845208845208845,
      "grad_norm": 1.1791423559188843,
      "learning_rate": 1.654678416269081e-05,
      "loss": 1.0417,
      "step": 360
    },
    {
      "epoch": 0.8869778869778869,
      "grad_norm": 1.0597944259643555,
      "learning_rate": 1.652670410064019e-05,
      "loss": 0.9014,
      "step": 361
    },
    {
      "epoch": 0.8894348894348895,
      "grad_norm": 1.1398617029190063,
      "learning_rate": 1.6506578088114105e-05,
      "loss": 0.9467,
      "step": 362
    },
    {
      "epoch": 0.8918918918918919,
      "grad_norm": 1.1174441576004028,
      "learning_rate": 1.6486406266807343e-05,
      "loss": 0.9263,
      "step": 363
    },
    {
      "epoch": 0.8943488943488943,
      "grad_norm": 1.161808729171753,
      "learning_rate": 1.646618877873717e-05,
      "loss": 0.9582,
      "step": 364
    },
    {
      "epoch": 0.8968058968058968,
      "grad_norm": 1.0449280738830566,
      "learning_rate": 1.6445925766242392e-05,
      "loss": 0.9732,
      "step": 365
    },
    {
      "epoch": 0.8992628992628993,
      "grad_norm": 1.1118457317352295,
      "learning_rate": 1.6425617371982302e-05,
      "loss": 0.9491,
      "step": 366
    },
    {
      "epoch": 0.9017199017199017,
      "grad_norm": 1.1369138956069946,
      "learning_rate": 1.6405263738935716e-05,
      "loss": 0.9477,
      "step": 367
    },
    {
      "epoch": 0.9041769041769042,
      "grad_norm": 1.1870582103729248,
      "learning_rate": 1.6384865010399935e-05,
      "loss": 0.9596,
      "step": 368
    },
    {
      "epoch": 0.9066339066339066,
      "grad_norm": 1.2551947832107544,
      "learning_rate": 1.6364421329989758e-05,
|
"loss": 0.9229, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 1.0891375541687012, |
|
"learning_rate": 1.6343932841636455e-05, |
|
"loss": 0.9326, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9115479115479116, |
|
"grad_norm": 1.0899828672409058, |
|
"learning_rate": 1.632339968958677e-05, |
|
"loss": 0.9336, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.914004914004914, |
|
"grad_norm": 1.0995900630950928, |
|
"learning_rate": 1.6302822018401885e-05, |
|
"loss": 0.8952, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9164619164619164, |
|
"grad_norm": 1.0277985334396362, |
|
"learning_rate": 1.6282199972956425e-05, |
|
"loss": 0.9295, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.918918918918919, |
|
"grad_norm": 1.0418602228164673, |
|
"learning_rate": 1.6261533698437416e-05, |
|
"loss": 0.9309, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9213759213759214, |
|
"grad_norm": 1.0787702798843384, |
|
"learning_rate": 1.6240823340343285e-05, |
|
"loss": 0.9481, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9238329238329238, |
|
"grad_norm": 1.0829393863677979, |
|
"learning_rate": 1.6220069044482815e-05, |
|
"loss": 0.8948, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9262899262899262, |
|
"grad_norm": 1.0835132598876953, |
|
"learning_rate": 1.6199270956974128e-05, |
|
"loss": 0.9834, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9287469287469288, |
|
"grad_norm": 1.098840594291687, |
|
"learning_rate": 1.6178429224243665e-05, |
|
"loss": 0.917, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9312039312039312, |
|
"grad_norm": 1.0230814218521118, |
|
"learning_rate": 1.6157543993025134e-05, |
|
"loss": 0.9491, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9336609336609336, |
|
"grad_norm": 0.9591358304023743, |
|
"learning_rate": 1.6136615410358493e-05, |
|
"loss": 0.9544, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9361179361179361, |
|
"grad_norm": 1.0422619581222534, |
|
"learning_rate": 1.6115643623588915e-05, |
|
"loss": 0.9421, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9385749385749386, |
|
"grad_norm": 1.0649887323379517, |
|
"learning_rate": 1.6094628780365745e-05, |
|
"loss": 0.9139, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.941031941031941, |
|
"grad_norm": 1.1111162900924683, |
|
"learning_rate": 1.6073571028641452e-05, |
|
"loss": 0.9219, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9434889434889435, |
|
"grad_norm": 1.0625733137130737, |
|
"learning_rate": 1.6052470516670613e-05, |
|
"loss": 0.9556, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9459459459459459, |
|
"grad_norm": 1.0070087909698486, |
|
"learning_rate": 1.6031327393008848e-05, |
|
"loss": 0.9596, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.9484029484029484, |
|
"grad_norm": 1.0320253372192383, |
|
"learning_rate": 1.6010141806511765e-05, |
|
"loss": 0.9596, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9508599508599509, |
|
"grad_norm": 1.0545932054519653, |
|
"learning_rate": 1.598891390633395e-05, |
|
"loss": 0.9776, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.9533169533169533, |
|
"grad_norm": 1.021756649017334, |
|
"learning_rate": 1.596764384192787e-05, |
|
"loss": 0.9311, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.9557739557739557, |
|
"grad_norm": 1.0189892053604126, |
|
"learning_rate": 1.594633176304287e-05, |
|
"loss": 0.923, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.9582309582309583, |
|
"grad_norm": 1.037270426750183, |
|
"learning_rate": 1.5924977819724068e-05, |
|
"loss": 0.9173, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9606879606879607, |
|
"grad_norm": 1.061562180519104, |
|
"learning_rate": 1.590358216231134e-05, |
|
"loss": 0.9129, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.9631449631449631, |
|
"grad_norm": 1.053601622581482, |
|
"learning_rate": 1.5882144941438234e-05, |
|
"loss": 0.9286, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9656019656019657, |
|
"grad_norm": 1.1030821800231934, |
|
"learning_rate": 1.5860666308030933e-05, |
|
"loss": 1.0121, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.9680589680589681, |
|
"grad_norm": 0.9831104278564453, |
|
"learning_rate": 1.5839146413307167e-05, |
|
"loss": 0.9245, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9705159705159705, |
|
"grad_norm": 1.0132577419281006, |
|
"learning_rate": 1.5817585408775168e-05, |
|
"loss": 0.8906, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.972972972972973, |
|
"grad_norm": 1.1054580211639404, |
|
"learning_rate": 1.5795983446232602e-05, |
|
"loss": 0.9785, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9754299754299754, |
|
"grad_norm": 0.9700505137443542, |
|
"learning_rate": 1.5774340677765483e-05, |
|
"loss": 0.9462, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9778869778869779, |
|
"grad_norm": 1.126731276512146, |
|
"learning_rate": 1.5752657255747122e-05, |
|
"loss": 0.9751, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9803439803439803, |
|
"grad_norm": 1.0231095552444458, |
|
"learning_rate": 1.5730933332837045e-05, |
|
"loss": 0.958, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9828009828009828, |
|
"grad_norm": 1.0033038854599, |
|
"learning_rate": 1.5709169061979915e-05, |
|
"loss": 0.892, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9852579852579852, |
|
"grad_norm": 1.080012559890747, |
|
"learning_rate": 1.568736459640447e-05, |
|
"loss": 0.9401, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.9877149877149877, |
|
"grad_norm": 1.0674031972885132, |
|
"learning_rate": 1.5665520089622424e-05, |
|
"loss": 0.9373, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9901719901719902, |
|
"grad_norm": 0.9849039316177368, |
|
"learning_rate": 1.5643635695427405e-05, |
|
"loss": 0.9225, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9926289926289926, |
|
"grad_norm": 0.9782394170761108, |
|
"learning_rate": 1.5621711567893853e-05, |
|
"loss": 0.9431, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.995085995085995, |
|
"grad_norm": 1.012881875038147, |
|
"learning_rate": 1.5599747861375957e-05, |
|
"loss": 0.9146, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9975429975429976, |
|
"grad_norm": 0.9708315134048462, |
|
"learning_rate": 1.5577744730506545e-05, |
|
"loss": 0.9521, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0670959949493408, |
|
"learning_rate": 1.5555702330196024e-05, |
|
"loss": 0.8446, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0024570024570025, |
|
"grad_norm": 1.511311411857605, |
|
"learning_rate": 1.5533620815631255e-05, |
|
"loss": 0.6437, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0049140049140048, |
|
"grad_norm": 1.3223845958709717, |
|
"learning_rate": 1.551150034227449e-05, |
|
"loss": 0.6258, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0073710073710074, |
|
"grad_norm": 1.1301041841506958, |
|
"learning_rate": 1.5489341065862263e-05, |
|
"loss": 0.6252, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.00982800982801, |
|
"grad_norm": 1.0698575973510742, |
|
"learning_rate": 1.546714314240429e-05, |
|
"loss": 0.5928, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0122850122850122, |
|
"grad_norm": 1.2460176944732666, |
|
"learning_rate": 1.5444906728182388e-05, |
|
"loss": 0.5851, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.0147420147420148, |
|
"grad_norm": 1.3937596082687378, |
|
"learning_rate": 1.5422631979749354e-05, |
|
"loss": 0.5751, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.0171990171990173, |
|
"grad_norm": 1.3066784143447876, |
|
"learning_rate": 1.5400319053927875e-05, |
|
"loss": 0.5688, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.0196560196560196, |
|
"grad_norm": 1.098724126815796, |
|
"learning_rate": 1.5377968107809425e-05, |
|
"loss": 0.5269, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0221130221130221, |
|
"grad_norm": 1.1766797304153442, |
|
"learning_rate": 1.5355579298753154e-05, |
|
"loss": 0.5651, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.0245700245700247, |
|
"grad_norm": 1.2139365673065186, |
|
"learning_rate": 1.5333152784384777e-05, |
|
"loss": 0.6453, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.027027027027027, |
|
"grad_norm": 1.347971796989441, |
|
"learning_rate": 1.5310688722595472e-05, |
|
"loss": 0.5967, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0294840294840295, |
|
"grad_norm": 1.4031742811203003, |
|
"learning_rate": 1.528818727154077e-05, |
|
"loss": 0.59, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.031941031941032, |
|
"grad_norm": 1.1434392929077148, |
|
"learning_rate": 1.5265648589639424e-05, |
|
"loss": 0.5591, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0343980343980343, |
|
"grad_norm": 1.1155598163604736, |
|
"learning_rate": 1.5243072835572319e-05, |
|
"loss": 0.5689, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.0368550368550369, |
|
"grad_norm": 1.2288144826889038, |
|
"learning_rate": 1.5220460168281335e-05, |
|
"loss": 0.6077, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.0393120393120394, |
|
"grad_norm": 1.1721631288528442, |
|
"learning_rate": 1.519781074696824e-05, |
|
"loss": 0.5737, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.0417690417690417, |
|
"grad_norm": 1.1209845542907715, |
|
"learning_rate": 1.5175124731093553e-05, |
|
"loss": 0.5862, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.0442260442260443, |
|
"grad_norm": 1.1233024597167969, |
|
"learning_rate": 1.5152402280375454e-05, |
|
"loss": 0.565, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.0466830466830466, |
|
"grad_norm": 1.1558212041854858, |
|
"learning_rate": 1.5129643554788614e-05, |
|
"loss": 0.5595, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.049140049140049, |
|
"grad_norm": 1.147844910621643, |
|
"learning_rate": 1.5106848714563112e-05, |
|
"loss": 0.5793, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.0515970515970516, |
|
"grad_norm": 1.128221869468689, |
|
"learning_rate": 1.5084017920183271e-05, |
|
"loss": 0.5357, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.054054054054054, |
|
"grad_norm": 1.3069839477539062, |
|
"learning_rate": 1.5061151332386565e-05, |
|
"loss": 0.5914, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.0565110565110565, |
|
"grad_norm": 1.1022357940673828, |
|
"learning_rate": 1.5038249112162446e-05, |
|
"loss": 0.6201, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.058968058968059, |
|
"grad_norm": 1.3305854797363281, |
|
"learning_rate": 1.5015311420751243e-05, |
|
"loss": 0.6081, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.0614250614250613, |
|
"grad_norm": 1.1212079524993896, |
|
"learning_rate": 1.4992338419643022e-05, |
|
"loss": 0.5857, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.0638820638820639, |
|
"grad_norm": 1.1525722742080688, |
|
"learning_rate": 1.4969330270576428e-05, |
|
"loss": 0.5945, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.0663390663390664, |
|
"grad_norm": 1.0892962217330933, |
|
"learning_rate": 1.4946287135537571e-05, |
|
"loss": 0.5693, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.0687960687960687, |
|
"grad_norm": 1.0893158912658691, |
|
"learning_rate": 1.4923209176758872e-05, |
|
"loss": 0.5715, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.0712530712530712, |
|
"grad_norm": 1.0946080684661865, |
|
"learning_rate": 1.4900096556717923e-05, |
|
"loss": 0.5478, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.0737100737100738, |
|
"grad_norm": 1.124007225036621, |
|
"learning_rate": 1.4876949438136348e-05, |
|
"loss": 0.5887, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.076167076167076, |
|
"grad_norm": 1.1379528045654297, |
|
"learning_rate": 1.485376798397865e-05, |
|
"loss": 0.5656, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.0786240786240786, |
|
"grad_norm": 1.1640037298202515, |
|
"learning_rate": 1.4830552357451075e-05, |
|
"loss": 0.6137, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 1.1026787757873535, |
|
"learning_rate": 1.4807302722000447e-05, |
|
"loss": 0.5827, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0835380835380835, |
|
"grad_norm": 1.0457117557525635, |
|
"learning_rate": 1.4784019241313025e-05, |
|
"loss": 0.5224, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.085995085995086, |
|
"grad_norm": 1.0897119045257568, |
|
"learning_rate": 1.4760702079313363e-05, |
|
"loss": 0.56, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0884520884520885, |
|
"grad_norm": 1.099949836730957, |
|
"learning_rate": 1.473735140016313e-05, |
|
"loss": 0.5739, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 1.1518844366073608, |
|
"learning_rate": 1.4713967368259981e-05, |
|
"loss": 0.5996, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0933660933660934, |
|
"grad_norm": 1.13741934299469, |
|
"learning_rate": 1.4690550148236371e-05, |
|
"loss": 0.5978, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.095823095823096, |
|
"grad_norm": 1.1064966917037964, |
|
"learning_rate": 1.466709990495843e-05, |
|
"loss": 0.5975, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.0982800982800982, |
|
"grad_norm": 1.1217718124389648, |
|
"learning_rate": 1.4643616803524778e-05, |
|
"loss": 0.5567, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.1007371007371007, |
|
"grad_norm": 1.0240809917449951, |
|
"learning_rate": 1.462010100926536e-05, |
|
"loss": 0.5173, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1031941031941033, |
|
"grad_norm": 1.1165695190429688, |
|
"learning_rate": 1.4596552687740304e-05, |
|
"loss": 0.5505, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.1056511056511056, |
|
"grad_norm": 1.2207868099212646, |
|
"learning_rate": 1.4572972004738732e-05, |
|
"loss": 0.6181, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1081081081081081, |
|
"grad_norm": 1.0639880895614624, |
|
"learning_rate": 1.454935912627761e-05, |
|
"loss": 0.5136, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1105651105651106, |
|
"grad_norm": 1.145183801651001, |
|
"learning_rate": 1.4525714218600566e-05, |
|
"loss": 0.5783, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.113022113022113, |
|
"grad_norm": 1.0687991380691528, |
|
"learning_rate": 1.4502037448176734e-05, |
|
"loss": 0.5946, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1154791154791155, |
|
"grad_norm": 1.100060224533081, |
|
"learning_rate": 1.4478328981699568e-05, |
|
"loss": 0.5731, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.117936117936118, |
|
"grad_norm": 1.0320765972137451, |
|
"learning_rate": 1.4454588986085677e-05, |
|
"loss": 0.5585, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.1203931203931203, |
|
"grad_norm": 1.1691075563430786, |
|
"learning_rate": 1.443081762847364e-05, |
|
"loss": 0.5978, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.1228501228501229, |
|
"grad_norm": 1.1214145421981812, |
|
"learning_rate": 1.4407015076222845e-05, |
|
"loss": 0.6147, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.1253071253071254, |
|
"grad_norm": 1.0933703184127808, |
|
"learning_rate": 1.4383181496912301e-05, |
|
"loss": 0.593, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.1277641277641277, |
|
"grad_norm": 1.1798315048217773, |
|
"learning_rate": 1.4359317058339457e-05, |
|
"loss": 0.5715, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.1302211302211302, |
|
"grad_norm": 1.0610649585723877, |
|
"learning_rate": 1.4335421928519022e-05, |
|
"loss": 0.5861, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.1326781326781328, |
|
"grad_norm": 1.0927037000656128, |
|
"learning_rate": 1.4311496275681785e-05, |
|
"loss": 0.5606, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.135135135135135, |
|
"grad_norm": 1.1032183170318604, |
|
"learning_rate": 1.4287540268273428e-05, |
|
"loss": 0.5669, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.1375921375921376, |
|
"grad_norm": 1.1331627368927002, |
|
"learning_rate": 1.4263554074953338e-05, |
|
"loss": 0.5704, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.1400491400491402, |
|
"grad_norm": 1.0285066366195679, |
|
"learning_rate": 1.4239537864593432e-05, |
|
"loss": 0.585, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.1425061425061425, |
|
"grad_norm": 1.1035512685775757, |
|
"learning_rate": 1.4215491806276944e-05, |
|
"loss": 0.5674, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.144963144963145, |
|
"grad_norm": 1.1370140314102173, |
|
"learning_rate": 1.4191416069297261e-05, |
|
"loss": 0.5789, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.1474201474201475, |
|
"grad_norm": 1.3444671630859375, |
|
"learning_rate": 1.4167310823156713e-05, |
|
"loss": 0.6142, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.1498771498771498, |
|
"grad_norm": 1.15607488155365, |
|
"learning_rate": 1.4143176237565386e-05, |
|
"loss": 0.5732, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.1523341523341524, |
|
"grad_norm": 1.1212263107299805, |
|
"learning_rate": 1.4119012482439929e-05, |
|
"loss": 0.5674, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.154791154791155, |
|
"grad_norm": 1.0720258951187134, |
|
"learning_rate": 1.4094819727902354e-05, |
|
"loss": 0.5752, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1572481572481572, |
|
"grad_norm": 1.1930720806121826, |
|
"learning_rate": 1.407059814427884e-05, |
|
"loss": 0.5781, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.1597051597051597, |
|
"grad_norm": 1.1133079528808594, |
|
"learning_rate": 1.4046347902098535e-05, |
|
"loss": 0.57, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.1621621621621623, |
|
"grad_norm": 1.2250217199325562, |
|
"learning_rate": 1.4022069172092354e-05, |
|
"loss": 0.5761, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.1646191646191646, |
|
"grad_norm": 1.050622582435608, |
|
"learning_rate": 1.3997762125191774e-05, |
|
"loss": 0.5909, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.1670761670761671, |
|
"grad_norm": 1.0757161378860474, |
|
"learning_rate": 1.3973426932527637e-05, |
|
"loss": 0.5524, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.1695331695331694, |
|
"grad_norm": 1.1241124868392944, |
|
"learning_rate": 1.3949063765428943e-05, |
|
"loss": 0.5881, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.171990171990172, |
|
"grad_norm": 1.1652573347091675, |
|
"learning_rate": 1.3924672795421638e-05, |
|
"loss": 0.5778, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.1744471744471745, |
|
"grad_norm": 1.065495491027832, |
|
"learning_rate": 1.3900254194227417e-05, |
|
"loss": 0.5632, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.1769041769041768, |
|
"grad_norm": 1.2024372816085815, |
|
"learning_rate": 1.38758081337625e-05, |
|
"loss": 0.5675, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.1793611793611793, |
|
"grad_norm": 1.121402382850647, |
|
"learning_rate": 1.385133478613644e-05, |
|
"loss": 0.517, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 1.1037131547927856, |
|
"learning_rate": 1.3826834323650899e-05, |
|
"loss": 0.5616, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.1842751842751842, |
|
"grad_norm": 1.135852336883545, |
|
"learning_rate": 1.3802306918798435e-05, |
|
"loss": 0.533, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.1867321867321867, |
|
"grad_norm": 1.2349828481674194, |
|
"learning_rate": 1.3777752744261295e-05, |
|
"loss": 0.5841, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.1891891891891893, |
|
"grad_norm": 1.265579104423523, |
|
"learning_rate": 1.3753171972910191e-05, |
|
"loss": 0.5973, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1916461916461916, |
|
"grad_norm": 1.139418363571167, |
|
"learning_rate": 1.3728564777803089e-05, |
|
"loss": 0.5541, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.194103194103194, |
|
"grad_norm": 0.9873649477958679, |
|
"learning_rate": 1.3703931332183987e-05, |
|
"loss": 0.5606, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.1965601965601966, |
|
"grad_norm": 1.097970724105835, |
|
"learning_rate": 1.3679271809481693e-05, |
|
"loss": 0.578, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.199017199017199, |
|
"grad_norm": 1.1830071210861206, |
|
"learning_rate": 1.3654586383308619e-05, |
|
"loss": 0.5815, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.2014742014742015, |
|
"grad_norm": 1.1203513145446777, |
|
"learning_rate": 1.3629875227459532e-05, |
|
"loss": 0.5886, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.203931203931204, |
|
"grad_norm": 1.120291829109192, |
|
"learning_rate": 1.3605138515910362e-05, |
|
"loss": 0.582, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2063882063882063, |
|
"grad_norm": 1.167699933052063, |
|
"learning_rate": 1.3580376422816945e-05, |
|
"loss": 0.602, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.2088452088452089, |
|
"grad_norm": 1.2335660457611084, |
|
"learning_rate": 1.3555589122513828e-05, |
|
"loss": 0.6206, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.2113022113022114, |
|
"grad_norm": 1.15221107006073, |
|
"learning_rate": 1.3530776789513009e-05, |
|
"loss": 0.5953, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2137592137592137, |
|
"grad_norm": 1.0700962543487549, |
|
"learning_rate": 1.3505939598502742e-05, |
|
"loss": 0.5308, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2162162162162162, |
|
"grad_norm": 1.2463740110397339, |
|
"learning_rate": 1.3481077724346279e-05, |
|
"loss": 0.6081, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.2186732186732188, |
|
"grad_norm": 1.0911908149719238, |
|
"learning_rate": 1.345619134208066e-05, |
|
"loss": 0.5504, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.221130221130221, |
|
"grad_norm": 1.2074313163757324, |
|
"learning_rate": 1.3431280626915466e-05, |
|
"loss": 0.5432, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.2235872235872236, |
|
"grad_norm": 1.1220638751983643, |
|
"learning_rate": 1.340634575423159e-05, |
|
"loss": 0.5558, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.2260442260442261, |
|
"grad_norm": 0.9794447422027588, |
|
"learning_rate": 1.3381386899580005e-05, |
|
"loss": 0.5194, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.2285012285012284, |
|
"grad_norm": 1.2143794298171997, |
|
"learning_rate": 1.3356404238680528e-05, |
|
"loss": 0.5486, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.230958230958231, |
|
"grad_norm": 1.2196683883666992, |
|
"learning_rate": 1.3331397947420578e-05, |
|
"loss": 0.5858, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.2334152334152333, |
|
"grad_norm": 1.1935386657714844, |
|
"learning_rate": 1.3306368201853941e-05, |
|
"loss": 0.5625, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.2358722358722358, |
|
"grad_norm": 1.2700451612472534, |
|
"learning_rate": 1.3281315178199537e-05, |
|
"loss": 0.5968, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.2383292383292384, |
|
"grad_norm": 1.1718500852584839, |
|
"learning_rate": 1.3256239052840157e-05, |
|
"loss": 0.5889, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.2407862407862407, |
|
"grad_norm": 1.193869948387146, |
|
"learning_rate": 1.3231140002321252e-05, |
|
"loss": 0.6119, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.2432432432432432, |
|
"grad_norm": 1.0735101699829102, |
|
"learning_rate": 1.320601820334967e-05, |
|
"loss": 0.5991, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.2457002457002457, |
|
"grad_norm": 1.1022897958755493, |
|
"learning_rate": 1.3180873832792417e-05, |
|
"loss": 0.5711, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.248157248157248, |
|
"grad_norm": 1.0396614074707031, |
|
"learning_rate": 1.3155707067675408e-05, |
|
"loss": 0.5668, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.2506142506142506, |
|
"grad_norm": 1.0049409866333008, |
|
"learning_rate": 1.3130518085182224e-05, |
|
"loss": 0.5679, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.253071253071253, |
|
"grad_norm": 1.1597648859024048, |
|
"learning_rate": 1.3105307062652873e-05, |
|
"loss": 0.5354, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.2555282555282554, |
|
"grad_norm": 1.121046543121338, |
|
"learning_rate": 1.3080074177582527e-05, |
|
"loss": 0.5465, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.257985257985258, |
|
"grad_norm": 1.3277342319488525, |
|
"learning_rate": 1.3054819607620275e-05, |
|
"loss": 0.6109, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.2604422604422605, |
|
"grad_norm": 1.156744360923767, |
|
"learning_rate": 1.3029543530567884e-05, |
|
"loss": 0.6064, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.2628992628992628, |
|
"grad_norm": 1.1742829084396362, |
|
"learning_rate": 1.3004246124378537e-05, |
|
"loss": 0.6237, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.2653562653562653, |
|
"grad_norm": 1.1408833265304565, |
|
"learning_rate": 1.2978927567155575e-05, |
|
"loss": 0.5855, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.2678132678132679, |
|
"grad_norm": 1.0413517951965332, |
|
"learning_rate": 1.2953588037151261e-05, |
|
"loss": 0.545, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.2702702702702702, |
|
"grad_norm": 1.1523407697677612, |
|
"learning_rate": 1.2928227712765504e-05, |
|
"loss": 0.5932, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 1.1548582315444946, |
|
"learning_rate": 1.2902846772544625e-05, |
|
"loss": 0.5318, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.2751842751842752, |
|
"grad_norm": 1.1297783851623535, |
|
"learning_rate": 1.2877445395180077e-05, |
|
"loss": 0.5321, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.2776412776412776, |
|
"grad_norm": 1.244998574256897, |
|
"learning_rate": 1.2852023759507204e-05, |
|
"loss": 0.5985, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.28009828009828, |
|
"grad_norm": 1.2012943029403687, |
|
"learning_rate": 1.282658204450398e-05, |
|
"loss": 0.6266, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.2825552825552826, |
|
"grad_norm": 1.134021520614624, |
|
"learning_rate": 1.2801120429289731e-05, |
|
"loss": 0.5548, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.285012285012285, |
|
"grad_norm": 1.203113079071045, |
|
"learning_rate": 1.2775639093123905e-05, |
|
"loss": 0.5569, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.2874692874692875, |
|
"grad_norm": 1.2195301055908203, |
|
"learning_rate": 1.2750138215404784e-05, |
|
"loss": 0.5714, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.28992628992629, |
|
"grad_norm": 1.056214451789856, |
|
"learning_rate": 1.2724617975668229e-05, |
|
"loss": 0.6026, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.2923832923832923, |
|
"grad_norm": 1.014293909072876, |
|
"learning_rate": 1.2699078553586424e-05, |
|
"loss": 0.5393, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.2948402948402948, |
|
"grad_norm": 1.0652614831924438, |
|
"learning_rate": 1.2673520128966592e-05, |
|
"loss": 0.5407, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.2972972972972974, |
|
"grad_norm": 1.1022783517837524, |
|
"learning_rate": 1.2647942881749756e-05, |
|
"loss": 0.5819, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.2997542997542997, |
|
"grad_norm": 1.086804986000061, |
|
"learning_rate": 1.2622346992009447e-05, |
|
"loss": 0.5685, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.3022113022113022, |
|
"grad_norm": 1.307706594467163, |
|
"learning_rate": 1.2596732639950444e-05, |
|
"loss": 0.5525, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.3046683046683047, |
|
"grad_norm": 1.0837743282318115, |
|
"learning_rate": 1.2571100005907522e-05, |
|
"loss": 0.5333, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.307125307125307, |
|
"grad_norm": 1.0788577795028687, |
|
"learning_rate": 1.254544927034415e-05, |
|
"loss": 0.5888, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.3095823095823096, |
|
"grad_norm": 1.233123779296875, |
|
"learning_rate": 1.2519780613851254e-05, |
|
"loss": 0.6043, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.3120393120393121, |
|
"grad_norm": 1.1542140245437622, |
|
"learning_rate": 1.249409421714592e-05, |
|
"loss": 0.5822, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.3144963144963144, |
|
"grad_norm": 1.1778144836425781, |
|
"learning_rate": 1.2468390261070139e-05, |
|
"loss": 0.6057, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.316953316953317, |
|
"grad_norm": 1.1264017820358276, |
|
"learning_rate": 1.244266892658952e-05, |
|
"loss": 0.5904, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.3194103194103195, |
|
"grad_norm": 1.1247997283935547, |
|
"learning_rate": 1.2416930394792026e-05, |
|
"loss": 0.5753, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.3218673218673218, |
|
"grad_norm": 1.2115566730499268, |
|
"learning_rate": 1.2391174846886698e-05, |
|
"loss": 0.5877, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.3243243243243243, |
|
"grad_norm": 1.1855846643447876, |
|
"learning_rate": 1.2365402464202369e-05, |
|
"loss": 0.5685, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.3267813267813269, |
|
"grad_norm": 1.2449346780776978, |
|
"learning_rate": 1.2339613428186407e-05, |
|
"loss": 0.5672, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.3292383292383292, |
|
"grad_norm": 1.0996452569961548, |
|
"learning_rate": 1.2313807920403419e-05, |
|
"loss": 0.5765, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.3316953316953317, |
|
"grad_norm": 1.1104735136032104, |
|
"learning_rate": 1.228798612253397e-05, |
|
"loss": 0.5834, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.3341523341523343, |
|
"grad_norm": 1.1029411554336548, |
|
"learning_rate": 1.2262148216373333e-05, |
|
"loss": 0.5432, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.3366093366093366, |
|
"grad_norm": 1.308183193206787, |
|
"learning_rate": 1.2236294383830177e-05, |
|
"loss": 0.6166, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.339066339066339, |
|
"grad_norm": 1.1299169063568115, |
|
"learning_rate": 1.22104248069253e-05, |
|
"loss": 0.592, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.3415233415233416, |
|
"grad_norm": 1.0875178575515747, |
|
"learning_rate": 1.2184539667790349e-05, |
|
"loss": 0.5121, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.343980343980344, |
|
"grad_norm": 1.1140974760055542, |
|
"learning_rate": 1.2158639148666533e-05, |
|
"loss": 0.5538, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.3464373464373465, |
|
"grad_norm": 1.304472804069519, |
|
"learning_rate": 1.2132723431903341e-05, |
|
"loss": 0.635, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.348894348894349, |
|
"grad_norm": 1.1797876358032227, |
|
"learning_rate": 1.2106792699957264e-05, |
|
"loss": 0.6401, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 1.2239415645599365, |
|
"learning_rate": 1.2080847135390502e-05, |
|
"loss": 0.5928, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.3538083538083538, |
|
"grad_norm": 1.146208643913269, |
|
"learning_rate": 1.2054886920869682e-05, |
|
"loss": 0.564, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.3562653562653564, |
|
"grad_norm": 1.2212401628494263, |
|
"learning_rate": 1.202891223916457e-05, |
|
"loss": 0.5695, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.3587223587223587, |
|
"grad_norm": 1.1870454549789429, |
|
"learning_rate": 1.2002923273146793e-05, |
|
"loss": 0.5908, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.3611793611793612, |
|
"grad_norm": 1.0334968566894531, |
|
"learning_rate": 1.1976920205788542e-05, |
|
"loss": 0.5675, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 1.0874249935150146, |
|
"learning_rate": 1.1950903220161286e-05, |
|
"loss": 0.5755, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.366093366093366, |
|
"grad_norm": 1.1364604234695435, |
|
"learning_rate": 1.1924872499434478e-05, |
|
"loss": 0.5464, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.3685503685503686, |
|
"grad_norm": 1.2387900352478027, |
|
"learning_rate": 1.1898828226874284e-05, |
|
"loss": 0.5867, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.3710073710073711, |
|
"grad_norm": 1.2175296545028687, |
|
"learning_rate": 1.1872770585842273e-05, |
|
"loss": 0.5807, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.3734643734643734, |
|
"grad_norm": 1.1570370197296143, |
|
"learning_rate": 1.1846699759794129e-05, |
|
"loss": 0.5783, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.375921375921376, |
|
"grad_norm": 1.0780045986175537, |
|
"learning_rate": 1.1820615932278375e-05, |
|
"loss": 0.5679, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3783783783783785, |
|
"grad_norm": 1.045121192932129, |
|
"learning_rate": 1.1794519286935056e-05, |
|
"loss": 0.5759, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.3808353808353808, |
|
"grad_norm": 1.117018699645996, |
|
"learning_rate": 1.1768410007494466e-05, |
|
"loss": 0.5849, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.3832923832923834, |
|
"grad_norm": 1.085938572883606, |
|
"learning_rate": 1.174228827777585e-05, |
|
"loss": 0.6009, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.3857493857493859, |
|
"grad_norm": 1.171752691268921, |
|
"learning_rate": 1.1716154281686105e-05, |
|
"loss": 0.6076, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.3882063882063882, |
|
"grad_norm": 1.102292537689209, |
|
"learning_rate": 1.1690008203218493e-05, |
|
"loss": 0.5868, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.3906633906633907, |
|
"grad_norm": 1.1972408294677734, |
|
"learning_rate": 1.1663850226451328e-05, |
|
"loss": 0.5649, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.393120393120393, |
|
"grad_norm": 1.191436767578125, |
|
"learning_rate": 1.16376805355467e-05, |
|
"loss": 0.5581, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.3955773955773956, |
|
"grad_norm": 1.1172912120819092, |
|
"learning_rate": 1.1611499314749177e-05, |
|
"loss": 0.5802, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.398034398034398, |
|
"grad_norm": 1.151570439338684, |
|
"learning_rate": 1.158530674838449e-05, |
|
"loss": 0.5652, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.4004914004914004, |
|
"grad_norm": 1.2126294374465942, |
|
"learning_rate": 1.155910302085826e-05, |
|
"loss": 0.571, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.402948402948403, |
|
"grad_norm": 1.2168971300125122, |
|
"learning_rate": 1.1532888316654675e-05, |
|
"loss": 0.5791, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.4054054054054055, |
|
"grad_norm": 1.1153680086135864, |
|
"learning_rate": 1.1506662820335208e-05, |
|
"loss": 0.5781, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.4078624078624078, |
|
"grad_norm": 1.1872464418411255, |
|
"learning_rate": 1.1480426716537316e-05, |
|
"loss": 0.5923, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.4103194103194103, |
|
"grad_norm": 1.0620458126068115, |
|
"learning_rate": 1.145418018997313e-05, |
|
"loss": 0.5603, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.4127764127764126, |
|
"grad_norm": 1.0912894010543823, |
|
"learning_rate": 1.1427923425428165e-05, |
|
"loss": 0.6011, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.4152334152334152, |
|
"grad_norm": 1.1529377698898315, |
|
"learning_rate": 1.1401656607760015e-05, |
|
"loss": 0.6114, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.4176904176904177, |
|
"grad_norm": 1.2269554138183594, |
|
"learning_rate": 1.1375379921897052e-05, |
|
"loss": 0.5876, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.42014742014742, |
|
"grad_norm": 1.1351017951965332, |
|
"learning_rate": 1.134909355283712e-05, |
|
"loss": 0.5898, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.4226044226044225, |
|
"grad_norm": 1.1139864921569824, |
|
"learning_rate": 1.1322797685646243e-05, |
|
"loss": 0.5656, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.425061425061425, |
|
"grad_norm": 1.0196126699447632, |
|
"learning_rate": 1.1296492505457315e-05, |
|
"loss": 0.5386, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.4275184275184274, |
|
"grad_norm": 1.1085504293441772, |
|
"learning_rate": 1.1270178197468788e-05, |
|
"loss": 0.5934, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.42997542997543, |
|
"grad_norm": 1.1703898906707764, |
|
"learning_rate": 1.1243854946943389e-05, |
|
"loss": 0.5998, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.4324324324324325, |
|
"grad_norm": 1.1272860765457153, |
|
"learning_rate": 1.1217522939206796e-05, |
|
"loss": 0.5733, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.4348894348894348, |
|
"grad_norm": 1.3168493509292603, |
|
"learning_rate": 1.1191182359646338e-05, |
|
"loss": 0.5752, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.4373464373464373, |
|
"grad_norm": 1.2049705982208252, |
|
"learning_rate": 1.1164833393709707e-05, |
|
"loss": 0.5701, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.4398034398034398, |
|
"grad_norm": 1.1225930452346802, |
|
"learning_rate": 1.1138476226903626e-05, |
|
"loss": 0.561, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.4422604422604421, |
|
"grad_norm": 1.230978012084961, |
|
"learning_rate": 1.1112111044792557e-05, |
|
"loss": 0.6239, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.4447174447174447, |
|
"grad_norm": 1.2035282850265503, |
|
"learning_rate": 1.1085738032997397e-05, |
|
"loss": 0.6059, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.4471744471744472, |
|
"grad_norm": 1.062477469444275, |
|
"learning_rate": 1.1059357377194161e-05, |
|
"loss": 0.5714, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.4496314496314495, |
|
"grad_norm": 1.0980175733566284, |
|
"learning_rate": 1.103296926311269e-05, |
|
"loss": 0.5742, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.452088452088452, |
|
"grad_norm": 1.0794448852539062, |
|
"learning_rate": 1.1006573876535322e-05, |
|
"loss": 0.5433, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 1.2062351703643799, |
|
"learning_rate": 1.098017140329561e-05, |
|
"loss": 0.5712, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.457002457002457, |
|
"grad_norm": 1.1053940057754517, |
|
"learning_rate": 1.0953762029276982e-05, |
|
"loss": 0.547, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.4594594594594594, |
|
"grad_norm": 1.1719647645950317, |
|
"learning_rate": 1.0927345940411466e-05, |
|
"loss": 0.6152, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.461916461916462, |
|
"grad_norm": 1.2481141090393066, |
|
"learning_rate": 1.0900923322678366e-05, |
|
"loss": 0.5968, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.4643734643734643, |
|
"grad_norm": 1.1074830293655396, |
|
"learning_rate": 1.0874494362102932e-05, |
|
"loss": 0.5624, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.4668304668304668, |
|
"grad_norm": 1.031218409538269, |
|
"learning_rate": 1.0848059244755093e-05, |
|
"loss": 0.5549, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.4692874692874693, |
|
"grad_norm": 1.140859842300415, |
|
"learning_rate": 1.082161815674811e-05, |
|
"loss": 0.5655, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.4717444717444716, |
|
"grad_norm": 1.1622551679611206, |
|
"learning_rate": 1.0795171284237284e-05, |
|
"loss": 0.6138, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.4742014742014742, |
|
"grad_norm": 1.0816534757614136, |
|
"learning_rate": 1.0768718813418643e-05, |
|
"loss": 0.5708, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4766584766584767, |
|
"grad_norm": 1.1631544828414917, |
|
"learning_rate": 1.0742260930527625e-05, |
|
"loss": 0.5568, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.479115479115479, |
|
"grad_norm": 1.2862136363983154, |
|
"learning_rate": 1.0715797821837776e-05, |
|
"loss": 0.5371, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.4815724815724816, |
|
"grad_norm": 1.1199933290481567, |
|
"learning_rate": 1.068932967365943e-05, |
|
"loss": 0.6247, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.484029484029484, |
|
"grad_norm": 1.182577133178711, |
|
"learning_rate": 1.0662856672338398e-05, |
|
"loss": 0.5763, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.4864864864864864, |
|
"grad_norm": 1.2508751153945923, |
|
"learning_rate": 1.0636379004254665e-05, |
|
"loss": 0.5975, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.488943488943489, |
|
"grad_norm": 1.0829459428787231, |
|
"learning_rate": 1.0609896855821069e-05, |
|
"loss": 0.5828, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.4914004914004915, |
|
"grad_norm": 1.1169177293777466, |
|
"learning_rate": 1.0583410413481995e-05, |
|
"loss": 0.5655, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.4938574938574938, |
|
"grad_norm": 1.201220154762268, |
|
"learning_rate": 1.0556919863712053e-05, |
|
"loss": 0.5554, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.4963144963144963, |
|
"grad_norm": 1.085603952407837, |
|
"learning_rate": 1.0530425393014773e-05, |
|
"loss": 0.5567, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.4987714987714988, |
|
"grad_norm": 1.1561790704727173, |
|
"learning_rate": 1.0503927187921291e-05, |
|
"loss": 0.5799, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.5012285012285012, |
|
"grad_norm": 1.1585196256637573, |
|
"learning_rate": 1.0477425434989038e-05, |
|
"loss": 0.5606, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.5036855036855037, |
|
"grad_norm": 1.2159061431884766, |
|
"learning_rate": 1.045092032080041e-05, |
|
"loss": 0.5641, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.5061425061425062, |
|
"grad_norm": 1.1273988485336304, |
|
"learning_rate": 1.0424412031961485e-05, |
|
"loss": 0.5652, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.5085995085995085, |
|
"grad_norm": 1.1589583158493042, |
|
"learning_rate": 1.0397900755100678e-05, |
|
"loss": 0.6213, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.511056511056511, |
|
"grad_norm": 1.0823664665222168, |
|
"learning_rate": 1.0371386676867447e-05, |
|
"loss": 0.5555, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.5135135135135136, |
|
"grad_norm": 1.1465460062026978, |
|
"learning_rate": 1.0344869983930975e-05, |
|
"loss": 0.5761, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.515970515970516, |
|
"grad_norm": 1.1848366260528564, |
|
"learning_rate": 1.0318350862978848e-05, |
|
"loss": 0.5642, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.5184275184275184, |
|
"grad_norm": 1.058383822441101, |
|
"learning_rate": 1.0291829500715744e-05, |
|
"loss": 0.569, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.520884520884521, |
|
"grad_norm": 1.1151319742202759, |
|
"learning_rate": 1.0265306083862135e-05, |
|
"loss": 0.5399, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.5233415233415233, |
|
"grad_norm": 1.128150224685669, |
|
"learning_rate": 1.0238780799152939e-05, |
|
"loss": 0.5654, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.5257985257985258, |
|
"grad_norm": 1.1646603345870972, |
|
"learning_rate": 1.0212253833336237e-05, |
|
"loss": 0.5588, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.5282555282555284, |
|
"grad_norm": 1.0704180002212524, |
|
"learning_rate": 1.0185725373171942e-05, |
|
"loss": 0.5414, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.5307125307125307, |
|
"grad_norm": 1.1394037008285522, |
|
"learning_rate": 1.015919560543049e-05, |
|
"loss": 0.526, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.5331695331695332, |
|
"grad_norm": 1.2430076599121094, |
|
"learning_rate": 1.013266471689152e-05, |
|
"loss": 0.6033, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.5356265356265357, |
|
"grad_norm": 1.1466593742370605, |
|
"learning_rate": 1.0106132894342564e-05, |
|
"loss": 0.5498, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.538083538083538, |
|
"grad_norm": 1.121476411819458, |
|
"learning_rate": 1.0079600324577722e-05, |
|
"loss": 0.5734, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.5405405405405406, |
|
"grad_norm": 1.135146975517273, |
|
"learning_rate": 1.005306719439637e-05, |
|
"loss": 0.5479, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.542997542997543, |
|
"grad_norm": 1.0370548963546753, |
|
"learning_rate": 1.0026533690601815e-05, |
|
"loss": 0.5541, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 1.1773468255996704, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5672, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.547911547911548, |
|
"grad_norm": 1.2109761238098145, |
|
"learning_rate": 9.973466309398187e-06, |
|
"loss": 0.598, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.5503685503685505, |
|
"grad_norm": 1.1260876655578613, |
|
"learning_rate": 9.946932805603635e-06, |
|
"loss": 0.5986, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.5528255528255528, |
|
"grad_norm": 1.2091740369796753, |
|
"learning_rate": 9.92039967542228e-06, |
|
"loss": 0.6053, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.5552825552825553, |
|
"grad_norm": 1.1314700841903687, |
|
"learning_rate": 9.89386710565744e-06, |
|
"loss": 0.5463, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.5577395577395579, |
|
"grad_norm": 1.1741538047790527, |
|
"learning_rate": 9.867335283108481e-06, |
|
"loss": 0.5285, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.5601965601965602, |
|
"grad_norm": 1.104568362236023, |
|
"learning_rate": 9.840804394569512e-06, |
|
"loss": 0.5926, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.5626535626535627, |
|
"grad_norm": 1.2032363414764404, |
|
"learning_rate": 9.81427462682806e-06, |
|
"loss": 0.5756, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.5651105651105652, |
|
"grad_norm": 1.0761467218399048, |
|
"learning_rate": 9.787746166663765e-06, |
|
"loss": 0.566, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.5675675675675675, |
|
"grad_norm": 1.2620768547058105, |
|
"learning_rate": 9.761219200847066e-06, |
|
"loss": 0.6151, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.57002457002457, |
|
"grad_norm": 1.1293340921401978, |
|
"learning_rate": 9.734693916137869e-06, |
|
"loss": 0.5631, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.5724815724815726, |
|
"grad_norm": 1.150578260421753, |
|
"learning_rate": 9.708170499284256e-06, |
|
"loss": 0.5691, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.574938574938575, |
|
"grad_norm": 1.1344029903411865, |
|
"learning_rate": 9.681649137021158e-06, |
|
"loss": 0.5681, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.5773955773955772, |
|
"grad_norm": 1.1116544008255005, |
|
"learning_rate": 9.655130016069029e-06, |
|
"loss": 0.5878, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.57985257985258, |
|
"grad_norm": 1.0114622116088867, |
|
"learning_rate": 9.628613323132554e-06, |
|
"loss": 0.5139, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.5823095823095823, |
|
"grad_norm": 1.0908740758895874, |
|
"learning_rate": 9.602099244899324e-06, |
|
"loss": 0.5706, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.5847665847665846, |
|
"grad_norm": 1.139978051185608, |
|
"learning_rate": 9.57558796803852e-06, |
|
"loss": 0.5887, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.5872235872235874, |
|
"grad_norm": 1.1135131120681763, |
|
"learning_rate": 9.549079679199592e-06, |
|
"loss": 0.5152, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.5896805896805897, |
|
"grad_norm": 1.0935195684432983, |
|
"learning_rate": 9.522574565010964e-06, |
|
"loss": 0.5599, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.592137592137592, |
|
"grad_norm": 1.159075140953064, |
|
"learning_rate": 9.496072812078712e-06, |
|
"loss": 0.5657, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.5945945945945947, |
|
"grad_norm": 1.1898802518844604, |
|
"learning_rate": 9.46957460698523e-06, |
|
"loss": 0.5783, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.597051597051597, |
|
"grad_norm": 1.1011685132980347, |
|
"learning_rate": 9.44308013628795e-06, |
|
"loss": 0.5674, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5995085995085994, |
|
"grad_norm": 1.160726547241211, |
|
"learning_rate": 9.416589586518009e-06, |
|
"loss": 0.6082, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.6019656019656021, |
|
"grad_norm": 1.1680805683135986, |
|
"learning_rate": 9.390103144178933e-06, |
|
"loss": 0.5194, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.6044226044226044, |
|
"grad_norm": 1.1895872354507446, |
|
"learning_rate": 9.363620995745337e-06, |
|
"loss": 0.5288, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.6068796068796067, |
|
"grad_norm": 1.1235671043395996, |
|
"learning_rate": 9.337143327661604e-06, |
|
"loss": 0.5395, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.6093366093366095, |
|
"grad_norm": 1.1525102853775024, |
|
"learning_rate": 9.310670326340576e-06, |
|
"loss": 0.5442, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.6117936117936118, |
|
"grad_norm": 1.1586118936538696, |
|
"learning_rate": 9.284202178162225e-06, |
|
"loss": 0.547, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.6142506142506141, |
|
"grad_norm": 1.177646517753601, |
|
"learning_rate": 9.257739069472375e-06, |
|
"loss": 0.5781, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.6167076167076169, |
|
"grad_norm": 1.5798202753067017, |
|
"learning_rate": 9.23128118658136e-06, |
|
"loss": 0.5414, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.6191646191646192, |
|
"grad_norm": 1.2702497243881226, |
|
"learning_rate": 9.204828715762719e-06, |
|
"loss": 0.5682, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 1.1953880786895752, |
|
"learning_rate": 9.178381843251892e-06, |
|
"loss": 0.5605, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.6240786240786242, |
|
"grad_norm": 1.2357171773910522, |
|
"learning_rate": 9.151940755244912e-06, |
|
"loss": 0.5783, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.6265356265356266, |
|
"grad_norm": 1.1717873811721802, |
|
"learning_rate": 9.125505637897072e-06, |
|
"loss": 0.5778, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.6289926289926289, |
|
"grad_norm": 1.1912225484848022, |
|
"learning_rate": 9.09907667732164e-06, |
|
"loss": 0.5197, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.6314496314496314, |
|
"grad_norm": 1.115591287612915, |
|
"learning_rate": 9.072654059588534e-06, |
|
"loss": 0.5781, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.633906633906634, |
|
"grad_norm": 1.177725076675415, |
|
"learning_rate": 9.046237970723022e-06, |
|
"loss": 0.582, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 1.127977728843689, |
|
"learning_rate": 9.019828596704394e-06, |
|
"loss": 0.5365, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.6388206388206388, |
|
"grad_norm": 1.211154580116272, |
|
"learning_rate": 8.99342612346468e-06, |
|
"loss": 0.5424, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.6412776412776413, |
|
"grad_norm": 1.160064458847046, |
|
"learning_rate": 8.967030736887315e-06, |
|
"loss": 0.5525, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.6437346437346436, |
|
"grad_norm": 1.1632789373397827, |
|
"learning_rate": 8.94064262280584e-06, |
|
"loss": 0.5635, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.6461916461916462, |
|
"grad_norm": 1.0825740098953247, |
|
"learning_rate": 8.914261967002605e-06, |
|
"loss": 0.5794, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.6486486486486487, |
|
"grad_norm": 1.1623291969299316, |
|
"learning_rate": 8.887888955207444e-06, |
|
"loss": 0.5661, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.651105651105651, |
|
"grad_norm": 1.3267773389816284, |
|
"learning_rate": 8.861523773096379e-06, |
|
"loss": 0.5771, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.6535626535626535, |
|
"grad_norm": 1.2022407054901123, |
|
"learning_rate": 8.835166606290295e-06, |
|
"loss": 0.5586, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.656019656019656, |
|
"grad_norm": 1.211409568786621, |
|
"learning_rate": 8.808817640353662e-06, |
|
"loss": 0.574, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.6584766584766584, |
|
"grad_norm": 1.0847737789154053, |
|
"learning_rate": 8.782477060793211e-06, |
|
"loss": 0.5778, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.660933660933661, |
|
"grad_norm": 1.048552393913269, |
|
"learning_rate": 8.756145053056615e-06, |
|
"loss": 0.5541, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.6633906633906634, |
|
"grad_norm": 1.2484380006790161, |
|
"learning_rate": 8.729821802531213e-06, |
|
"loss": 0.6039, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.6658476658476657, |
|
"grad_norm": 1.3350811004638672, |
|
"learning_rate": 8.703507494542692e-06, |
|
"loss": 0.5658, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.6683046683046683, |
|
"grad_norm": 1.1048815250396729, |
|
"learning_rate": 8.67720231435376e-06, |
|
"loss": 0.5723, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.6707616707616708, |
|
"grad_norm": 1.128318190574646, |
|
"learning_rate": 8.650906447162884e-06, |
|
"loss": 0.602, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.6732186732186731, |
|
"grad_norm": 1.0732570886611938, |
|
"learning_rate": 8.624620078102952e-06, |
|
"loss": 0.5558, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.6756756756756757, |
|
"grad_norm": 1.1415199041366577, |
|
"learning_rate": 8.59834339223999e-06, |
|
"loss": 0.5847, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.6781326781326782, |
|
"grad_norm": 1.2452774047851562, |
|
"learning_rate": 8.572076574571838e-06, |
|
"loss": 0.5393, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.6805896805896805, |
|
"grad_norm": 1.1395483016967773, |
|
"learning_rate": 8.545819810026871e-06, |
|
"loss": 0.5818, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.683046683046683, |
|
"grad_norm": 1.126442551612854, |
|
"learning_rate": 8.519573283462688e-06, |
|
"loss": 0.577, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.6855036855036856, |
|
"grad_norm": 1.2619662284851074, |
|
"learning_rate": 8.493337179664794e-06, |
|
"loss": 0.6061, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.6879606879606879, |
|
"grad_norm": 1.1817083358764648, |
|
"learning_rate": 8.467111683345327e-06, |
|
"loss": 0.5834, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.6904176904176904, |
|
"grad_norm": 1.1162532567977905, |
|
"learning_rate": 8.440896979141743e-06, |
|
"loss": 0.5752, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.692874692874693, |
|
"grad_norm": 1.238197922706604, |
|
"learning_rate": 8.414693251615513e-06, |
|
"loss": 0.5541, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.6953316953316953, |
|
"grad_norm": 1.0888656377792358, |
|
"learning_rate": 8.388500685250826e-06, |
|
"loss": 0.5887, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.6977886977886978, |
|
"grad_norm": 1.2266663312911987, |
|
"learning_rate": 8.362319464453301e-06, |
|
"loss": 0.5747, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.7002457002457003, |
|
"grad_norm": 1.132505178451538, |
|
"learning_rate": 8.336149773548679e-06, |
|
"loss": 0.5689, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.7027027027027026, |
|
"grad_norm": 1.0732828378677368, |
|
"learning_rate": 8.309991796781512e-06, |
|
"loss": 0.5357, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.7051597051597052, |
|
"grad_norm": 1.0756043195724487, |
|
"learning_rate": 8.283845718313894e-06, |
|
"loss": 0.559, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.7076167076167077, |
|
"grad_norm": 1.0958280563354492, |
|
"learning_rate": 8.257711722224153e-06, |
|
"loss": 0.5177, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.71007371007371, |
|
"grad_norm": 1.0943565368652344, |
|
"learning_rate": 8.231589992505536e-06, |
|
"loss": 0.5725, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.7125307125307125, |
|
"grad_norm": 1.1087898015975952, |
|
"learning_rate": 8.205480713064947e-06, |
|
"loss": 0.574, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.714987714987715, |
|
"grad_norm": 1.128554344177246, |
|
"learning_rate": 8.17938406772163e-06, |
|
"loss": 0.594, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.7174447174447174, |
|
"grad_norm": 1.1335420608520508, |
|
"learning_rate": 8.153300240205874e-06, |
|
"loss": 0.5724, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.71990171990172, |
|
"grad_norm": 1.1042388677597046, |
|
"learning_rate": 8.12722941415773e-06, |
|
"loss": 0.573, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7223587223587224, |
|
"grad_norm": 1.1227362155914307, |
|
"learning_rate": 8.101171773125716e-06, |
|
"loss": 0.5123, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.7248157248157248, |
|
"grad_norm": 1.2100160121917725, |
|
"learning_rate": 8.075127500565525e-06, |
|
"loss": 0.5836, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 1.1813348531723022, |
|
"learning_rate": 8.04909677983872e-06, |
|
"loss": 0.5789, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.7297297297297298, |
|
"grad_norm": 1.216829538345337, |
|
"learning_rate": 8.02307979421146e-06, |
|
"loss": 0.6077, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.7321867321867321, |
|
"grad_norm": 1.1228618621826172, |
|
"learning_rate": 7.99707672685321e-06, |
|
"loss": 0.5049, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.7346437346437347, |
|
"grad_norm": 1.1116825342178345, |
|
"learning_rate": 7.971087760835434e-06, |
|
"loss": 0.5949, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.7371007371007372, |
|
"grad_norm": 1.1956136226654053, |
|
"learning_rate": 7.945113079130323e-06, |
|
"loss": 0.5613, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.7395577395577395, |
|
"grad_norm": 1.1992087364196777, |
|
"learning_rate": 7.9191528646095e-06, |
|
"loss": 0.5768, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.742014742014742, |
|
"grad_norm": 1.2149180173873901, |
|
"learning_rate": 7.89320730004274e-06, |
|
"loss": 0.5682, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.7444717444717446, |
|
"grad_norm": 1.0318468809127808, |
|
"learning_rate": 7.867276568096662e-06, |
|
"loss": 0.5533, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.746928746928747, |
|
"grad_norm": 1.1024391651153564, |
|
"learning_rate": 7.84136085133347e-06, |
|
"loss": 0.5543, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.7493857493857494, |
|
"grad_norm": 1.1635844707489014, |
|
"learning_rate": 7.815460332209656e-06, |
|
"loss": 0.5954, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.751842751842752, |
|
"grad_norm": 1.251796007156372, |
|
"learning_rate": 7.789575193074703e-06, |
|
"loss": 0.5956, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.7542997542997543, |
|
"grad_norm": 1.1243458986282349, |
|
"learning_rate": 7.763705616169825e-06, |
|
"loss": 0.5409, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.7567567567567568, |
|
"grad_norm": 1.0625543594360352, |
|
"learning_rate": 7.737851783626672e-06, |
|
"loss": 0.5581, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.7592137592137593, |
|
"grad_norm": 1.2470191717147827, |
|
"learning_rate": 7.712013877466032e-06, |
|
"loss": 0.6114, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.7616707616707616, |
|
"grad_norm": 1.011608362197876, |
|
"learning_rate": 7.686192079596586e-06, |
|
"loss": 0.5524, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.7641277641277642, |
|
"grad_norm": 1.0156747102737427, |
|
"learning_rate": 7.660386571813593e-06, |
|
"loss": 0.5425, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.7665847665847667, |
|
"grad_norm": 1.1418312788009644, |
|
"learning_rate": 7.634597535797633e-06, |
|
"loss": 0.5418, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.769041769041769, |
|
"grad_norm": 1.1201746463775635, |
|
"learning_rate": 7.608825153113305e-06, |
|
"loss": 0.5863, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.7714987714987716, |
|
"grad_norm": 1.1697362661361694, |
|
"learning_rate": 7.5830696052079754e-06, |
|
"loss": 0.5746, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.773955773955774, |
|
"grad_norm": 1.1003642082214355, |
|
"learning_rate": 7.557331073410486e-06, |
|
"loss": 0.597, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.7764127764127764, |
|
"grad_norm": 1.1120378971099854, |
|
"learning_rate": 7.531609738929865e-06, |
|
"loss": 0.5934, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.7788697788697787, |
|
"grad_norm": 1.1342380046844482, |
|
"learning_rate": 7.5059057828540815e-06, |
|
"loss": 0.5819, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.7813267813267815, |
|
"grad_norm": 1.178702473640442, |
|
"learning_rate": 7.480219386148751e-06, |
|
"loss": 0.5867, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.7837837837837838, |
|
"grad_norm": 1.1170891523361206, |
|
"learning_rate": 7.454550729655853e-06, |
|
"loss": 0.5619, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.786240786240786, |
|
"grad_norm": 1.1073411703109741, |
|
"learning_rate": 7.428899994092482e-06, |
|
"loss": 0.5536, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.7886977886977888, |
|
"grad_norm": 1.0886826515197754, |
|
"learning_rate": 7.403267360049557e-06, |
|
"loss": 0.5302, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.7911547911547911, |
|
"grad_norm": 1.1056452989578247, |
|
"learning_rate": 7.377653007990559e-06, |
|
"loss": 0.5706, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.7936117936117935, |
|
"grad_norm": 1.1150050163269043, |
|
"learning_rate": 7.3520571182502465e-06, |
|
"loss": 0.5991, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.7960687960687962, |
|
"grad_norm": 1.08351731300354, |
|
"learning_rate": 7.326479871033408e-06, |
|
"loss": 0.5642, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.7985257985257985, |
|
"grad_norm": 1.0654759407043457, |
|
"learning_rate": 7.300921446413582e-06, |
|
"loss": 0.5203, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.8009828009828008, |
|
"grad_norm": 1.209181547164917, |
|
"learning_rate": 7.275382024331773e-06, |
|
"loss": 0.5787, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.8034398034398036, |
|
"grad_norm": 1.1766862869262695, |
|
"learning_rate": 7.249861784595218e-06, |
|
"loss": 0.5681, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.805896805896806, |
|
"grad_norm": 1.110587477684021, |
|
"learning_rate": 7.2243609068761e-06, |
|
"loss": 0.5994, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.8083538083538082, |
|
"grad_norm": 1.2170779705047607, |
|
"learning_rate": 7.198879570710272e-06, |
|
"loss": 0.6055, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.810810810810811, |
|
"grad_norm": 1.0853193998336792, |
|
"learning_rate": 7.173417955496025e-06, |
|
"loss": 0.5567, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.8132678132678133, |
|
"grad_norm": 1.0811327695846558, |
|
"learning_rate": 7.1479762404927955e-06, |
|
"loss": 0.5671, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.8157248157248156, |
|
"grad_norm": 1.1481190919876099, |
|
"learning_rate": 7.122554604819925e-06, |
|
"loss": 0.5699, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 1.2198657989501953, |
|
"learning_rate": 7.097153227455379e-06, |
|
"loss": 0.5912, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.8206388206388207, |
|
"grad_norm": 1.1447055339813232, |
|
"learning_rate": 7.071772287234497e-06, |
|
"loss": 0.6129, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.823095823095823, |
|
"grad_norm": 1.19182288646698, |
|
"learning_rate": 7.046411962848744e-06, |
|
"loss": 0.5565, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.8255528255528255, |
|
"grad_norm": 1.06711745262146, |
|
"learning_rate": 7.021072432844427e-06, |
|
"loss": 0.5472, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.828009828009828, |
|
"grad_norm": 1.1729991436004639, |
|
"learning_rate": 6.995753875621465e-06, |
|
"loss": 0.5959, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.8304668304668303, |
|
"grad_norm": 1.3030084371566772, |
|
"learning_rate": 6.970456469432116e-06, |
|
"loss": 0.5969, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.8329238329238329, |
|
"grad_norm": 1.085019588470459, |
|
"learning_rate": 6.945180392379729e-06, |
|
"loss": 0.5444, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.8353808353808354, |
|
"grad_norm": 1.1228212118148804, |
|
"learning_rate": 6.9199258224174774e-06, |
|
"loss": 0.5572, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.8378378378378377, |
|
"grad_norm": 1.219850778579712, |
|
"learning_rate": 6.894692937347127e-06, |
|
"loss": 0.5937, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.8402948402948403, |
|
"grad_norm": 1.0725655555725098, |
|
"learning_rate": 6.869481914817779e-06, |
|
"loss": 0.5827, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.8427518427518428, |
|
"grad_norm": 1.117969036102295, |
|
"learning_rate": 6.844292932324597e-06, |
|
"loss": 0.5589, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.845208845208845, |
|
"grad_norm": 1.0720747709274292, |
|
"learning_rate": 6.819126167207586e-06, |
|
"loss": 0.5062, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.8476658476658476, |
|
"grad_norm": 1.0815181732177734, |
|
"learning_rate": 6.793981796650333e-06, |
|
"loss": 0.5798, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.8501228501228502, |
|
"grad_norm": 1.0702989101409912, |
|
"learning_rate": 6.768859997678751e-06, |
|
"loss": 0.5327, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.8525798525798525, |
|
"grad_norm": 1.0632387399673462, |
|
"learning_rate": 6.743760947159847e-06, |
|
"loss": 0.5849, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.855036855036855, |
|
"grad_norm": 1.113589882850647, |
|
"learning_rate": 6.718684821800468e-06, |
|
"loss": 0.5947, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.8574938574938575, |
|
"grad_norm": 1.2338849306106567, |
|
"learning_rate": 6.693631798146061e-06, |
|
"loss": 0.61, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.8599508599508598, |
|
"grad_norm": 1.028483271598816, |
|
"learning_rate": 6.668602052579425e-06, |
|
"loss": 0.5944, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.8624078624078624, |
|
"grad_norm": 1.157347321510315, |
|
"learning_rate": 6.643595761319475e-06, |
|
"loss": 0.5437, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.864864864864865, |
|
"grad_norm": 1.0857754945755005, |
|
"learning_rate": 6.61861310042e-06, |
|
"loss": 0.5505, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.8673218673218672, |
|
"grad_norm": 1.0336072444915771, |
|
"learning_rate": 6.593654245768415e-06, |
|
"loss": 0.5798, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.8697788697788698, |
|
"grad_norm": 1.1750551462173462, |
|
"learning_rate": 6.5687193730845375e-06, |
|
"loss": 0.554, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.8722358722358723, |
|
"grad_norm": 1.137463927268982, |
|
"learning_rate": 6.543808657919345e-06, |
|
"loss": 0.5542, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.8746928746928746, |
|
"grad_norm": 1.2450264692306519, |
|
"learning_rate": 6.518922275653724e-06, |
|
"loss": 0.5759, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.8771498771498771, |
|
"grad_norm": 1.132763147354126, |
|
"learning_rate": 6.494060401497262e-06, |
|
"loss": 0.5395, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.8796068796068797, |
|
"grad_norm": 1.107035756111145, |
|
"learning_rate": 6.469223210486992e-06, |
|
"loss": 0.5819, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.882063882063882, |
|
"grad_norm": 1.2326126098632812, |
|
"learning_rate": 6.444410877486178e-06, |
|
"loss": 0.6089, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.8845208845208845, |
|
"grad_norm": 1.1576228141784668, |
|
"learning_rate": 6.419623577183056e-06, |
|
"loss": 0.5568, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.886977886977887, |
|
"grad_norm": 1.259473443031311, |
|
"learning_rate": 6.394861484089641e-06, |
|
"loss": 0.5501, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.8894348894348894, |
|
"grad_norm": 1.0849790573120117, |
|
"learning_rate": 6.370124772540469e-06, |
|
"loss": 0.5554, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.8918918918918919, |
|
"grad_norm": 1.0747138261795044, |
|
"learning_rate": 6.345413616691385e-06, |
|
"loss": 0.5572, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8943488943488944, |
|
"grad_norm": 1.2411308288574219, |
|
"learning_rate": 6.320728190518308e-06, |
|
"loss": 0.5823, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.8968058968058967, |
|
"grad_norm": 1.0845237970352173, |
|
"learning_rate": 6.29606866781602e-06, |
|
"loss": 0.5564, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.8992628992628993, |
|
"grad_norm": 1.0935763120651245, |
|
"learning_rate": 6.2714352221969155e-06, |
|
"loss": 0.5657, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.9017199017199018, |
|
"grad_norm": 1.171252727508545, |
|
"learning_rate": 6.246828027089811e-06, |
|
"loss": 0.5762, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.904176904176904, |
|
"grad_norm": 1.205479383468628, |
|
"learning_rate": 6.222247255738706e-06, |
|
"loss": 0.5382, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.9066339066339066, |
|
"grad_norm": 1.0936667919158936, |
|
"learning_rate": 6.197693081201568e-06, |
|
"loss": 0.5512, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 1.1234700679779053, |
|
"learning_rate": 6.173165676349103e-06, |
|
"loss": 0.5765, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.9115479115479115, |
|
"grad_norm": 1.1193130016326904, |
|
"learning_rate": 6.14866521386356e-06, |
|
"loss": 0.5407, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.914004914004914, |
|
"grad_norm": 1.1735546588897705, |
|
"learning_rate": 6.124191866237504e-06, |
|
"loss": 0.5759, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.9164619164619165, |
|
"grad_norm": 1.0813647508621216, |
|
"learning_rate": 6.0997458057725875e-06, |
|
"loss": 0.5435, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.9189189189189189, |
|
"grad_norm": 1.0532335042953491, |
|
"learning_rate": 6.075327204578363e-06, |
|
"loss": 0.5623, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.9213759213759214, |
|
"grad_norm": 1.0584250688552856, |
|
"learning_rate": 6.0509362345710585e-06, |
|
"loss": 0.5828, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.923832923832924, |
|
"grad_norm": 1.1264971494674683, |
|
"learning_rate": 6.026573067472366e-06, |
|
"loss": 0.591, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.9262899262899262, |
|
"grad_norm": 1.1128865480422974, |
|
"learning_rate": 6.00223787480823e-06, |
|
"loss": 0.5407, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.9287469287469288, |
|
"grad_norm": 1.1384304761886597, |
|
"learning_rate": 5.97793082790765e-06, |
|
"loss": 0.5633, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.9312039312039313, |
|
"grad_norm": 1.0851879119873047, |
|
"learning_rate": 5.953652097901468e-06, |
|
"loss": 0.5651, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.9336609336609336, |
|
"grad_norm": 1.2878016233444214, |
|
"learning_rate": 5.929401855721162e-06, |
|
"loss": 0.5841, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.9361179361179361, |
|
"grad_norm": 1.143180251121521, |
|
"learning_rate": 5.905180272097648e-06, |
|
"loss": 0.5518, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.9385749385749387, |
|
"grad_norm": 1.147679090499878, |
|
"learning_rate": 5.880987517560075e-06, |
|
"loss": 0.5335, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.941031941031941, |
|
"grad_norm": 1.084428071975708, |
|
"learning_rate": 5.856823762434618e-06, |
|
"loss": 0.5145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.9434889434889435, |
|
"grad_norm": 1.1757539510726929, |
|
"learning_rate": 5.832689176843291e-06, |
|
"loss": 0.6105, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.945945945945946, |
|
"grad_norm": 1.2322425842285156, |
|
"learning_rate": 5.808583930702739e-06, |
|
"loss": 0.5917, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.9484029484029484, |
|
"grad_norm": 1.0495308637619019, |
|
"learning_rate": 5.784508193723058e-06, |
|
"loss": 0.555, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.950859950859951, |
|
"grad_norm": 1.2324461936950684, |
|
"learning_rate": 5.7604621354065704e-06, |
|
"loss": 0.5557, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.9533169533169534, |
|
"grad_norm": 1.0863914489746094, |
|
"learning_rate": 5.73644592504666e-06, |
|
"loss": 0.5689, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.9557739557739557, |
|
"grad_norm": 1.1552340984344482, |
|
"learning_rate": 5.712459731726577e-06, |
|
"loss": 0.5918, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.9582309582309583, |
|
"grad_norm": 1.1713007688522339, |
|
"learning_rate": 5.688503724318217e-06, |
|
"loss": 0.5743, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.9606879606879608, |
|
"grad_norm": 1.267248272895813, |
|
"learning_rate": 5.6645780714809814e-06, |
|
"loss": 0.5988, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.9631449631449631, |
|
"grad_norm": 1.1006453037261963, |
|
"learning_rate": 5.640682941660547e-06, |
|
"loss": 0.5526, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.9656019656019657, |
|
"grad_norm": 1.116868257522583, |
|
"learning_rate": 5.616818503087704e-06, |
|
"loss": 0.5802, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9680589680589682, |
|
"grad_norm": 1.2064473628997803, |
|
"learning_rate": 5.592984923777156e-06, |
|
"loss": 0.5626, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.9705159705159705, |
|
"grad_norm": 1.1360725164413452, |
|
"learning_rate": 5.5691823715263646e-06, |
|
"loss": 0.5534, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.972972972972973, |
|
"grad_norm": 1.1609777212142944, |
|
"learning_rate": 5.545411013914329e-06, |
|
"loss": 0.5672, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.9754299754299756, |
|
"grad_norm": 1.1151705980300903, |
|
"learning_rate": 5.521671018300436e-06, |
|
"loss": 0.5639, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.9778869778869779, |
|
"grad_norm": 1.1209923028945923, |
|
"learning_rate": 5.497962551823266e-06, |
|
"loss": 0.5873, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.9803439803439802, |
|
"grad_norm": 1.110759973526001, |
|
"learning_rate": 5.4742857813994356e-06, |
|
"loss": 0.5441, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.982800982800983, |
|
"grad_norm": 1.2078731060028076, |
|
"learning_rate": 5.450640873722395e-06, |
|
"loss": 0.54, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.9852579852579852, |
|
"grad_norm": 1.1308197975158691, |
|
"learning_rate": 5.427027995261269e-06, |
|
"loss": 0.5208, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.9877149877149876, |
|
"grad_norm": 1.201937198638916, |
|
"learning_rate": 5.403447312259702e-06, |
|
"loss": 0.5978, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.9901719901719903, |
|
"grad_norm": 1.1013566255569458, |
|
"learning_rate": 5.379898990734641e-06, |
|
"loss": 0.5676, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.9926289926289926, |
|
"grad_norm": 1.2219934463500977, |
|
"learning_rate": 5.356383196475226e-06, |
|
"loss": 0.5878, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.995085995085995, |
|
"grad_norm": 1.0953749418258667, |
|
"learning_rate": 5.332900095041568e-06, |
|
"loss": 0.5594, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.9975429975429977, |
|
"grad_norm": 1.1119327545166016, |
|
"learning_rate": 5.3094498517636324e-06, |
|
"loss": 0.5875, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0389142036437988, |
|
"learning_rate": 5.286032631740023e-06, |
|
"loss": 0.4483, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.0024570024570023, |
|
"grad_norm": 1.7053855657577515, |
|
"learning_rate": 5.262648599836873e-06, |
|
"loss": 0.3474, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.004914004914005, |
|
"grad_norm": 1.522942066192627, |
|
"learning_rate": 5.239297920686641e-06, |
|
"loss": 0.311, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.0073710073710074, |
|
"grad_norm": 1.504423975944519, |
|
"learning_rate": 5.215980758686978e-06, |
|
"loss": 0.3016, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.0098280098280097, |
|
"grad_norm": 1.269704818725586, |
|
"learning_rate": 5.192697277999557e-06, |
|
"loss": 0.319, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.0122850122850124, |
|
"grad_norm": 1.2130259275436401, |
|
"learning_rate": 5.169447642548928e-06, |
|
"loss": 0.3009, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0147420147420148, |
|
"grad_norm": 1.2411495447158813, |
|
"learning_rate": 5.146232016021353e-06, |
|
"loss": 0.3164, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.017199017199017, |
|
"grad_norm": 1.1585211753845215, |
|
"learning_rate": 5.1230505618636575e-06, |
|
"loss": 0.2941, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.01965601965602, |
|
"grad_norm": 1.374408483505249, |
|
"learning_rate": 5.09990344328208e-06, |
|
"loss": 0.3249, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.022113022113022, |
|
"grad_norm": 1.8337355852127075, |
|
"learning_rate": 5.076790823241131e-06, |
|
"loss": 0.3264, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.0245700245700244, |
|
"grad_norm": 1.4516721963882446, |
|
"learning_rate": 5.053712864462432e-06, |
|
"loss": 0.2907, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.027027027027027, |
|
"grad_norm": 1.3430378437042236, |
|
"learning_rate": 5.030669729423572e-06, |
|
"loss": 0.2621, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.0294840294840295, |
|
"grad_norm": 1.6277974843978882, |
|
"learning_rate": 5.0076615803569815e-06, |
|
"loss": 0.3386, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.031941031941032, |
|
"grad_norm": 1.6233184337615967, |
|
"learning_rate": 4.984688579248757e-06, |
|
"loss": 0.3282, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.0343980343980346, |
|
"grad_norm": 1.2828155755996704, |
|
"learning_rate": 4.961750887837558e-06, |
|
"loss": 0.29, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.036855036855037, |
|
"grad_norm": 1.379706621170044, |
|
"learning_rate": 4.938848667613436e-06, |
|
"loss": 0.3222, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.039312039312039, |
|
"grad_norm": 1.233864188194275, |
|
"learning_rate": 4.915982079816732e-06, |
|
"loss": 0.2746, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.041769041769042, |
|
"grad_norm": 1.2086654901504517, |
|
"learning_rate": 4.893151285436891e-06, |
|
"loss": 0.2961, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.0442260442260443, |
|
"grad_norm": 1.1589128971099854, |
|
"learning_rate": 4.870356445211388e-06, |
|
"loss": 0.3014, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.0466830466830466, |
|
"grad_norm": 1.0225163698196411, |
|
"learning_rate": 4.84759771962455e-06, |
|
"loss": 0.2605, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.0491400491400493, |
|
"grad_norm": 1.1696596145629883, |
|
"learning_rate": 4.82487526890645e-06, |
|
"loss": 0.2958, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.0515970515970516, |
|
"grad_norm": 1.2834105491638184, |
|
"learning_rate": 4.802189253031764e-06, |
|
"loss": 0.3001, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.054054054054054, |
|
"grad_norm": 1.0881999731063843, |
|
"learning_rate": 4.779539831718668e-06, |
|
"loss": 0.2837, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.0565110565110567, |
|
"grad_norm": 1.2137595415115356, |
|
"learning_rate": 4.756927164427685e-06, |
|
"loss": 0.2729, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.058968058968059, |
|
"grad_norm": 1.3076894283294678, |
|
"learning_rate": 4.734351410360577e-06, |
|
"loss": 0.2936, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.0614250614250613, |
|
"grad_norm": 1.243006944656372, |
|
"learning_rate": 4.711812728459233e-06, |
|
"loss": 0.3079, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.063882063882064, |
|
"grad_norm": 1.1716985702514648, |
|
"learning_rate": 4.689311277404529e-06, |
|
"loss": 0.2839, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.0663390663390664, |
|
"grad_norm": 1.2072100639343262, |
|
"learning_rate": 4.666847215615225e-06, |
|
"loss": 0.2894, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.0687960687960687, |
|
"grad_norm": 1.3369587659835815, |
|
"learning_rate": 4.644420701246847e-06, |
|
"loss": 0.3135, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.0712530712530715, |
|
"grad_norm": 1.0535438060760498, |
|
"learning_rate": 4.622031892190579e-06, |
|
"loss": 0.2848, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.0737100737100738, |
|
"grad_norm": 1.1448948383331299, |
|
"learning_rate": 4.599680946072127e-06, |
|
"loss": 0.2981, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.076167076167076, |
|
"grad_norm": 1.163978099822998, |
|
"learning_rate": 4.57736802025065e-06, |
|
"loss": 0.2698, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.078624078624079, |
|
"grad_norm": 1.185007929801941, |
|
"learning_rate": 4.555093271817617e-06, |
|
"loss": 0.2822, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.081081081081081, |
|
"grad_norm": 1.2226570844650269, |
|
"learning_rate": 4.532856857595714e-06, |
|
"loss": 0.3142, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.0835380835380835, |
|
"grad_norm": 1.1364682912826538, |
|
"learning_rate": 4.51065893413774e-06, |
|
"loss": 0.2684, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.085995085995086, |
|
"grad_norm": 1.1746481657028198, |
|
"learning_rate": 4.488499657725511e-06, |
|
"loss": 0.2883, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.0884520884520885, |
|
"grad_norm": 1.126842975616455, |
|
"learning_rate": 4.466379184368747e-06, |
|
"loss": 0.2986, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 1.199286937713623, |
|
"learning_rate": 4.444297669803981e-06, |
|
"loss": 0.295, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.093366093366093, |
|
"grad_norm": 1.1583151817321777, |
|
"learning_rate": 4.422255269493455e-06, |
|
"loss": 0.2661, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.095823095823096, |
|
"grad_norm": 1.1596896648406982, |
|
"learning_rate": 4.400252138624047e-06, |
|
"loss": 0.2743, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.098280098280098, |
|
"grad_norm": 1.1054956912994385, |
|
"learning_rate": 4.378288432106151e-06, |
|
"loss": 0.2925, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.100737100737101, |
|
"grad_norm": 1.0872220993041992, |
|
"learning_rate": 4.356364304572596e-06, |
|
"loss": 0.2562, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.1031941031941033, |
|
"grad_norm": 1.1248897314071655, |
|
"learning_rate": 4.334479910377577e-06, |
|
"loss": 0.2926, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.1056511056511056, |
|
"grad_norm": 1.1297627687454224, |
|
"learning_rate": 4.312635403595532e-06, |
|
"loss": 0.2948, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.108108108108108, |
|
"grad_norm": 1.2067081928253174, |
|
"learning_rate": 4.290830938020087e-06, |
|
"loss": 0.3015, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.1105651105651106, |
|
"grad_norm": 1.1445972919464111, |
|
"learning_rate": 4.269066667162956e-06, |
|
"loss": 0.2866, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.113022113022113, |
|
"grad_norm": 1.2343792915344238, |
|
"learning_rate": 4.247342744252883e-06, |
|
"loss": 0.2957, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.1154791154791153, |
|
"grad_norm": 1.199395775794983, |
|
"learning_rate": 4.2256593222345185e-06, |
|
"loss": 0.2869, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.117936117936118, |
|
"grad_norm": 1.1511932611465454, |
|
"learning_rate": 4.2040165537674e-06, |
|
"loss": 0.3021, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.1203931203931203, |
|
"grad_norm": 1.146592140197754, |
|
"learning_rate": 4.182414591224834e-06, |
|
"loss": 0.3078, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.1228501228501226, |
|
"grad_norm": 1.0966308116912842, |
|
"learning_rate": 4.160853586692839e-06, |
|
"loss": 0.273, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.1253071253071254, |
|
"grad_norm": 1.1649693250656128, |
|
"learning_rate": 4.139333691969071e-06, |
|
"loss": 0.287, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.1277641277641277, |
|
"grad_norm": 1.0332820415496826, |
|
"learning_rate": 4.1178550585617694e-06, |
|
"loss": 0.2918, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.13022113022113, |
|
"grad_norm": 1.2864841222763062, |
|
"learning_rate": 4.096417837688666e-06, |
|
"loss": 0.3119, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.1326781326781328, |
|
"grad_norm": 1.1031755208969116, |
|
"learning_rate": 4.075022180275935e-06, |
|
"loss": 0.296, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.135135135135135, |
|
"grad_norm": 1.1938380002975464, |
|
"learning_rate": 4.053668236957135e-06, |
|
"loss": 0.2908, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.1375921375921374, |
|
"grad_norm": 1.0921452045440674, |
|
"learning_rate": 4.032356158072132e-06, |
|
"loss": 0.2943, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.14004914004914, |
|
"grad_norm": 1.2852400541305542, |
|
"learning_rate": 4.011086093666057e-06, |
|
"loss": 0.2911, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.1425061425061425, |
|
"grad_norm": 1.3049036264419556, |
|
"learning_rate": 3.9898581934882365e-06, |
|
"loss": 0.2957, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.1449631449631448, |
|
"grad_norm": 1.2709492444992065, |
|
"learning_rate": 3.96867260699116e-06, |
|
"loss": 0.3144, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.1474201474201475, |
|
"grad_norm": 1.1413925886154175, |
|
"learning_rate": 3.947529483329388e-06, |
|
"loss": 0.262, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.14987714987715, |
|
"grad_norm": 1.2879313230514526, |
|
"learning_rate": 3.92642897135855e-06, |
|
"loss": 0.2973, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.152334152334152, |
|
"grad_norm": 1.30074942111969, |
|
"learning_rate": 3.905371219634257e-06, |
|
"loss": 0.305, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.154791154791155, |
|
"grad_norm": 1.1726915836334229, |
|
"learning_rate": 3.884356376411089e-06, |
|
"loss": 0.2893, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.157248157248157, |
|
"grad_norm": 1.1194170713424683, |
|
"learning_rate": 3.863384589641509e-06, |
|
"loss": 0.2986, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.1597051597051595, |
|
"grad_norm": 1.1586145162582397, |
|
"learning_rate": 3.8424560069748705e-06, |
|
"loss": 0.2887, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 1.1653716564178467, |
|
"learning_rate": 3.821570775756339e-06, |
|
"loss": 0.2736, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.1646191646191646, |
|
"grad_norm": 1.1076536178588867, |
|
"learning_rate": 3.8007290430258712e-06, |
|
"loss": 0.2893, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.167076167076167, |
|
"grad_norm": 1.2675201892852783, |
|
"learning_rate": 3.779930955517187e-06, |
|
"loss": 0.2954, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.1695331695331697, |
|
"grad_norm": 1.0717624425888062, |
|
"learning_rate": 3.759176659656717e-06, |
|
"loss": 0.289, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.171990171990172, |
|
"grad_norm": 1.171949863433838, |
|
"learning_rate": 3.7384663015625856e-06, |
|
"loss": 0.3027, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.1744471744471743, |
|
"grad_norm": 1.057793140411377, |
|
"learning_rate": 3.7178000270435765e-06, |
|
"loss": 0.2767, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.176904176904177, |
|
"grad_norm": 1.1228421926498413, |
|
"learning_rate": 3.697177981598116e-06, |
|
"loss": 0.2935, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.1793611793611793, |
|
"grad_norm": 1.140847086906433, |
|
"learning_rate": 3.6766003104132332e-06, |
|
"loss": 0.2543, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 1.0831724405288696, |
|
"learning_rate": 3.6560671583635467e-06, |
|
"loss": 0.2599, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.1842751842751844, |
|
"grad_norm": 1.1893972158432007, |
|
"learning_rate": 3.6355786700102426e-06, |
|
"loss": 0.2868, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.1867321867321867, |
|
"grad_norm": 1.1741374731063843, |
|
"learning_rate": 3.6151349896000687e-06, |
|
"loss": 0.2847, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.189189189189189, |
|
"grad_norm": 1.1736268997192383, |
|
"learning_rate": 3.5947362610642854e-06, |
|
"loss": 0.2869, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.191646191646192, |
|
"grad_norm": 1.2521358728408813, |
|
"learning_rate": 3.5743826280176997e-06, |
|
"loss": 0.3096, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.194103194103194, |
|
"grad_norm": 1.271240472793579, |
|
"learning_rate": 3.5540742337576083e-06, |
|
"loss": 0.3035, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.1965601965601964, |
|
"grad_norm": 1.2453268766403198, |
|
"learning_rate": 3.533811221262833e-06, |
|
"loss": 0.278, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.199017199017199, |
|
"grad_norm": 1.7575854063034058, |
|
"learning_rate": 3.5135937331926595e-06, |
|
"loss": 0.3754, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.2014742014742015, |
|
"grad_norm": 1.1458030939102173, |
|
"learning_rate": 3.493421911885894e-06, |
|
"loss": 0.3078, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.203931203931204, |
|
"grad_norm": 1.1252243518829346, |
|
"learning_rate": 3.4732958993598153e-06, |
|
"loss": 0.2726, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.2063882063882065, |
|
"grad_norm": 1.1965776681900024, |
|
"learning_rate": 3.4532158373091916e-06, |
|
"loss": 0.2874, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.208845208845209, |
|
"grad_norm": 1.3262277841567993, |
|
"learning_rate": 3.433181867105291e-06, |
|
"loss": 0.302, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.211302211302211, |
|
"grad_norm": 1.0740711688995361, |
|
"learning_rate": 3.413194129794869e-06, |
|
"loss": 0.259, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.213759213759214, |
|
"grad_norm": 1.219891905784607, |
|
"learning_rate": 3.3932527660991877e-06, |
|
"loss": 0.3086, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.2162162162162162, |
|
"grad_norm": 1.1779321432113647, |
|
"learning_rate": 3.373357916413016e-06, |
|
"loss": 0.2801, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.2186732186732185, |
|
"grad_norm": 1.1177716255187988, |
|
"learning_rate": 3.3535097208036584e-06, |
|
"loss": 0.2943, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.2211302211302213, |
|
"grad_norm": 1.1561752557754517, |
|
"learning_rate": 3.333708319009945e-06, |
|
"loss": 0.2821, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.2235872235872236, |
|
"grad_norm": 1.2509113550186157, |
|
"learning_rate": 3.313953850441266e-06, |
|
"loss": 0.2696, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.226044226044226, |
|
"grad_norm": 1.2087299823760986, |
|
"learning_rate": 3.2942464541765775e-06, |
|
"loss": 0.2836, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.2285012285012287, |
|
"grad_norm": 1.1378703117370605, |
|
"learning_rate": 3.2745862689634433e-06, |
|
"loss": 0.2844, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.230958230958231, |
|
"grad_norm": 1.3601089715957642, |
|
"learning_rate": 3.254973433217021e-06, |
|
"loss": 0.2834, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.2334152334152333, |
|
"grad_norm": 1.133008360862732, |
|
"learning_rate": 3.2354080850191328e-06, |
|
"loss": 0.2851, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.235872235872236, |
|
"grad_norm": 1.2364482879638672, |
|
"learning_rate": 3.2158903621172556e-06, |
|
"loss": 0.2857, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.2383292383292384, |
|
"grad_norm": 1.2216523885726929, |
|
"learning_rate": 3.196420401923567e-06, |
|
"loss": 0.2758, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.2407862407862407, |
|
"grad_norm": 1.1590107679367065, |
|
"learning_rate": 3.1769983415139894e-06, |
|
"loss": 0.2787, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.2432432432432434, |
|
"grad_norm": 1.1557618379592896, |
|
"learning_rate": 3.157624317627195e-06, |
|
"loss": 0.3007, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.2457002457002457, |
|
"grad_norm": 1.2400990724563599, |
|
"learning_rate": 3.1382984666636806e-06, |
|
"loss": 0.2936, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.248157248157248, |
|
"grad_norm": 1.131558895111084, |
|
"learning_rate": 3.1190209246847624e-06, |
|
"loss": 0.2745, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.250614250614251, |
|
"grad_norm": 1.1964099407196045, |
|
"learning_rate": 3.099791827411668e-06, |
|
"loss": 0.2921, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.253071253071253, |
|
"grad_norm": 1.1541016101837158, |
|
"learning_rate": 3.0806113102245395e-06, |
|
"loss": 0.2726, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.2555282555282554, |
|
"grad_norm": 1.1907479763031006, |
|
"learning_rate": 3.061479508161502e-06, |
|
"loss": 0.2958, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.257985257985258, |
|
"grad_norm": 1.21101713180542, |
|
"learning_rate": 3.042396555917707e-06, |
|
"loss": 0.2923, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.2604422604422605, |
|
"grad_norm": 1.1928739547729492, |
|
"learning_rate": 3.023362587844393e-06, |
|
"loss": 0.2854, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.262899262899263, |
|
"grad_norm": 1.1127504110336304, |
|
"learning_rate": 3.00437773794791e-06, |
|
"loss": 0.2775, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.2653562653562656, |
|
"grad_norm": 1.1492047309875488, |
|
"learning_rate": 2.9854421398888212e-06, |
|
"loss": 0.2756, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.267813267813268, |
|
"grad_norm": 1.120092511177063, |
|
"learning_rate": 2.966555926980922e-06, |
|
"loss": 0.2773, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.27027027027027, |
|
"grad_norm": 1.2535494565963745, |
|
"learning_rate": 2.947719232190329e-06, |
|
"loss": 0.2991, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 1.1032397747039795, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.2598, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.2751842751842752, |
|
"grad_norm": 1.060048222541809, |
|
"learning_rate": 2.9101949270814346e-06, |
|
"loss": 0.2686, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.2776412776412776, |
|
"grad_norm": 1.238160490989685, |
|
"learning_rate": 2.8915075809484903e-06, |
|
"loss": 0.289, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.2800982800982803, |
|
"grad_norm": 1.0575051307678223, |
|
"learning_rate": 2.872870281301704e-06, |
|
"loss": 0.2595, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.2825552825552826, |
|
"grad_norm": 1.2428430318832397, |
|
"learning_rate": 2.8542831593547483e-06, |
|
"loss": 0.278, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.285012285012285, |
|
"grad_norm": 1.2655268907546997, |
|
"learning_rate": 2.8357463459680122e-06, |
|
"loss": 0.2877, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.2874692874692872, |
|
"grad_norm": 1.1474885940551758, |
|
"learning_rate": 2.8172599716477145e-06, |
|
"loss": 0.2683, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.28992628992629, |
|
"grad_norm": 1.2706103324890137, |
|
"learning_rate": 2.7988241665449357e-06, |
|
"loss": 0.2611, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.2923832923832923, |
|
"grad_norm": 1.1547375917434692, |
|
"learning_rate": 2.780439060454756e-06, |
|
"loss": 0.2957, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.294840294840295, |
|
"grad_norm": 1.1076372861862183, |
|
"learning_rate": 2.7621047828153e-06, |
|
"loss": 0.2787, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.2972972972972974, |
|
"grad_norm": 1.2577446699142456, |
|
"learning_rate": 2.7438214627068448e-06, |
|
"loss": 0.2823, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.2997542997542997, |
|
"grad_norm": 1.2012608051300049, |
|
"learning_rate": 2.7255892288509044e-06, |
|
"loss": 0.2818, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.302211302211302, |
|
"grad_norm": 1.1386785507202148, |
|
"learning_rate": 2.707408209609339e-06, |
|
"loss": 0.2757, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.3046683046683047, |
|
"grad_norm": 1.18578040599823, |
|
"learning_rate": 2.6892785329834157e-06, |
|
"loss": 0.3073, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.307125307125307, |
|
"grad_norm": 1.1896729469299316, |
|
"learning_rate": 2.6712003266129525e-06, |
|
"loss": 0.2911, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.30958230958231, |
|
"grad_norm": 1.308111548423767, |
|
"learning_rate": 2.6531737177753804e-06, |
|
"loss": 0.2859, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.312039312039312, |
|
"grad_norm": 1.1311650276184082, |
|
"learning_rate": 2.6351988333848787e-06, |
|
"loss": 0.256, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.3144963144963144, |
|
"grad_norm": 1.0582762956619263, |
|
"learning_rate": 2.6172757999914553e-06, |
|
"loss": 0.2848, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.3169533169533167, |
|
"grad_norm": 1.1629694700241089, |
|
"learning_rate": 2.5994047437800708e-06, |
|
"loss": 0.2799, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.3194103194103195, |
|
"grad_norm": 1.1673933267593384, |
|
"learning_rate": 2.581585790569755e-06, |
|
"loss": 0.2754, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.321867321867322, |
|
"grad_norm": 1.0987865924835205, |
|
"learning_rate": 2.5638190658126937e-06, |
|
"loss": 0.2858, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.3243243243243246, |
|
"grad_norm": 1.1722525358200073, |
|
"learning_rate": 2.5461046945933855e-06, |
|
"loss": 0.2593, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.326781326781327, |
|
"grad_norm": 1.1092591285705566, |
|
"learning_rate": 2.5284428016277284e-06, |
|
"loss": 0.2822, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.329238329238329, |
|
"grad_norm": 1.265002965927124, |
|
"learning_rate": 2.510833511262156e-06, |
|
"loss": 0.2801, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.3316953316953315, |
|
"grad_norm": 1.2009599208831787, |
|
"learning_rate": 2.493276947472756e-06, |
|
"loss": 0.3001, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.3341523341523343, |
|
"grad_norm": 1.1949445009231567, |
|
"learning_rate": 2.4757732338644127e-06, |
|
"loss": 0.2641, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.3366093366093366, |
|
"grad_norm": 1.1856027841567993, |
|
"learning_rate": 2.458322493669911e-06, |
|
"loss": 0.2698, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.339066339066339, |
|
"grad_norm": 1.2151247262954712, |
|
"learning_rate": 2.4409248497490923e-06, |
|
"loss": 0.3014, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.3415233415233416, |
|
"grad_norm": 1.188072919845581, |
|
"learning_rate": 2.4235804245879723e-06, |
|
"loss": 0.2605, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.343980343980344, |
|
"grad_norm": 1.2835193872451782, |
|
"learning_rate": 2.406289340297896e-06, |
|
"loss": 0.2931, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.3464373464373462, |
|
"grad_norm": 1.1126747131347656, |
|
"learning_rate": 2.3890517186146623e-06, |
|
"loss": 0.2556, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.348894348894349, |
|
"grad_norm": 1.2112548351287842, |
|
"learning_rate": 2.3718676808976683e-06, |
|
"loss": 0.2794, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.3513513513513513, |
|
"grad_norm": 1.2068251371383667, |
|
"learning_rate": 2.354737348129077e-06, |
|
"loss": 0.3035, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.3538083538083536, |
|
"grad_norm": 1.171133041381836, |
|
"learning_rate": 2.337660840912923e-06, |
|
"loss": 0.2557, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.3562653562653564, |
|
"grad_norm": 1.235913634300232, |
|
"learning_rate": 2.320638279474312e-06, |
|
"loss": 0.3008, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.3587223587223587, |
|
"grad_norm": 1.0996620655059814, |
|
"learning_rate": 2.3036697836585353e-06, |
|
"loss": 0.2694, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.361179361179361, |
|
"grad_norm": 1.0999877452850342, |
|
"learning_rate": 2.2867554729302545e-06, |
|
"loss": 0.2786, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 1.1389085054397583, |
|
"learning_rate": 2.26989546637263e-06, |
|
"loss": 0.2814, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.366093366093366, |
|
"grad_norm": 1.0945831537246704, |
|
"learning_rate": 2.25308988268652e-06, |
|
"loss": 0.2832, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.3685503685503684, |
|
"grad_norm": 1.1268113851547241, |
|
"learning_rate": 2.2363388401896125e-06, |
|
"loss": 0.2806, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.371007371007371, |
|
"grad_norm": 1.2085450887680054, |
|
"learning_rate": 2.2196424568156073e-06, |
|
"loss": 0.2877, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.3734643734643734, |
|
"grad_norm": 1.0432332754135132, |
|
"learning_rate": 2.2030008501133815e-06, |
|
"loss": 0.2772, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.3759213759213758, |
|
"grad_norm": 0.9682121872901917, |
|
"learning_rate": 2.186414137246172e-06, |
|
"loss": 0.2562, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.3783783783783785, |
|
"grad_norm": 1.0478007793426514, |
|
"learning_rate": 2.1698824349907344e-06, |
|
"loss": 0.2646, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.380835380835381, |
|
"grad_norm": 1.244933843612671, |
|
"learning_rate": 2.1534058597365284e-06, |
|
"loss": 0.2826, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.383292383292383, |
|
"grad_norm": 1.1740527153015137, |
|
"learning_rate": 2.136984527484901e-06, |
|
"loss": 0.2891, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.385749385749386, |
|
"grad_norm": 1.235903024673462, |
|
"learning_rate": 2.1206185538482704e-06, |
|
"loss": 0.319, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.388206388206388, |
|
"grad_norm": 1.3708535432815552, |
|
"learning_rate": 2.1043080540493055e-06, |
|
"loss": 0.342, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.3906633906633905, |
|
"grad_norm": 1.1570706367492676, |
|
"learning_rate": 2.0880531429201146e-06, |
|
"loss": 0.2617, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.3931203931203933, |
|
"grad_norm": 1.1274479627609253, |
|
"learning_rate": 2.0718539349014544e-06, |
|
"loss": 0.2889, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.3955773955773956, |
|
"grad_norm": 1.166656494140625, |
|
"learning_rate": 2.0557105440418902e-06, |
|
"loss": 0.2788, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.398034398034398, |
|
"grad_norm": 1.27115797996521, |
|
"learning_rate": 2.039623083997031e-06, |
|
"loss": 0.3151, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.4004914004914006, |
|
"grad_norm": 1.1158872842788696, |
|
"learning_rate": 2.0235916680287015e-06, |
|
"loss": 0.2556, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.402948402948403, |
|
"grad_norm": 1.1800823211669922, |
|
"learning_rate": 2.007616409004165e-06, |
|
"loss": 0.2862, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.4054054054054053, |
|
"grad_norm": 1.3932093381881714, |
|
"learning_rate": 1.991697419395301e-06, |
|
"loss": 0.3104, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.407862407862408, |
|
"grad_norm": 1.1910911798477173, |
|
"learning_rate": 1.97583481127785e-06, |
|
"loss": 0.2861, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.4103194103194103, |
|
"grad_norm": 1.057436227798462, |
|
"learning_rate": 1.960028696330596e-06, |
|
"loss": 0.2789, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.4127764127764126, |
|
"grad_norm": 1.1399625539779663, |
|
"learning_rate": 1.9442791858345887e-06, |
|
"loss": 0.2795, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.4152334152334154, |
|
"grad_norm": 1.1812423467636108, |
|
"learning_rate": 1.9285863906723612e-06, |
|
"loss": 0.2795, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.4176904176904177, |
|
"grad_norm": 1.2888036966323853, |
|
"learning_rate": 1.9129504213271565e-06, |
|
"loss": 0.2742, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.42014742014742, |
|
"grad_norm": 1.102538824081421, |
|
"learning_rate": 1.8973713878821343e-06, |
|
"loss": 0.3167, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.4226044226044228, |
|
"grad_norm": 1.266358733177185, |
|
"learning_rate": 1.881849400019602e-06, |
|
"loss": 0.2778, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.425061425061425, |
|
"grad_norm": 1.1434144973754883, |
|
"learning_rate": 1.8663845670202562e-06, |
|
"loss": 0.3077, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.4275184275184274, |
|
"grad_norm": 1.1956113576889038, |
|
"learning_rate": 1.8509769977623905e-06, |
|
"loss": 0.3028, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.42997542997543, |
|
"grad_norm": 1.1281932592391968, |
|
"learning_rate": 1.8356268007211442e-06, |
|
"loss": 0.2734, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.4324324324324325, |
|
"grad_norm": 1.0696520805358887, |
|
"learning_rate": 1.8203340839677307e-06, |
|
"loss": 0.266, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.4348894348894348, |
|
"grad_norm": 1.0929800271987915, |
|
"learning_rate": 1.8050989551686915e-06, |
|
"loss": 0.2641, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.4373464373464375, |
|
"grad_norm": 1.2299550771713257, |
|
"learning_rate": 1.7899215215851084e-06, |
|
"loss": 0.3026, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.43980343980344, |
|
"grad_norm": 1.2351603507995605, |
|
"learning_rate": 1.7748018900718856e-06, |
|
"loss": 0.2932, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.442260442260442, |
|
"grad_norm": 1.1006183624267578, |
|
"learning_rate": 1.7597401670769688e-06, |
|
"loss": 0.2719, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.444717444717445, |
|
"grad_norm": 1.1939946413040161, |
|
"learning_rate": 1.744736458640607e-06, |
|
"loss": 0.2791, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.447174447174447, |
|
"grad_norm": 1.0497217178344727, |
|
"learning_rate": 1.729790870394603e-06, |
|
"loss": 0.287, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.4496314496314495, |
|
"grad_norm": 1.224005937576294, |
|
"learning_rate": 1.7149035075615795e-06, |
|
"loss": 0.2938, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.4520884520884523, |
|
"grad_norm": 1.0267997980117798, |
|
"learning_rate": 1.700074474954221e-06, |
|
"loss": 0.2861, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 1.2167069911956787, |
|
"learning_rate": 1.6853038769745466e-06, |
|
"loss": 0.274, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.457002457002457, |
|
"grad_norm": 1.2097947597503662, |
|
"learning_rate": 1.6705918176131807e-06, |
|
"loss": 0.2765, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4594594594594597, |
|
"grad_norm": 1.2343907356262207, |
|
"learning_rate": 1.6559384004486057e-06, |
|
"loss": 0.3039, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.461916461916462, |
|
"grad_norm": 1.189889669418335, |
|
"learning_rate": 1.6413437286464419e-06, |
|
"loss": 0.2819, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.4643734643734643, |
|
"grad_norm": 1.2207257747650146, |
|
"learning_rate": 1.6268079049587205e-06, |
|
"loss": 0.2932, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.4668304668304666, |
|
"grad_norm": 1.2403969764709473, |
|
"learning_rate": 1.6123310317231644e-06, |
|
"loss": 0.2853, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.4692874692874693, |
|
"grad_norm": 1.2092585563659668, |
|
"learning_rate": 1.5979132108624572e-06, |
|
"loss": 0.3057, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.4717444717444716, |
|
"grad_norm": 1.2463258504867554, |
|
"learning_rate": 1.583554543883532e-06, |
|
"loss": 0.2919, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.4742014742014744, |
|
"grad_norm": 1.1727359294891357, |
|
"learning_rate": 1.5692551318768556e-06, |
|
"loss": 0.2936, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.4766584766584767, |
|
"grad_norm": 1.196444034576416, |
|
"learning_rate": 1.5550150755157267e-06, |
|
"loss": 0.2956, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.479115479115479, |
|
"grad_norm": 1.1921645402908325, |
|
"learning_rate": 1.5408344750555382e-06, |
|
"loss": 0.2894, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.4815724815724813, |
|
"grad_norm": 1.2030874490737915, |
|
"learning_rate": 1.5267134303331122e-06, |
|
"loss": 0.2697, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.484029484029484, |
|
"grad_norm": 1.0682005882263184, |
|
"learning_rate": 1.5126520407659618e-06, |
|
"loss": 0.2678, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.4864864864864864, |
|
"grad_norm": 1.3198879957199097, |
|
"learning_rate": 1.4986504053516105e-06, |
|
"loss": 0.2587, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.488943488943489, |
|
"grad_norm": 1.224260687828064, |
|
"learning_rate": 1.4847086226668871e-06, |
|
"loss": 0.2822, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.4914004914004915, |
|
"grad_norm": 1.1843713521957397, |
|
"learning_rate": 1.4708267908672401e-06, |
|
"loss": 0.2684, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.493857493857494, |
|
"grad_norm": 1.1694753170013428, |
|
"learning_rate": 1.4570050076860343e-06, |
|
"loss": 0.2821, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.496314496314496, |
|
"grad_norm": 1.159956693649292, |
|
"learning_rate": 1.4432433704338723e-06, |
|
"loss": 0.2618, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.498771498771499, |
|
"grad_norm": 1.1955686807632446, |
|
"learning_rate": 1.4295419759979079e-06, |
|
"loss": 0.3257, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.501228501228501, |
|
"grad_norm": 1.0967086553573608, |
|
"learning_rate": 1.4159009208411611e-06, |
|
"loss": 0.2534, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.503685503685504, |
|
"grad_norm": 1.1328158378601074, |
|
"learning_rate": 1.4023203010018393e-06, |
|
"loss": 0.2726, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.506142506142506, |
|
"grad_norm": 1.1212531328201294, |
|
"learning_rate": 1.3888002120926625e-06, |
|
"loss": 0.2834, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.5085995085995085, |
|
"grad_norm": 1.1289138793945312, |
|
"learning_rate": 1.3753407493001968e-06, |
|
"loss": 0.2623, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.511056511056511, |
|
"grad_norm": 1.1258163452148438, |
|
"learning_rate": 1.3619420073841638e-06, |
|
"loss": 0.2946, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.5135135135135136, |
|
"grad_norm": 1.1019937992095947, |
|
"learning_rate": 1.3486040806767997e-06, |
|
"loss": 0.2761, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.515970515970516, |
|
"grad_norm": 1.1924903392791748, |
|
"learning_rate": 1.3353270630821713e-06, |
|
"loss": 0.2674, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.5184275184275187, |
|
"grad_norm": 1.318726658821106, |
|
"learning_rate": 1.3221110480755306e-06, |
|
"loss": 0.2704, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.520884520884521, |
|
"grad_norm": 1.2188078165054321, |
|
"learning_rate": 1.308956128702632e-06, |
|
"loss": 0.2968, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.5233415233415233, |
|
"grad_norm": 1.2859346866607666, |
|
"learning_rate": 1.2958623975791118e-06, |
|
"loss": 0.2819, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.5257985257985256, |
|
"grad_norm": 1.233586311340332, |
|
"learning_rate": 1.2828299468898075e-06, |
|
"loss": 0.2779, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.5282555282555284, |
|
"grad_norm": 1.1495767831802368, |
|
"learning_rate": 1.2698588683881185e-06, |
|
"loss": 0.2845, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.5307125307125307, |
|
"grad_norm": 1.0828988552093506, |
|
"learning_rate": 1.2569492533953664e-06, |
|
"loss": 0.2778, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.5331695331695334, |
|
"grad_norm": 1.1832849979400635, |
|
"learning_rate": 1.2441011928001435e-06, |
|
"loss": 0.289, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.5356265356265357, |
|
"grad_norm": 1.2485005855560303, |
|
"learning_rate": 1.231314777057675e-06, |
|
"loss": 0.301, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.538083538083538, |
|
"grad_norm": 1.331150770187378, |
|
"learning_rate": 1.2185900961891794e-06, |
|
"loss": 0.2788, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.5405405405405403, |
|
"grad_norm": 1.1798036098480225, |
|
"learning_rate": 1.2059272397812494e-06, |
|
"loss": 0.2734, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.542997542997543, |
|
"grad_norm": 1.0681538581848145, |
|
"learning_rate": 1.1933262969851988e-06, |
|
"loss": 0.2526, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 1.2806833982467651, |
|
"learning_rate": 1.1807873565164507e-06, |
|
"loss": 0.2725, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.547911547911548, |
|
"grad_norm": 1.1350480318069458, |
|
"learning_rate": 1.1683105066539068e-06, |
|
"loss": 0.2657, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.5503685503685505, |
|
"grad_norm": 1.1276003122329712, |
|
"learning_rate": 1.1558958352393334e-06, |
|
"loss": 0.2595, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.552825552825553, |
|
"grad_norm": 1.1399433612823486, |
|
"learning_rate": 1.1435434296767235e-06, |
|
"loss": 0.2911, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.555282555282555, |
|
"grad_norm": 1.2321282625198364, |
|
"learning_rate": 1.1312533769317101e-06, |
|
"loss": 0.2738, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.557739557739558, |
|
"grad_norm": 1.122154712677002, |
|
"learning_rate": 1.1190257635309276e-06, |
|
"loss": 0.2789, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.56019656019656, |
|
"grad_norm": 1.193674921989441, |
|
"learning_rate": 1.1068606755614241e-06, |
|
"loss": 0.2772, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.562653562653563, |
|
"grad_norm": 1.2136112451553345, |
|
"learning_rate": 1.0947581986700307e-06, |
|
"loss": 0.2857, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.5651105651105652, |
|
"grad_norm": 1.168110966682434, |
|
"learning_rate": 1.0827184180627858e-06, |
|
"loss": 0.2967, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.5675675675675675, |
|
"grad_norm": 1.1920348405838013, |
|
"learning_rate": 1.0707414185043163e-06, |
|
"loss": 0.2717, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.57002457002457, |
|
"grad_norm": 1.105496883392334, |
|
"learning_rate": 1.0588272843172454e-06, |
|
"loss": 0.2772, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.5724815724815726, |
|
"grad_norm": 1.213555097579956, |
|
"learning_rate": 1.0469760993816058e-06, |
|
"loss": 0.2833, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.574938574938575, |
|
"grad_norm": 1.1333016157150269, |
|
"learning_rate": 1.0351879471342374e-06, |
|
"loss": 0.2969, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.5773955773955772, |
|
"grad_norm": 1.2033612728118896, |
|
"learning_rate": 1.0234629105682104e-06, |
|
"loss": 0.2883, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.57985257985258, |
|
"grad_norm": 1.0642030239105225, |
|
"learning_rate": 1.0118010722322313e-06, |
|
"loss": 0.2479, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.5823095823095823, |
|
"grad_norm": 1.2464839220046997, |
|
"learning_rate": 1.0002025142300764e-06, |
|
"loss": 0.255, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.5847665847665846, |
|
"grad_norm": 1.2047244310379028, |
|
"learning_rate": 9.886673182199958e-07, |
|
"loss": 0.2825, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.5872235872235874, |
|
"grad_norm": 1.172672152519226, |
|
"learning_rate": 9.771955654141496e-07, |
|
"loss": 0.2931, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.5896805896805897, |
|
"grad_norm": 1.1367522478103638, |
|
"learning_rate": 9.657873365780324e-07, |
|
"loss": 0.2581, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.592137592137592, |
|
"grad_norm": 1.2225159406661987, |
|
"learning_rate": 9.544427120299139e-07, |
|
"loss": 0.2879, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.5945945945945947, |
|
"grad_norm": 1.236801028251648, |
|
"learning_rate": 9.431617716402508e-07, |
|
"loss": 0.3092, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.597051597051597, |
|
"grad_norm": 1.2201226949691772, |
|
"learning_rate": 9.319445948311534e-07, |
|
"loss": 0.2772, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.5995085995085994, |
|
"grad_norm": 1.0961273908615112, |
|
"learning_rate": 9.207912605758052e-07, |
|
"loss": 0.237, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.601965601965602, |
|
"grad_norm": 1.198622465133667, |
|
"learning_rate": 9.097018473979124e-07, |
|
"loss": 0.2972, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.6044226044226044, |
|
"grad_norm": 1.1575977802276611, |
|
"learning_rate": 8.986764333711584e-07, |
|
"loss": 0.2669, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.6068796068796067, |
|
"grad_norm": 1.133432388305664, |
|
"learning_rate": 8.87715096118642e-07, |
|
"loss": 0.2863, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.6093366093366095, |
|
"grad_norm": 1.2548049688339233, |
|
"learning_rate": 8.768179128123456e-07, |
|
"loss": 0.2829, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.611793611793612, |
|
"grad_norm": 1.1735867261886597, |
|
"learning_rate": 8.659849601725701e-07, |
|
"loss": 0.3031, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.614250614250614, |
|
"grad_norm": 1.1587166786193848, |
|
"learning_rate": 8.55216314467422e-07, |
|
"loss": 0.2957, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.616707616707617, |
|
"grad_norm": 1.1736050844192505, |
|
"learning_rate": 8.445120515122552e-07, |
|
"loss": 0.2913, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.619164619164619, |
|
"grad_norm": 1.1523027420043945, |
|
"learning_rate": 8.338722466691451e-07, |
|
"loss": 0.2941, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.6216216216216215, |
|
"grad_norm": 1.1677874326705933, |
|
"learning_rate": 8.232969748463571e-07, |
|
"loss": 0.2821, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.6240786240786242, |
|
"grad_norm": 1.081207513809204, |
|
"learning_rate": 8.127863104978262e-07, |
|
"loss": 0.292, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.6265356265356266, |
|
"grad_norm": 1.1837942600250244, |
|
"learning_rate": 8.023403276226127e-07, |
|
"loss": 0.2864, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.628992628992629, |
|
"grad_norm": 1.287129521369934, |
|
"learning_rate": 7.919590997644111e-07, |
|
"loss": 0.286, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.631449631449631, |
|
"grad_norm": 1.3001762628555298, |
|
"learning_rate": 7.816427000110016e-07, |
|
"loss": 0.2688, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.633906633906634, |
|
"grad_norm": 1.0917863845825195, |
|
"learning_rate": 7.713912009937607e-07, |
|
"loss": 0.2893, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 1.1937156915664673, |
|
"learning_rate": 7.612046748871327e-07, |
|
"loss": 0.2526, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.638820638820639, |
|
"grad_norm": 1.1753548383712769, |
|
"learning_rate": 7.510831934081309e-07, |
|
"loss": 0.3071, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.6412776412776413, |
|
"grad_norm": 1.205013632774353, |
|
"learning_rate": 7.410268278158273e-07, |
|
"loss": 0.2828, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.6437346437346436, |
|
"grad_norm": 1.2608375549316406, |
|
"learning_rate": 7.310356489108539e-07, |
|
"loss": 0.3012, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.646191646191646, |
|
"grad_norm": 1.3116860389709473, |
|
"learning_rate": 7.211097270349065e-07, |
|
"loss": 0.3083, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.6486486486486487, |
|
"grad_norm": 1.1478344202041626, |
|
"learning_rate": 7.112491320702441e-07, |
|
"loss": 0.2746, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.651105651105651, |
|
"grad_norm": 1.1642433404922485, |
|
"learning_rate": 7.014539334392012e-07, |
|
"loss": 0.2602, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.6535626535626538, |
|
"grad_norm": 1.2262073755264282, |
|
"learning_rate": 6.917242001036916e-07, |
|
"loss": 0.292, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.656019656019656, |
|
"grad_norm": 1.0907559394836426, |
|
"learning_rate": 6.820600005647382e-07, |
|
"loss": 0.2751, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.6584766584766584, |
|
"grad_norm": 1.1675864458084106, |
|
"learning_rate": 6.724614028619736e-07, |
|
"loss": 0.3053, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.6609336609336607, |
|
"grad_norm": 1.0712223052978516, |
|
"learning_rate": 6.629284745731701e-07, |
|
"loss": 0.2692, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.6633906633906634, |
|
"grad_norm": 1.0594029426574707, |
|
"learning_rate": 6.53461282813762e-07, |
|
"loss": 0.2844, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.6658476658476657, |
|
"grad_norm": 1.1743744611740112, |
|
"learning_rate": 6.440598942363796e-07, |
|
"loss": 0.2781, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.6683046683046685, |
|
"grad_norm": 1.264631748199463, |
|
"learning_rate": 6.347243750303623e-07, |
|
"loss": 0.2892, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.670761670761671, |
|
"grad_norm": 1.218990683555603, |
|
"learning_rate": 6.254547909213149e-07, |
|
"loss": 0.2764, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.673218673218673, |
|
"grad_norm": 1.1565907001495361, |
|
"learning_rate": 6.162512071706272e-07, |
|
"loss": 0.3149, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.6756756756756754, |
|
"grad_norm": 1.0758461952209473, |
|
"learning_rate": 6.071136885750272e-07, |
|
"loss": 0.2777, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.678132678132678, |
|
"grad_norm": 1.3106664419174194, |
|
"learning_rate": 5.980422994661139e-07, |
|
"loss": 0.2998, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.6805896805896805, |
|
"grad_norm": 1.1924840211868286, |
|
"learning_rate": 5.890371037099107e-07, |
|
"loss": 0.2812, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.6830466830466833, |
|
"grad_norm": 1.2186545133590698, |
|
"learning_rate": 5.800981647064186e-07, |
|
"loss": 0.2615, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.6855036855036856, |
|
"grad_norm": 1.1541470289230347, |
|
"learning_rate": 5.71225545389158e-07, |
|
"loss": 0.2529, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.687960687960688, |
|
"grad_norm": 1.0920991897583008, |
|
"learning_rate": 5.624193082247431e-07, |
|
"loss": 0.26, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.69041769041769, |
|
"grad_norm": 1.1850959062576294, |
|
"learning_rate": 5.536795152124253e-07, |
|
"loss": 0.2716, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.692874692874693, |
|
"grad_norm": 1.1443251371383667, |
|
"learning_rate": 5.450062278836677e-07, |
|
"loss": 0.2753, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.6953316953316953, |
|
"grad_norm": 1.2090981006622314, |
|
"learning_rate": 5.363995073017047e-07, |
|
"loss": 0.285, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.697788697788698, |
|
"grad_norm": 1.13853120803833, |
|
"learning_rate": 5.278594140611204e-07, |
|
"loss": 0.2684, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.7002457002457003, |
|
"grad_norm": 1.1839234828948975, |
|
"learning_rate": 5.193860082874125e-07, |
|
"loss": 0.2889, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 1.1700607538223267, |
|
"learning_rate": 5.109793496365767e-07, |
|
"loss": 0.293, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.705159705159705, |
|
"grad_norm": 1.210724115371704, |
|
"learning_rate": 5.026394972946813e-07, |
|
"loss": 0.242, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.7076167076167077, |
|
"grad_norm": 1.1651077270507812, |
|
"learning_rate": 4.943665099774553e-07, |
|
"loss": 0.253, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.71007371007371, |
|
"grad_norm": 1.3025398254394531, |
|
"learning_rate": 4.861604459298697e-07, |
|
"loss": 0.2538, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.7125307125307128, |
|
"grad_norm": 1.3783848285675049, |
|
"learning_rate": 4.780213629257324e-07, |
|
"loss": 0.298, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.714987714987715, |
|
"grad_norm": 1.200514793395996, |
|
"learning_rate": 4.6994931826728094e-07, |
|
"loss": 0.3035, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.7174447174447174, |
|
"grad_norm": 1.2227789163589478, |
|
"learning_rate": 4.6194436878477024e-07, |
|
"loss": 0.2535, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.7199017199017197, |
|
"grad_norm": 1.1504371166229248, |
|
"learning_rate": 4.5400657083608857e-07, |
|
"loss": 0.2884, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.7223587223587224, |
|
"grad_norm": 1.082088828086853, |
|
"learning_rate": 4.4613598030634585e-07, |
|
"loss": 0.2622, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.7248157248157248, |
|
"grad_norm": 1.182896375656128, |
|
"learning_rate": 4.3833265260749157e-07, |
|
"loss": 0.255, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 1.1281861066818237, |
|
"learning_rate": 4.305966426779118e-07, |
|
"loss": 0.3047, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.72972972972973, |
|
"grad_norm": 1.2978154420852661, |
|
"learning_rate": 4.229280049820561e-07, |
|
"loss": 0.2818, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.732186732186732, |
|
"grad_norm": 1.1349396705627441, |
|
"learning_rate": 4.15326793510048e-07, |
|
"loss": 0.2761, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.7346437346437344, |
|
"grad_norm": 1.2176555395126343, |
|
"learning_rate": 4.077930617773007e-07, |
|
"loss": 0.2848, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.737100737100737, |
|
"grad_norm": 1.2432841062545776, |
|
"learning_rate": 4.0032686282414525e-07, |
|
"loss": 0.27, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.7395577395577395, |
|
"grad_norm": 1.237545132637024, |
|
"learning_rate": 3.929282492154607e-07, |
|
"loss": 0.3215, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.7420147420147423, |
|
"grad_norm": 1.1233947277069092, |
|
"learning_rate": 3.855972730402968e-07, |
|
"loss": 0.2894, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.7444717444717446, |
|
"grad_norm": 1.1773197650909424, |
|
"learning_rate": 3.783339859115065e-07, |
|
"loss": 0.2755, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.746928746928747, |
|
"grad_norm": 1.1950693130493164, |
|
"learning_rate": 3.711384389653916e-07, |
|
"loss": 0.2796, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.749385749385749, |
|
"grad_norm": 1.2141116857528687, |
|
"learning_rate": 3.6401068286133544e-07, |
|
"loss": 0.2647, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.751842751842752, |
|
"grad_norm": 1.0938400030136108, |
|
"learning_rate": 3.569507677814488e-07, |
|
"loss": 0.255, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.7542997542997543, |
|
"grad_norm": 1.1651341915130615, |
|
"learning_rate": 3.49958743430211e-07, |
|
"loss": 0.3147, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.756756756756757, |
|
"grad_norm": 1.1536781787872314, |
|
"learning_rate": 3.430346590341338e-07, |
|
"loss": 0.2439, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.7592137592137593, |
|
"grad_norm": 1.1681103706359863, |
|
"learning_rate": 3.361785633413961e-07, |
|
"loss": 0.302, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.7616707616707616, |
|
"grad_norm": 1.232194423675537, |
|
"learning_rate": 3.2939050462151957e-07, |
|
"loss": 0.2695, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.764127764127764, |
|
"grad_norm": 1.1450867652893066, |
|
"learning_rate": 3.226705306650113e-07, |
|
"loss": 0.2851, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.7665847665847667, |
|
"grad_norm": 1.3288469314575195, |
|
"learning_rate": 3.160186887830441e-07, |
|
"loss": 0.302, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.769041769041769, |
|
"grad_norm": 1.1425598859786987, |
|
"learning_rate": 3.0943502580710773e-07, |
|
"loss": 0.2622, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.7714987714987718, |
|
"grad_norm": 1.1829224824905396, |
|
"learning_rate": 3.029195880886904e-07, |
|
"loss": 0.2634, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.773955773955774, |
|
"grad_norm": 1.1650996208190918, |
|
"learning_rate": 2.9647242149895005e-07, |
|
"loss": 0.2707, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.7764127764127764, |
|
"grad_norm": 1.126253604888916, |
|
"learning_rate": 2.900935714283848e-07, |
|
"loss": 0.2745, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.7788697788697787, |
|
"grad_norm": 1.1476022005081177, |
|
"learning_rate": 2.837830827865229e-07, |
|
"loss": 0.2736, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.7813267813267815, |
|
"grad_norm": 1.1921050548553467, |
|
"learning_rate": 2.775410000016021e-07, |
|
"loss": 0.2842, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.7837837837837838, |
|
"grad_norm": 1.04782235622406, |
|
"learning_rate": 2.7136736702025436e-07, |
|
"loss": 0.2721, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.786240786240786, |
|
"grad_norm": 1.1524274349212646, |
|
"learning_rate": 2.652622273072003e-07, |
|
"loss": 0.2634, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.788697788697789, |
|
"grad_norm": 1.07859206199646, |
|
"learning_rate": 2.5922562384494197e-07, |
|
"loss": 0.2449, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.791154791154791, |
|
"grad_norm": 1.0668470859527588, |
|
"learning_rate": 2.532575991334618e-07, |
|
"loss": 0.2661, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.7936117936117935, |
|
"grad_norm": 1.3293726444244385, |
|
"learning_rate": 2.473581951899184e-07, |
|
"loss": 0.2754, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.796068796068796, |
|
"grad_norm": 1.3514832258224487, |
|
"learning_rate": 2.415274535483547e-07, |
|
"loss": 0.3009, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.7985257985257985, |
|
"grad_norm": 1.1948869228363037, |
|
"learning_rate": 2.3576541525941132e-07, |
|
"loss": 0.2922, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.800982800982801, |
|
"grad_norm": 1.2372150421142578, |
|
"learning_rate": 2.300721208900192e-07, |
|
"loss": 0.2998, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.8034398034398036, |
|
"grad_norm": 1.1227543354034424, |
|
"learning_rate": 2.2444761052313857e-07, |
|
"loss": 0.2711, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.805896805896806, |
|
"grad_norm": 1.199108600616455, |
|
"learning_rate": 2.1889192375745494e-07, |
|
"loss": 0.2866, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.808353808353808, |
|
"grad_norm": 1.3015297651290894, |
|
"learning_rate": 2.1340509970711466e-07, |
|
"loss": 0.2932, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.810810810810811, |
|
"grad_norm": 1.2696152925491333, |
|
"learning_rate": 2.0798717700144078e-07, |
|
"loss": 0.2987, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.8132678132678133, |
|
"grad_norm": 1.140032410621643, |
|
"learning_rate": 2.0263819378466888e-07, |
|
"loss": 0.2619, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.8157248157248156, |
|
"grad_norm": 1.0940791368484497, |
|
"learning_rate": 1.973581877156716e-07, |
|
"loss": 0.2952, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 1.2078993320465088, |
|
"learning_rate": 1.921471959676957e-07, |
|
"loss": 0.2713, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.8206388206388207, |
|
"grad_norm": 1.1918680667877197, |
|
"learning_rate": 1.870052552281032e-07, |
|
"loss": 0.2639, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.823095823095823, |
|
"grad_norm": 1.1544560194015503, |
|
"learning_rate": 1.8193240169810943e-07, |
|
"loss": 0.2788, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.8255528255528253, |
|
"grad_norm": 1.2254925966262817, |
|
"learning_rate": 1.7692867109252888e-07, |
|
"loss": 0.2613, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.828009828009828, |
|
"grad_norm": 1.1546679735183716, |
|
"learning_rate": 1.719940986395252e-07, |
|
"loss": 0.3007, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.8304668304668303, |
|
"grad_norm": 1.0897902250289917, |
|
"learning_rate": 1.6712871908036387e-07, |
|
"loss": 0.2689, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.832923832923833, |
|
"grad_norm": 1.3028407096862793, |
|
"learning_rate": 1.623325666691644e-07, |
|
"loss": 0.2945, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.8353808353808354, |
|
"grad_norm": 1.1586449146270752, |
|
"learning_rate": 1.5760567517266068e-07, |
|
"loss": 0.2778, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.8378378378378377, |
|
"grad_norm": 1.1948736906051636, |
|
"learning_rate": 1.5294807786996212e-07, |
|
"loss": 0.2989, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.84029484029484, |
|
"grad_norm": 1.1248109340667725, |
|
"learning_rate": 1.4835980755232626e-07, |
|
"loss": 0.2825, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.842751842751843, |
|
"grad_norm": 1.2164751291275024, |
|
"learning_rate": 1.4384089652291544e-07, |
|
"loss": 0.2964, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.845208845208845, |
|
"grad_norm": 1.1897505521774292, |
|
"learning_rate": 1.3939137659658153e-07, |
|
"loss": 0.282, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.847665847665848, |
|
"grad_norm": 1.2657814025878906, |
|
"learning_rate": 1.3501127909963275e-07, |
|
"loss": 0.3165, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.85012285012285, |
|
"grad_norm": 1.1859006881713867, |
|
"learning_rate": 1.3070063486961937e-07, |
|
"loss": 0.2956, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.8525798525798525, |
|
"grad_norm": 1.1239105463027954, |
|
"learning_rate": 1.2645947425511397e-07, |
|
"loss": 0.2981, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.855036855036855, |
|
"grad_norm": 1.2066782712936401, |
|
"learning_rate": 1.2228782711549924e-07, |
|
"loss": 0.2862, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.8574938574938575, |
|
"grad_norm": 1.241547703742981, |
|
"learning_rate": 1.1818572282075392e-07, |
|
"loss": 0.2795, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.85995085995086, |
|
"grad_norm": 1.1320499181747437, |
|
"learning_rate": 1.1415319025124938e-07, |
|
"loss": 0.2577, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.8624078624078626, |
|
"grad_norm": 1.3028688430786133, |
|
"learning_rate": 1.1019025779754666e-07, |
|
"loss": 0.301, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.864864864864865, |
|
"grad_norm": 1.0557419061660767, |
|
"learning_rate": 1.0629695336019763e-07, |
|
"loss": 0.2667, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.8673218673218672, |
|
"grad_norm": 1.2495602369308472, |
|
"learning_rate": 1.0247330434954073e-07, |
|
"loss": 0.2856, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.8697788697788695, |
|
"grad_norm": 1.1358976364135742, |
|
"learning_rate": 9.87193376855189e-08, |
|
"loss": 0.2883, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.8722358722358723, |
|
"grad_norm": 1.2297807931900024, |
|
"learning_rate": 9.503507979748305e-08, |
|
"loss": 0.318, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.8746928746928746, |
|
"grad_norm": 1.1552633047103882, |
|
"learning_rate": 9.142055662400672e-08, |
|
"loss": 0.273, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.8771498771498774, |
|
"grad_norm": 1.249558925628662, |
|
"learning_rate": 8.787579361270616e-08, |
|
"loss": 0.2796, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.8796068796068797, |
|
"grad_norm": 1.194430947303772, |
|
"learning_rate": 8.440081572005931e-08, |
|
"loss": 0.2586, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.882063882063882, |
|
"grad_norm": 1.0738084316253662, |
|
"learning_rate": 8.099564741123167e-08, |
|
"loss": 0.2687, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.8845208845208843, |
|
"grad_norm": 1.1545612812042236, |
|
"learning_rate": 7.766031265989849e-08, |
|
"loss": 0.2653, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.886977886977887, |
|
"grad_norm": 1.1456379890441895, |
|
"learning_rate": 7.439483494808498e-08, |
|
"loss": 0.2829, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.8894348894348894, |
|
"grad_norm": 1.156511902809143, |
|
"learning_rate": 7.119923726599421e-08, |
|
"loss": 0.285, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.891891891891892, |
|
"grad_norm": 1.2387841939926147, |
|
"learning_rate": 6.807354211184613e-08, |
|
"loss": 0.2733, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.8943488943488944, |
|
"grad_norm": 1.1624895334243774, |
|
"learning_rate": 6.501777149172328e-08, |
|
"loss": 0.2839, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.8968058968058967, |
|
"grad_norm": 1.15232253074646, |
|
"learning_rate": 6.203194691940973e-08, |
|
"loss": 0.2959, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.899262899262899, |
|
"grad_norm": 1.06272292137146, |
|
"learning_rate": 5.9116089416246803e-08, |
|
"loss": 0.2555, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.901719901719902, |
|
"grad_norm": 1.2629801034927368, |
|
"learning_rate": 5.6270219510975445e-08, |
|
"loss": 0.2869, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.904176904176904, |
|
"grad_norm": 1.2183952331542969, |
|
"learning_rate": 5.349435723960184e-08, |
|
"loss": 0.3002, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.906633906633907, |
|
"grad_norm": 1.1183772087097168, |
|
"learning_rate": 5.078852214525198e-08, |
|
"loss": 0.2884, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 1.1172285079956055, |
|
"learning_rate": 4.815273327803183e-08, |
|
"loss": 0.2807, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.9115479115479115, |
|
"grad_norm": 1.1453272104263306, |
|
"learning_rate": 4.5587009194894005e-08, |
|
"loss": 0.2751, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.914004914004914, |
|
"grad_norm": 1.0735313892364502, |
|
"learning_rate": 4.309136795951241e-08, |
|
"loss": 0.2466, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.9164619164619165, |
|
"grad_norm": 1.0989115238189697, |
|
"learning_rate": 4.066582714214895e-08, |
|
"loss": 0.2747, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.918918918918919, |
|
"grad_norm": 1.0996559858322144, |
|
"learning_rate": 3.831040381953144e-08, |
|
"loss": 0.2654, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.9213759213759216, |
|
"grad_norm": 1.115569829940796, |
|
"learning_rate": 3.602511457473479e-08, |
|
"loss": 0.2533, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.923832923832924, |
|
"grad_norm": 1.195621371269226, |
|
"learning_rate": 3.380997549706444e-08, |
|
"loss": 0.2631, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.9262899262899262, |
|
"grad_norm": 1.1971734762191772, |
|
"learning_rate": 3.1665002181937575e-08, |
|
"loss": 0.2753, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.9287469287469285, |
|
"grad_norm": 1.1896756887435913, |
|
"learning_rate": 2.9590209730784302e-08, |
|
"loss": 0.2725, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.9312039312039313, |
|
"grad_norm": 1.0904500484466553, |
|
"learning_rate": 2.758561275092886e-08, |
|
"loss": 0.2584, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.9336609336609336, |
|
"grad_norm": 1.1623117923736572, |
|
"learning_rate": 2.5651225355497466e-08, |
|
"loss": 0.2706, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.9361179361179364, |
|
"grad_norm": 1.1518123149871826, |
|
"learning_rate": 2.3787061163309533e-08, |
|
"loss": 0.2932, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.9385749385749387, |
|
"grad_norm": 1.278153419494629, |
|
"learning_rate": 2.1993133298791047e-08, |
|
"loss": 0.2776, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.941031941031941, |
|
"grad_norm": 1.205899715423584, |
|
"learning_rate": 2.0269454391874665e-08, |
|
"loss": 0.278, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.9434889434889433, |
|
"grad_norm": 1.2302438020706177, |
|
"learning_rate": 1.861603657791422e-08, |
|
"loss": 0.2969, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.945945945945946, |
|
"grad_norm": 1.1887091398239136, |
|
"learning_rate": 1.7032891497600347e-08, |
|
"loss": 0.2893, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.9484029484029484, |
|
"grad_norm": 1.1503926515579224, |
|
"learning_rate": 1.5520030296873877e-08, |
|
"loss": 0.2682, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.950859950859951, |
|
"grad_norm": 1.2009198665618896, |
|
"learning_rate": 1.4077463626852584e-08, |
|
"loss": 0.276, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.9533169533169534, |
|
"grad_norm": 1.10666024684906, |
|
"learning_rate": 1.270520164375344e-08, |
|
"loss": 0.2661, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.9557739557739557, |
|
"grad_norm": 1.2643858194351196, |
|
"learning_rate": 1.1403254008822695e-08, |
|
"loss": 0.2768, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.958230958230958, |
|
"grad_norm": 1.1746954917907715, |
|
"learning_rate": 1.0171629888265921e-08, |
|
"loss": 0.2755, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.960687960687961, |
|
"grad_norm": 1.2816386222839355, |
|
"learning_rate": 9.010337953185843e-09, |
|
"loss": 0.3065, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.963144963144963, |
|
"grad_norm": 1.1486769914627075, |
|
"learning_rate": 7.919386379515726e-09, |
|
"loss": 0.2906, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.965601965601966, |
|
"grad_norm": 1.0958787202835083, |
|
"learning_rate": 6.8987828479716304e-09, |
|
"loss": 0.2905, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.968058968058968, |
|
"grad_norm": 1.1250931024551392, |
|
"learning_rate": 5.948534543988027e-09, |
|
"loss": 0.2816, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.9705159705159705, |
|
"grad_norm": 1.084580659866333, |
|
"learning_rate": 5.068648157675604e-09, |
|
"loss": 0.2873, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.972972972972973, |
|
"grad_norm": 1.1390430927276611, |
|
"learning_rate": 4.259129883767976e-09, |
|
"loss": 0.2621, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.9754299754299756, |
|
"grad_norm": 1.2942683696746826, |
|
"learning_rate": 3.5199854215817176e-09, |
|
"loss": 0.2856, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.977886977886978, |
|
"grad_norm": 1.0806019306182861, |
|
"learning_rate": 2.851219974973063e-09, |
|
"loss": 0.3059, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.98034398034398, |
|
"grad_norm": 1.1289433240890503, |
|
"learning_rate": 2.2528382523057115e-09, |
|
"loss": 0.279, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.982800982800983, |
|
"grad_norm": 1.2074559926986694, |
|
"learning_rate": 1.7248444664141882e-09, |
|
"loss": 0.2985, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.9852579852579852, |
|
"grad_norm": 1.0647261142730713, |
|
"learning_rate": 1.2672423345760908e-09, |
|
"loss": 0.258, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.9877149877149876, |
|
"grad_norm": 1.2075697183609009, |
|
"learning_rate": 8.80035078482111e-10, |
|
"loss": 0.2782, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.9901719901719903, |
|
"grad_norm": 1.1851577758789062, |
|
"learning_rate": 5.632254242204926e-10, |
|
"loss": 0.2759, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.9926289926289926, |
|
"grad_norm": 1.156044840812683, |
|
"learning_rate": 3.1681560225038657e-10, |
|
"loss": 0.2875, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.995085995085995, |
|
"grad_norm": 1.169237494468689, |
|
"learning_rate": 1.4080734739074787e-10, |
|
"loss": 0.2794, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.9975429975429977, |
|
"grad_norm": 1.1338391304016113, |
|
"learning_rate": 3.52018988059033e-11, |
|
"loss": 0.286, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.1234797239303589, |
|
"learning_rate": 0.0, |
|
"loss": 0.2237, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1221, |
|
"total_flos": 3.3710113113023447e+18, |
|
"train_loss": 0.6036928490011737, |
|
"train_runtime": 9045.1389, |
|
"train_samples_per_second": 17.247, |
|
"train_steps_per_second": 0.135 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1221, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3710113113023447e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |