|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015873015873015872, |
|
"grad_norm": 3.3795859813690186, |
|
"learning_rate": 6.25e-07, |
|
"loss": 0.4037, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.031746031746031744, |
|
"grad_norm": 4.176924228668213, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.6101, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 3.2537522315979004, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.5335, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06349206349206349, |
|
"grad_norm": 3.8701913356781006, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.5156, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07936507936507936, |
|
"grad_norm": 3.2381300926208496, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.5435, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 2.6569769382476807, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.4349, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 2.641036033630371, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.4657, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.12698412698412698, |
|
"grad_norm": 2.500192880630493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.387, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 2.925560474395752, |
|
"learning_rate": 5.625e-06, |
|
"loss": 0.5708, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.15873015873015872, |
|
"grad_norm": 1.740651249885559, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.4166, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1746031746031746, |
|
"grad_norm": 1.8194835186004639, |
|
"learning_rate": 6.875e-06, |
|
"loss": 0.4392, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 1.2873449325561523, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3224, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.20634920634920634, |
|
"grad_norm": 1.4529649019241333, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.3407, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 1.3093940019607544, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.3677, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 1.4765881299972534, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.3792, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.25396825396825395, |
|
"grad_norm": 5.271663665771484, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6346, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2698412698412698, |
|
"grad_norm": 1.3494993448257446, |
|
"learning_rate": 9.999724009977419e-06, |
|
"loss": 0.4433, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 1.115939736366272, |
|
"learning_rate": 9.998896070377873e-06, |
|
"loss": 0.2864, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.30158730158730157, |
|
"grad_norm": 1.5331807136535645, |
|
"learning_rate": 9.99751627260259e-06, |
|
"loss": 0.3423, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 1.5603549480438232, |
|
"learning_rate": 9.995584768975735e-06, |
|
"loss": 0.3799, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.4752039909362793, |
|
"learning_rate": 9.993101772727602e-06, |
|
"loss": 0.3565, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3492063492063492, |
|
"grad_norm": 9.61251449584961, |
|
"learning_rate": 9.990067557971068e-06, |
|
"loss": 0.7244, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.36507936507936506, |
|
"grad_norm": 1.3895500898361206, |
|
"learning_rate": 9.986482459671332e-06, |
|
"loss": 0.3444, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 1.231410026550293, |
|
"learning_rate": 9.982346873608936e-06, |
|
"loss": 0.3221, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.3968253968253968, |
|
"grad_norm": 1.9909180402755737, |
|
"learning_rate": 9.977661256336081e-06, |
|
"loss": 0.3724, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4126984126984127, |
|
"grad_norm": 1.2321062088012695, |
|
"learning_rate": 9.972426125126208e-06, |
|
"loss": 0.3391, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 5.4099321365356445, |
|
"learning_rate": 9.966642057916915e-06, |
|
"loss": 0.5806, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 1.0613089799880981, |
|
"learning_rate": 9.960309693246135e-06, |
|
"loss": 0.2237, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4603174603174603, |
|
"grad_norm": 1.1249053478240967, |
|
"learning_rate": 9.953429730181653e-06, |
|
"loss": 0.3618, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 1.0634913444519043, |
|
"learning_rate": 9.94600292824394e-06, |
|
"loss": 0.3177, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49206349206349204, |
|
"grad_norm": 1.1851016283035278, |
|
"learning_rate": 9.938030107322284e-06, |
|
"loss": 0.2799, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5079365079365079, |
|
"grad_norm": 1.2439275979995728, |
|
"learning_rate": 9.929512147584297e-06, |
|
"loss": 0.3415, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5238095238095238, |
|
"grad_norm": 1.1060391664505005, |
|
"learning_rate": 9.920449989378741e-06, |
|
"loss": 0.2888, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5396825396825397, |
|
"grad_norm": 1.4267876148223877, |
|
"learning_rate": 9.910844633131712e-06, |
|
"loss": 0.3724, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 1.1967415809631348, |
|
"learning_rate": 9.90069713923621e-06, |
|
"loss": 0.3362, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.9548850059509277, |
|
"learning_rate": 9.890008627935057e-06, |
|
"loss": 0.2776, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.5873015873015873, |
|
"grad_norm": 1.1262502670288086, |
|
"learning_rate": 9.878780279197246e-06, |
|
"loss": 0.3215, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6031746031746031, |
|
"grad_norm": 0.9589006304740906, |
|
"learning_rate": 9.867013332587667e-06, |
|
"loss": 0.2871, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6190476190476191, |
|
"grad_norm": 1.2175616025924683, |
|
"learning_rate": 9.854709087130261e-06, |
|
"loss": 0.2672, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 1.1927433013916016, |
|
"learning_rate": 9.841868901164621e-06, |
|
"loss": 0.3167, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6507936507936508, |
|
"grad_norm": 1.1496555805206299, |
|
"learning_rate": 9.828494192196037e-06, |
|
"loss": 0.331, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 2.9185266494750977, |
|
"learning_rate": 9.814586436738998e-06, |
|
"loss": 0.4792, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6825396825396826, |
|
"grad_norm": 4.013699054718018, |
|
"learning_rate": 9.8001471701542e-06, |
|
"loss": 0.3316, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6984126984126984, |
|
"grad_norm": 1.1543551683425903, |
|
"learning_rate": 9.785177986479049e-06, |
|
"loss": 0.2954, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.9748827219009399, |
|
"learning_rate": 9.76968053825168e-06, |
|
"loss": 0.2267, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7301587301587301, |
|
"grad_norm": 1.0915194749832153, |
|
"learning_rate": 9.753656536328529e-06, |
|
"loss": 0.3005, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.746031746031746, |
|
"grad_norm": 2.678406238555908, |
|
"learning_rate": 9.737107749695456e-06, |
|
"loss": 0.2828, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 0.9532033801078796, |
|
"learning_rate": 9.72003600527246e-06, |
|
"loss": 0.2658, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 1.1294094324111938, |
|
"learning_rate": 9.702443187711991e-06, |
|
"loss": 0.2978, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 1.067834734916687, |
|
"learning_rate": 9.6843312391909e-06, |
|
"loss": 0.2941, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8095238095238095, |
|
"grad_norm": 1.0761404037475586, |
|
"learning_rate": 9.665702159196014e-06, |
|
"loss": 0.3056, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.8253968253968254, |
|
"grad_norm": 4.403201103210449, |
|
"learning_rate": 9.646558004303419e-06, |
|
"loss": 0.3728, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.8412698412698413, |
|
"grad_norm": 1.0028859376907349, |
|
"learning_rate": 9.62690088795141e-06, |
|
"loss": 0.2765, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.0655666589736938, |
|
"learning_rate": 9.606732980207186e-06, |
|
"loss": 0.2912, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.873015873015873, |
|
"grad_norm": 8.566058158874512, |
|
"learning_rate": 9.586056507527266e-06, |
|
"loss": 0.5695, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 1.487505555152893, |
|
"learning_rate": 9.564873752511719e-06, |
|
"loss": 0.2906, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 2.458684206008911, |
|
"learning_rate": 9.543187053652156e-06, |
|
"loss": 0.4637, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.9206349206349206, |
|
"grad_norm": 1.1918795108795166, |
|
"learning_rate": 9.520998805073583e-06, |
|
"loss": 0.347, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.9365079365079365, |
|
"grad_norm": 1.9880917072296143, |
|
"learning_rate": 9.498311456270091e-06, |
|
"loss": 0.3115, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 1.1867977380752563, |
|
"learning_rate": 9.475127511834438e-06, |
|
"loss": 0.3172, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9682539682539683, |
|
"grad_norm": 1.2388020753860474, |
|
"learning_rate": 9.451449531181571e-06, |
|
"loss": 0.252, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.9841269841269841, |
|
"grad_norm": 9.597868919372559, |
|
"learning_rate": 9.427280128266049e-06, |
|
"loss": 0.2668, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9280756711959839, |
|
"learning_rate": 9.4026219712935e-06, |
|
"loss": 0.2107, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0158730158730158, |
|
"grad_norm": 0.9878024458885193, |
|
"learning_rate": 9.377477782426041e-06, |
|
"loss": 0.2008, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.0317460317460316, |
|
"grad_norm": 2.081068992614746, |
|
"learning_rate": 9.351850337481774e-06, |
|
"loss": 0.1998, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.0476190476190477, |
|
"grad_norm": 0.9242327213287354, |
|
"learning_rate": 9.325742465628342e-06, |
|
"loss": 0.1791, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0634920634920635, |
|
"grad_norm": 2.085343599319458, |
|
"learning_rate": 9.299157049070604e-06, |
|
"loss": 0.1787, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0793650793650793, |
|
"grad_norm": 0.815376877784729, |
|
"learning_rate": 9.272097022732444e-06, |
|
"loss": 0.1678, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.0952380952380953, |
|
"grad_norm": 0.8281353116035461, |
|
"learning_rate": 9.244565373932775e-06, |
|
"loss": 0.1646, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.828342616558075, |
|
"learning_rate": 9.216565142055745e-06, |
|
"loss": 0.155, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.126984126984127, |
|
"grad_norm": 0.77997225522995, |
|
"learning_rate": 9.188099418215208e-06, |
|
"loss": 0.1176, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.9490588903427124, |
|
"learning_rate": 9.159171344913469e-06, |
|
"loss": 0.1316, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.1587301587301586, |
|
"grad_norm": 1.061501145362854, |
|
"learning_rate": 9.129784115694368e-06, |
|
"loss": 0.1759, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.1746031746031746, |
|
"grad_norm": 1.077555537223816, |
|
"learning_rate": 9.09994097479073e-06, |
|
"loss": 0.1607, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.8213773965835571, |
|
"learning_rate": 9.069645216766207e-06, |
|
"loss": 0.1123, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.2063492063492063, |
|
"grad_norm": 0.8540403246879578, |
|
"learning_rate": 9.038900186151574e-06, |
|
"loss": 0.1306, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 1.0110580921173096, |
|
"learning_rate": 9.007709277075512e-06, |
|
"loss": 0.1644, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.2380952380952381, |
|
"grad_norm": 1.3765249252319336, |
|
"learning_rate": 8.976075932889896e-06, |
|
"loss": 0.2027, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.253968253968254, |
|
"grad_norm": 1.0460267066955566, |
|
"learning_rate": 8.944003645789678e-06, |
|
"loss": 0.1635, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.2698412698412698, |
|
"grad_norm": 0.8925233483314514, |
|
"learning_rate": 8.911495956427358e-06, |
|
"loss": 0.1538, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 1.1790034770965576, |
|
"learning_rate": 8.8785564535221e-06, |
|
"loss": 0.164, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3015873015873016, |
|
"grad_norm": 1.1329326629638672, |
|
"learning_rate": 8.845188773463567e-06, |
|
"loss": 0.1449, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.3174603174603174, |
|
"grad_norm": 4.996532440185547, |
|
"learning_rate": 8.811396599910467e-06, |
|
"loss": 0.247, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.74964839220047, |
|
"learning_rate": 8.777183663383897e-06, |
|
"loss": 0.1259, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3492063492063493, |
|
"grad_norm": 1.8605432510375977, |
|
"learning_rate": 8.742553740855507e-06, |
|
"loss": 0.1935, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.3650793650793651, |
|
"grad_norm": 0.9615695476531982, |
|
"learning_rate": 8.707510655330536e-06, |
|
"loss": 0.1772, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.380952380952381, |
|
"grad_norm": 1.0028129816055298, |
|
"learning_rate": 8.672058275425773e-06, |
|
"loss": 0.159, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.3968253968253967, |
|
"grad_norm": 0.780433177947998, |
|
"learning_rate": 8.636200514942466e-06, |
|
"loss": 0.1374, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.4126984126984126, |
|
"grad_norm": 2.116007089614868, |
|
"learning_rate": 8.59994133243427e-06, |
|
"loss": 0.1951, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.8867003917694092, |
|
"learning_rate": 8.563284730770222e-06, |
|
"loss": 0.147, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.8089895248413086, |
|
"learning_rate": 8.52623475669285e-06, |
|
"loss": 0.1109, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.4603174603174602, |
|
"grad_norm": 1.093917727470398, |
|
"learning_rate": 8.488795500371427e-06, |
|
"loss": 0.1328, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.4761904761904763, |
|
"grad_norm": 1.0122655630111694, |
|
"learning_rate": 8.450971094950433e-06, |
|
"loss": 0.1998, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.492063492063492, |
|
"grad_norm": 3.2237043380737305, |
|
"learning_rate": 8.412765716093273e-06, |
|
"loss": 0.1949, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.507936507936508, |
|
"grad_norm": 0.8921442031860352, |
|
"learning_rate": 8.374183581521288e-06, |
|
"loss": 0.1659, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.5238095238095237, |
|
"grad_norm": 0.9087255001068115, |
|
"learning_rate": 8.335228950548164e-06, |
|
"loss": 0.1381, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.5396825396825395, |
|
"grad_norm": 0.8095484972000122, |
|
"learning_rate": 8.29590612360969e-06, |
|
"loss": 0.1418, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.9046949744224548, |
|
"learning_rate": 8.256219441789023e-06, |
|
"loss": 0.1362, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 1.0229334831237793, |
|
"learning_rate": 8.216173286337449e-06, |
|
"loss": 0.1548, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"grad_norm": 0.7955384254455566, |
|
"learning_rate": 8.175772078190706e-06, |
|
"loss": 0.0964, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6031746031746033, |
|
"grad_norm": 0.8258505463600159, |
|
"learning_rate": 8.135020277480933e-06, |
|
"loss": 0.1436, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.619047619047619, |
|
"grad_norm": 0.9820398688316345, |
|
"learning_rate": 8.093922383044293e-06, |
|
"loss": 0.1875, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.6349206349206349, |
|
"grad_norm": 0.8524972796440125, |
|
"learning_rate": 8.052482931924307e-06, |
|
"loss": 0.1381, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.6507936507936507, |
|
"grad_norm": 1.515363097190857, |
|
"learning_rate": 8.010706498870997e-06, |
|
"loss": 0.1783, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.9057204127311707, |
|
"learning_rate": 7.968597695835845e-06, |
|
"loss": 0.1491, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6825396825396826, |
|
"grad_norm": 0.73180091381073, |
|
"learning_rate": 7.926161171462647e-06, |
|
"loss": 0.1216, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.6984126984126984, |
|
"grad_norm": 1.321354627609253, |
|
"learning_rate": 7.883401610574338e-06, |
|
"loss": 0.1496, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.9403553009033203, |
|
"learning_rate": 7.84032373365578e-06, |
|
"loss": 0.1509, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.7301587301587302, |
|
"grad_norm": 0.9378424286842346, |
|
"learning_rate": 7.796932296332666e-06, |
|
"loss": 0.16, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.746031746031746, |
|
"grad_norm": 0.8329445123672485, |
|
"learning_rate": 7.753232088846505e-06, |
|
"loss": 0.0972, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.7619047619047619, |
|
"grad_norm": 1.2565405368804932, |
|
"learning_rate": 7.709227935525796e-06, |
|
"loss": 0.1837, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.9211390614509583, |
|
"learning_rate": 7.664924694253444e-06, |
|
"loss": 0.1472, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.7936507936507935, |
|
"grad_norm": 0.8232191801071167, |
|
"learning_rate": 7.620327255930475e-06, |
|
"loss": 0.123, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.8095238095238095, |
|
"grad_norm": 0.8965691328048706, |
|
"learning_rate": 7.575440543936092e-06, |
|
"loss": 0.1329, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.8253968253968254, |
|
"grad_norm": 0.9043343663215637, |
|
"learning_rate": 7.530269513584158e-06, |
|
"loss": 0.1268, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.8412698412698414, |
|
"grad_norm": 0.822960376739502, |
|
"learning_rate": 7.484819151576148e-06, |
|
"loss": 0.128, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 1.0179648399353027, |
|
"learning_rate": 7.439094475450638e-06, |
|
"loss": 0.1732, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.873015873015873, |
|
"grad_norm": 0.888350784778595, |
|
"learning_rate": 7.393100533029383e-06, |
|
"loss": 0.1608, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 0.8138338923454285, |
|
"learning_rate": 7.346842401860069e-06, |
|
"loss": 0.1534, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 1.0096592903137207, |
|
"learning_rate": 7.300325188655762e-06, |
|
"loss": 0.1705, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.9206349206349205, |
|
"grad_norm": 0.8139647841453552, |
|
"learning_rate": 7.253554028731149e-06, |
|
"loss": 0.1453, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.9365079365079365, |
|
"grad_norm": 1.51543128490448, |
|
"learning_rate": 7.206534085435626e-06, |
|
"loss": 0.2008, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.9523809523809523, |
|
"grad_norm": 0.944955587387085, |
|
"learning_rate": 7.159270549583278e-06, |
|
"loss": 0.1793, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.9682539682539684, |
|
"grad_norm": 0.943600594997406, |
|
"learning_rate": 7.111768638879834e-06, |
|
"loss": 0.1783, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.9841269841269842, |
|
"grad_norm": 0.8988723158836365, |
|
"learning_rate": 7.064033597346658e-06, |
|
"loss": 0.1472, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8375394940376282, |
|
"learning_rate": 7.016070694741824e-06, |
|
"loss": 0.0907, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.015873015873016, |
|
"grad_norm": 0.5929964780807495, |
|
"learning_rate": 6.967885225978366e-06, |
|
"loss": 0.0541, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.0317460317460316, |
|
"grad_norm": 0.7823465466499329, |
|
"learning_rate": 6.919482510539723e-06, |
|
"loss": 0.0553, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.0476190476190474, |
|
"grad_norm": 0.6394754648208618, |
|
"learning_rate": 6.870867891892511e-06, |
|
"loss": 0.0539, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.0634920634920633, |
|
"grad_norm": 0.6169945001602173, |
|
"learning_rate": 6.822046736896607e-06, |
|
"loss": 0.0446, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0793650793650795, |
|
"grad_norm": 0.6922060251235962, |
|
"learning_rate": 6.773024435212678e-06, |
|
"loss": 0.0474, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.0952380952380953, |
|
"grad_norm": 0.6317225098609924, |
|
"learning_rate": 6.723806398707186e-06, |
|
"loss": 0.0472, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 0.6741769909858704, |
|
"learning_rate": 6.674398060854931e-06, |
|
"loss": 0.042, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.126984126984127, |
|
"grad_norm": 0.9402458071708679, |
|
"learning_rate": 6.624804876139227e-06, |
|
"loss": 0.0647, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 0.8518017530441284, |
|
"learning_rate": 6.57503231944974e-06, |
|
"loss": 0.0559, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.1587301587301586, |
|
"grad_norm": 0.9338895678520203, |
|
"learning_rate": 6.525085885478088e-06, |
|
"loss": 0.0482, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.1746031746031744, |
|
"grad_norm": 0.8100844025611877, |
|
"learning_rate": 6.4749710881112485e-06, |
|
"loss": 0.0599, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.1904761904761907, |
|
"grad_norm": 1.0090680122375488, |
|
"learning_rate": 6.424693459822843e-06, |
|
"loss": 0.0642, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.2063492063492065, |
|
"grad_norm": 0.6279119253158569, |
|
"learning_rate": 6.374258551062377e-06, |
|
"loss": 0.0514, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.6508628129959106, |
|
"learning_rate": 6.3236719296424985e-06, |
|
"loss": 0.0464, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.238095238095238, |
|
"grad_norm": 0.586854100227356, |
|
"learning_rate": 6.272939180124316e-06, |
|
"loss": 0.0407, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.253968253968254, |
|
"grad_norm": 0.6627777814865112, |
|
"learning_rate": 6.222065903200909e-06, |
|
"loss": 0.0487, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.2698412698412698, |
|
"grad_norm": 0.7252594232559204, |
|
"learning_rate": 6.171057715079012e-06, |
|
"loss": 0.0568, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 0.5209100246429443, |
|
"learning_rate": 6.119920246859025e-06, |
|
"loss": 0.0363, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.3015873015873014, |
|
"grad_norm": 0.5737756490707397, |
|
"learning_rate": 6.068659143913349e-06, |
|
"loss": 0.0303, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.317460317460317, |
|
"grad_norm": 1.3126976490020752, |
|
"learning_rate": 6.0172800652631706e-06, |
|
"loss": 0.074, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.6515079140663147, |
|
"learning_rate": 5.965788682953717e-06, |
|
"loss": 0.047, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.3492063492063493, |
|
"grad_norm": 0.6837308406829834, |
|
"learning_rate": 5.914190681428098e-06, |
|
"loss": 0.0354, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.365079365079365, |
|
"grad_norm": 1.8536609411239624, |
|
"learning_rate": 5.862491756899753e-06, |
|
"loss": 0.0605, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 0.7980743646621704, |
|
"learning_rate": 5.8106976167236236e-06, |
|
"loss": 0.0443, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3968253968253967, |
|
"grad_norm": 0.7513721585273743, |
|
"learning_rate": 5.758813978766077e-06, |
|
"loss": 0.0554, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.4126984126984126, |
|
"grad_norm": 0.9976074695587158, |
|
"learning_rate": 5.706846570773677e-06, |
|
"loss": 0.0629, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 0.6811009049415588, |
|
"learning_rate": 5.654801129740863e-06, |
|
"loss": 0.0395, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.8673996329307556, |
|
"learning_rate": 5.6026834012766155e-06, |
|
"loss": 0.0517, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.4603174603174605, |
|
"grad_norm": 0.6963090300559998, |
|
"learning_rate": 5.550499138970158e-06, |
|
"loss": 0.0416, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.4761904761904763, |
|
"grad_norm": 0.6987242102622986, |
|
"learning_rate": 5.4982541037557825e-06, |
|
"loss": 0.0416, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.492063492063492, |
|
"grad_norm": 1.312720775604248, |
|
"learning_rate": 5.44595406327687e-06, |
|
"loss": 0.0499, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.507936507936508, |
|
"grad_norm": 0.7223400473594666, |
|
"learning_rate": 5.393604791249158e-06, |
|
"loss": 0.0617, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.5238095238095237, |
|
"grad_norm": 0.5593965649604797, |
|
"learning_rate": 5.341212066823356e-06, |
|
"loss": 0.0341, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.5396825396825395, |
|
"grad_norm": 0.80050128698349, |
|
"learning_rate": 5.288781673947143e-06, |
|
"loss": 0.0818, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 0.9423879981040955, |
|
"learning_rate": 5.2363194007266435e-06, |
|
"loss": 0.0533, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.5583639740943909, |
|
"learning_rate": 5.183831038787449e-06, |
|
"loss": 0.0308, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.5873015873015874, |
|
"grad_norm": 0.9041386842727661, |
|
"learning_rate": 5.131322382635236e-06, |
|
"loss": 0.0393, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.6031746031746033, |
|
"grad_norm": 0.7514568567276001, |
|
"learning_rate": 5.078799229016083e-06, |
|
"loss": 0.0379, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.619047619047619, |
|
"grad_norm": 0.6620778441429138, |
|
"learning_rate": 5.0262673762765316e-06, |
|
"loss": 0.042, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.634920634920635, |
|
"grad_norm": 0.7500037550926208, |
|
"learning_rate": 4.973732623723471e-06, |
|
"loss": 0.0494, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.6507936507936507, |
|
"grad_norm": 0.6531999707221985, |
|
"learning_rate": 4.921200770983919e-06, |
|
"loss": 0.0364, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.0730338096618652, |
|
"learning_rate": 4.8686776173647655e-06, |
|
"loss": 0.0554, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.682539682539683, |
|
"grad_norm": 0.6723483204841614, |
|
"learning_rate": 4.816168961212553e-06, |
|
"loss": 0.0532, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.6984126984126986, |
|
"grad_norm": 0.7021626234054565, |
|
"learning_rate": 4.763680599273357e-06, |
|
"loss": 0.0611, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.7142857142857144, |
|
"grad_norm": 0.7522814273834229, |
|
"learning_rate": 4.711218326052859e-06, |
|
"loss": 0.0426, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.7301587301587302, |
|
"grad_norm": 0.6586665511131287, |
|
"learning_rate": 4.6587879331766465e-06, |
|
"loss": 0.0368, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.746031746031746, |
|
"grad_norm": 1.2219913005828857, |
|
"learning_rate": 4.606395208750844e-06, |
|
"loss": 0.0852, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.761904761904762, |
|
"grad_norm": 0.6554266810417175, |
|
"learning_rate": 4.554045936723132e-06, |
|
"loss": 0.0516, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 0.5427403450012207, |
|
"learning_rate": 4.501745896244219e-06, |
|
"loss": 0.0388, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.7936507936507935, |
|
"grad_norm": 0.5815466046333313, |
|
"learning_rate": 4.4495008610298435e-06, |
|
"loss": 0.0452, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.8095238095238093, |
|
"grad_norm": 0.957595944404602, |
|
"learning_rate": 4.397316598723385e-06, |
|
"loss": 0.0584, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.825396825396825, |
|
"grad_norm": 0.6840035915374756, |
|
"learning_rate": 4.345198870259139e-06, |
|
"loss": 0.0503, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.8412698412698414, |
|
"grad_norm": 0.7481380105018616, |
|
"learning_rate": 4.2931534292263265e-06, |
|
"loss": 0.0561, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.5073666572570801, |
|
"learning_rate": 4.241186021233925e-06, |
|
"loss": 0.0339, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.873015873015873, |
|
"grad_norm": 0.6581488251686096, |
|
"learning_rate": 4.189302383276378e-06, |
|
"loss": 0.0629, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.623148500919342, |
|
"learning_rate": 4.137508243100249e-06, |
|
"loss": 0.0521, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.9047619047619047, |
|
"grad_norm": 0.621598482131958, |
|
"learning_rate": 4.085809318571905e-06, |
|
"loss": 0.0458, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.9206349206349205, |
|
"grad_norm": 0.6772786974906921, |
|
"learning_rate": 4.034211317046285e-06, |
|
"loss": 0.044, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.9365079365079367, |
|
"grad_norm": 1.262326955795288, |
|
"learning_rate": 3.982719934736832e-06, |
|
"loss": 0.046, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.9523809523809526, |
|
"grad_norm": 0.6345784068107605, |
|
"learning_rate": 3.931340856086652e-06, |
|
"loss": 0.042, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.9682539682539684, |
|
"grad_norm": 0.5799025297164917, |
|
"learning_rate": 3.880079753140978e-06, |
|
"loss": 0.0467, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.984126984126984, |
|
"grad_norm": 0.6458066701889038, |
|
"learning_rate": 3.82894228492099e-06, |
|
"loss": 0.047, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.5344202518463135, |
|
"learning_rate": 3.777934096799094e-06, |
|
"loss": 0.0418, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.015873015873016, |
|
"grad_norm": 0.3208453953266144, |
|
"learning_rate": 3.7270608198756852e-06, |
|
"loss": 0.0124, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0317460317460316, |
|
"grad_norm": 0.48605024814605713, |
|
"learning_rate": 3.676328070357503e-06, |
|
"loss": 0.0332, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.0476190476190474, |
|
"grad_norm": 0.5282842516899109, |
|
"learning_rate": 3.6257414489376217e-06, |
|
"loss": 0.0155, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0634920634920633, |
|
"grad_norm": 0.3682861030101776, |
|
"learning_rate": 3.5753065401771577e-06, |
|
"loss": 0.017, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 3.0793650793650795, |
|
"grad_norm": 0.4284667372703552, |
|
"learning_rate": 3.5250289118887515e-06, |
|
"loss": 0.0107, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 3.0952380952380953, |
|
"grad_norm": 0.36055704951286316, |
|
"learning_rate": 3.4749141145219118e-06, |
|
"loss": 0.0116, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 0.42703211307525635, |
|
"learning_rate": 3.424967680550261e-06, |
|
"loss": 0.0089, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 3.126984126984127, |
|
"grad_norm": 0.4422130882740021, |
|
"learning_rate": 3.3751951238607745e-06, |
|
"loss": 0.015, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 1.8617546558380127, |
|
"learning_rate": 3.3256019391450696e-06, |
|
"loss": 0.044, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.1587301587301586, |
|
"grad_norm": 0.4572526216506958, |
|
"learning_rate": 3.2761936012928147e-06, |
|
"loss": 0.0092, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"grad_norm": 0.606041431427002, |
|
"learning_rate": 3.226975564787322e-06, |
|
"loss": 0.013, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.1904761904761907, |
|
"grad_norm": 0.6772161722183228, |
|
"learning_rate": 3.177953263103394e-06, |
|
"loss": 0.0233, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.2063492063492065, |
|
"grad_norm": 0.4099721908569336, |
|
"learning_rate": 3.1291321081074887e-06, |
|
"loss": 0.0074, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 3.2222222222222223, |
|
"grad_norm": 0.34896424412727356, |
|
"learning_rate": 3.0805174894602775e-06, |
|
"loss": 0.0081, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 3.238095238095238, |
|
"grad_norm": 0.5158839821815491, |
|
"learning_rate": 3.032114774021636e-06, |
|
"loss": 0.0174, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.253968253968254, |
|
"grad_norm": 0.6538414359092712, |
|
"learning_rate": 2.9839293052581767e-06, |
|
"loss": 0.0188, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.2698412698412698, |
|
"grad_norm": 0.4532378911972046, |
|
"learning_rate": 2.9359664026533443e-06, |
|
"loss": 0.0123, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 3.2857142857142856, |
|
"grad_norm": 0.4588467478752136, |
|
"learning_rate": 2.8882313611201684e-06, |
|
"loss": 0.0154, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.3015873015873014, |
|
"grad_norm": 0.6380066871643066, |
|
"learning_rate": 2.8407294504167238e-06, |
|
"loss": 0.0168, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.317460317460317, |
|
"grad_norm": 0.3832227289676666, |
|
"learning_rate": 2.793465914564375e-06, |
|
"loss": 0.0118, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.4849812090396881, |
|
"learning_rate": 2.7464459712688517e-06, |
|
"loss": 0.0156, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.3492063492063493, |
|
"grad_norm": 0.5382815003395081, |
|
"learning_rate": 2.6996748113442397e-06, |
|
"loss": 0.0162, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 3.365079365079365, |
|
"grad_norm": 0.4217240810394287, |
|
"learning_rate": 2.653157598139932e-06, |
|
"loss": 0.0144, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.380952380952381, |
|
"grad_norm": 0.3669329881668091, |
|
"learning_rate": 2.6068994669706184e-06, |
|
"loss": 0.0068, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.3968253968253967, |
|
"grad_norm": 0.3910522162914276, |
|
"learning_rate": 2.560905524549364e-06, |
|
"loss": 0.0134, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 3.4126984126984126, |
|
"grad_norm": 0.3835786283016205, |
|
"learning_rate": 2.515180848423853e-06, |
|
"loss": 0.009, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.39538145065307617, |
|
"learning_rate": 2.469730486415842e-06, |
|
"loss": 0.0154, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.4444444444444446, |
|
"grad_norm": 0.48559364676475525, |
|
"learning_rate": 2.4245594560639086e-06, |
|
"loss": 0.0159, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 3.4603174603174605, |
|
"grad_norm": 0.6111780405044556, |
|
"learning_rate": 2.379672744069527e-06, |
|
"loss": 0.0216, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 3.4761904761904763, |
|
"grad_norm": 0.48673927783966064, |
|
"learning_rate": 2.335075305746558e-06, |
|
"loss": 0.0124, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.492063492063492, |
|
"grad_norm": 0.4308861792087555, |
|
"learning_rate": 2.2907720644742064e-06, |
|
"loss": 0.0112, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.507936507936508, |
|
"grad_norm": 0.39692750573158264, |
|
"learning_rate": 2.2467679111534963e-06, |
|
"loss": 0.0162, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 3.5238095238095237, |
|
"grad_norm": 0.4090782701969147, |
|
"learning_rate": 2.2030677036673345e-06, |
|
"loss": 0.0214, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.5396825396825395, |
|
"grad_norm": 0.4135613441467285, |
|
"learning_rate": 2.159676266344222e-06, |
|
"loss": 0.0157, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 0.8922034502029419, |
|
"learning_rate": 2.1165983894256647e-06, |
|
"loss": 0.0355, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.25958430767059326, |
|
"learning_rate": 2.0738388285373532e-06, |
|
"loss": 0.0057, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.5873015873015874, |
|
"grad_norm": 0.3267192244529724, |
|
"learning_rate": 2.0314023041641567e-06, |
|
"loss": 0.0089, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 3.6031746031746033, |
|
"grad_norm": 0.4624428451061249, |
|
"learning_rate": 1.9892935011290037e-06, |
|
"loss": 0.0159, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 3.619047619047619, |
|
"grad_norm": 0.2781091630458832, |
|
"learning_rate": 1.947517068075694e-06, |
|
"loss": 0.0083, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.634920634920635, |
|
"grad_norm": 0.31553471088409424, |
|
"learning_rate": 1.9060776169557083e-06, |
|
"loss": 0.0137, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 3.6507936507936507, |
|
"grad_norm": 0.5683518052101135, |
|
"learning_rate": 1.864979722519068e-06, |
|
"loss": 0.0298, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 0.3412047028541565, |
|
"learning_rate": 1.8242279218092968e-06, |
|
"loss": 0.0137, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.682539682539683, |
|
"grad_norm": 0.5104885101318359, |
|
"learning_rate": 1.7838267136625536e-06, |
|
"loss": 0.0114, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.6984126984126986, |
|
"grad_norm": 0.4502926170825958, |
|
"learning_rate": 1.743780558210979e-06, |
|
"loss": 0.013, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.48987045884132385, |
|
"learning_rate": 1.704093876390312e-06, |
|
"loss": 0.0217, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.7301587301587302, |
|
"grad_norm": 0.3420720398426056, |
|
"learning_rate": 1.664771049451837e-06, |
|
"loss": 0.0089, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.746031746031746, |
|
"grad_norm": 0.3107622563838959, |
|
"learning_rate": 1.6258164184787123e-06, |
|
"loss": 0.0085, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.761904761904762, |
|
"grad_norm": 0.5032781958580017, |
|
"learning_rate": 1.5872342839067305e-06, |
|
"loss": 0.0075, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 0.3874753415584564, |
|
"learning_rate": 1.5490289050495678e-06, |
|
"loss": 0.0108, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 3.7936507936507935, |
|
"grad_norm": 0.30904141068458557, |
|
"learning_rate": 1.511204499628574e-06, |
|
"loss": 0.0086, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 3.8095238095238093, |
|
"grad_norm": 0.39750581979751587, |
|
"learning_rate": 1.4737652433071515e-06, |
|
"loss": 0.0135, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.825396825396825, |
|
"grad_norm": 0.5290718674659729, |
|
"learning_rate": 1.4367152692297799e-06, |
|
"loss": 0.0187, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 3.8412698412698414, |
|
"grad_norm": 0.4600606858730316, |
|
"learning_rate": 1.4000586675657312e-06, |
|
"loss": 0.0111, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 3.857142857142857, |
|
"grad_norm": 0.461280882358551, |
|
"learning_rate": 1.3637994850575342e-06, |
|
"loss": 0.0147, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.873015873015873, |
|
"grad_norm": 0.5423941016197205, |
|
"learning_rate": 1.3279417245742288e-06, |
|
"loss": 0.0236, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 0.39030736684799194, |
|
"learning_rate": 1.2924893446694648e-06, |
|
"loss": 0.006, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.9047619047619047, |
|
"grad_norm": 0.5754550099372864, |
|
"learning_rate": 1.257446259144494e-06, |
|
"loss": 0.0286, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 3.9206349206349205, |
|
"grad_norm": 0.35015997290611267, |
|
"learning_rate": 1.222816336616104e-06, |
|
"loss": 0.0143, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 3.9365079365079367, |
|
"grad_norm": 0.4731229245662689, |
|
"learning_rate": 1.1886034000895341e-06, |
|
"loss": 0.0177, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 3.9523809523809526, |
|
"grad_norm": 0.3958592116832733, |
|
"learning_rate": 1.1548112265364336e-06, |
|
"loss": 0.0132, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 0.4273618161678314, |
|
"learning_rate": 1.1214435464779006e-06, |
|
"loss": 0.0206, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.984126984126984, |
|
"grad_norm": 0.429865300655365, |
|
"learning_rate": 1.088504043572643e-06, |
|
"loss": 0.0131, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.28878334164619446, |
|
"learning_rate": 1.055996354210323e-06, |
|
"loss": 0.0079, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.015873015873016, |
|
"grad_norm": 0.12734107673168182, |
|
"learning_rate": 1.0239240671101065e-06, |
|
"loss": 0.0027, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 4.031746031746032, |
|
"grad_norm": 0.24097059667110443, |
|
"learning_rate": 9.922907229244905e-07, |
|
"loss": 0.0063, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 4.0476190476190474, |
|
"grad_norm": 0.18659502267837524, |
|
"learning_rate": 9.610998138484262e-07, |
|
"loss": 0.0032, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.063492063492063, |
|
"grad_norm": 0.16850638389587402, |
|
"learning_rate": 9.303547832337934e-07, |
|
"loss": 0.0036, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 4.079365079365079, |
|
"grad_norm": 0.13990715146064758, |
|
"learning_rate": 9.000590252092701e-07, |
|
"loss": 0.0034, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 4.095238095238095, |
|
"grad_norm": 0.16486185789108276, |
|
"learning_rate": 8.702158843056319e-07, |
|
"loss": 0.0029, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 4.111111111111111, |
|
"grad_norm": 0.13398580253124237, |
|
"learning_rate": 8.408286550865319e-07, |
|
"loss": 0.0041, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 4.1269841269841265, |
|
"grad_norm": 0.14831550419330597, |
|
"learning_rate": 8.119005817847924e-07, |
|
"loss": 0.0039, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.142857142857143, |
|
"grad_norm": 0.1601114124059677, |
|
"learning_rate": 7.834348579442552e-07, |
|
"loss": 0.0034, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 4.158730158730159, |
|
"grad_norm": 0.2705823481082916, |
|
"learning_rate": 7.554346260672263e-07, |
|
"loss": 0.0054, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 4.174603174603175, |
|
"grad_norm": 0.17291611433029175, |
|
"learning_rate": 7.279029772675572e-07, |
|
"loss": 0.0025, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 4.190476190476191, |
|
"grad_norm": 0.11752714961767197, |
|
"learning_rate": 7.008429509293979e-07, |
|
"loss": 0.002, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 4.2063492063492065, |
|
"grad_norm": 0.2471189945936203, |
|
"learning_rate": 6.742575343716584e-07, |
|
"loss": 0.0053, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 0.2652353048324585, |
|
"learning_rate": 6.481496625182271e-07, |
|
"loss": 0.0035, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 4.238095238095238, |
|
"grad_norm": 0.5520457029342651, |
|
"learning_rate": 6.225222175739598e-07, |
|
"loss": 0.0094, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.253968253968254, |
|
"grad_norm": 0.1428154557943344, |
|
"learning_rate": 5.973780287065007e-07, |
|
"loss": 0.002, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.26984126984127, |
|
"grad_norm": 0.38164809346199036, |
|
"learning_rate": 5.727198717339511e-07, |
|
"loss": 0.0185, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.2093685418367386, |
|
"learning_rate": 5.485504688184307e-07, |
|
"loss": 0.0016, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.301587301587301, |
|
"grad_norm": 0.15652765333652496, |
|
"learning_rate": 5.24872488165562e-07, |
|
"loss": 0.0029, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 4.317460317460317, |
|
"grad_norm": 0.17758126556873322, |
|
"learning_rate": 5.016885437299113e-07, |
|
"loss": 0.003, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 4.333333333333333, |
|
"grad_norm": 0.18992979824543, |
|
"learning_rate": 4.790011949264173e-07, |
|
"loss": 0.0064, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.349206349206349, |
|
"grad_norm": 0.10369639843702316, |
|
"learning_rate": 4.5681294634784437e-07, |
|
"loss": 0.0014, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 4.365079365079365, |
|
"grad_norm": 0.09521545469760895, |
|
"learning_rate": 4.3512624748828225e-07, |
|
"loss": 0.0012, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.380952380952381, |
|
"grad_norm": 0.310982346534729, |
|
"learning_rate": 4.139434924727359e-07, |
|
"loss": 0.0059, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.396825396825397, |
|
"grad_norm": 0.3128473162651062, |
|
"learning_rate": 3.9326701979281623e-07, |
|
"loss": 0.0059, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 4.412698412698413, |
|
"grad_norm": 0.33223262429237366, |
|
"learning_rate": 3.7309911204858997e-07, |
|
"loss": 0.0031, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 4.428571428571429, |
|
"grad_norm": 0.13230553269386292, |
|
"learning_rate": 3.534419956965823e-07, |
|
"loss": 0.0024, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 0.20840996503829956, |
|
"learning_rate": 3.3429784080398765e-07, |
|
"loss": 0.005, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.4603174603174605, |
|
"grad_norm": 0.21250075101852417, |
|
"learning_rate": 3.1566876080910193e-07, |
|
"loss": 0.0042, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 4.476190476190476, |
|
"grad_norm": 0.14549191296100616, |
|
"learning_rate": 2.9755681228800904e-07, |
|
"loss": 0.0023, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.492063492063492, |
|
"grad_norm": 0.27009841799736023, |
|
"learning_rate": 2.799639947275412e-07, |
|
"loss": 0.0037, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 4.507936507936508, |
|
"grad_norm": 0.18486513197422028, |
|
"learning_rate": 2.6289225030454556e-07, |
|
"loss": 0.0025, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.523809523809524, |
|
"grad_norm": 0.2685333490371704, |
|
"learning_rate": 2.4634346367147233e-07, |
|
"loss": 0.0038, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.5396825396825395, |
|
"grad_norm": 0.22350570559501648, |
|
"learning_rate": 2.303194617483212e-07, |
|
"loss": 0.0052, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 4.555555555555555, |
|
"grad_norm": 0.1270400732755661, |
|
"learning_rate": 2.1482201352095277e-07, |
|
"loss": 0.0037, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.14703014492988586, |
|
"learning_rate": 1.998528298458019e-07, |
|
"loss": 0.0026, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.587301587301587, |
|
"grad_norm": 0.05500807240605354, |
|
"learning_rate": 1.8541356326100436e-07, |
|
"loss": 0.0006, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 4.603174603174603, |
|
"grad_norm": 0.23130561411380768, |
|
"learning_rate": 1.7150580780396385e-07, |
|
"loss": 0.0041, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.619047619047619, |
|
"grad_norm": 0.14613750576972961, |
|
"learning_rate": 1.5813109883537792e-07, |
|
"loss": 0.0022, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.634920634920634, |
|
"grad_norm": 0.19623324275016785, |
|
"learning_rate": 1.4529091286973994e-07, |
|
"loss": 0.0029, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 4.650793650793651, |
|
"grad_norm": 0.2580835819244385, |
|
"learning_rate": 1.3298666741233424e-07, |
|
"loss": 0.0047, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 0.19105015695095062, |
|
"learning_rate": 1.2121972080275378e-07, |
|
"loss": 0.0041, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 4.682539682539683, |
|
"grad_norm": 0.18912175297737122, |
|
"learning_rate": 1.0999137206494315e-07, |
|
"loss": 0.0045, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.698412698412699, |
|
"grad_norm": 0.2492668479681015, |
|
"learning_rate": 9.93028607637908e-08, |
|
"loss": 0.0067, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.714285714285714, |
|
"grad_norm": 0.1554499715566635, |
|
"learning_rate": 8.915536686828764e-08, |
|
"loss": 0.0085, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.73015873015873, |
|
"grad_norm": 0.16742238402366638, |
|
"learning_rate": 7.955001062125989e-08, |
|
"loss": 0.0036, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 4.746031746031746, |
|
"grad_norm": 0.1783057302236557, |
|
"learning_rate": 7.048785241570321e-08, |
|
"loss": 0.0028, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"grad_norm": 0.1566859483718872, |
|
"learning_rate": 6.19698926777168e-08, |
|
"loss": 0.0033, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.777777777777778, |
|
"grad_norm": 0.1521420031785965, |
|
"learning_rate": 5.399707175606117e-08, |
|
"loss": 0.0016, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 4.7936507936507935, |
|
"grad_norm": 0.17157761752605438, |
|
"learning_rate": 4.657026981834623e-08, |
|
"loss": 0.002, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 4.809523809523809, |
|
"grad_norm": 0.1927778720855713, |
|
"learning_rate": 3.9690306753866204e-08, |
|
"loss": 0.0029, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 4.825396825396825, |
|
"grad_norm": 0.15908092260360718, |
|
"learning_rate": 3.3357942083085404e-08, |
|
"loss": 0.0016, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.841269841269841, |
|
"grad_norm": 0.175292506814003, |
|
"learning_rate": 2.7573874873791372e-08, |
|
"loss": 0.0043, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 0.121407650411129, |
|
"learning_rate": 2.233874366391997e-08, |
|
"loss": 0.001, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.8730158730158735, |
|
"grad_norm": 0.2875802218914032, |
|
"learning_rate": 1.7653126391063425e-08, |
|
"loss": 0.005, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 0.18822738528251648, |
|
"learning_rate": 1.3517540328669143e-08, |
|
"loss": 0.0025, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.904761904761905, |
|
"grad_norm": 0.15360276401042938, |
|
"learning_rate": 9.93244202893262e-09, |
|
"loss": 0.0023, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 4.920634920634921, |
|
"grad_norm": 0.29353615641593933, |
|
"learning_rate": 6.898227272398306e-09, |
|
"loss": 0.0065, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.936507936507937, |
|
"grad_norm": 0.5181974172592163, |
|
"learning_rate": 4.415231024265665e-09, |
|
"loss": 0.0108, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 4.9523809523809526, |
|
"grad_norm": 0.15942662954330444, |
|
"learning_rate": 2.4837273974115393e-09, |
|
"loss": 0.0026, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.968253968253968, |
|
"grad_norm": 0.1265093982219696, |
|
"learning_rate": 1.1039296221276863e-09, |
|
"loss": 0.0116, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 4.984126984126984, |
|
"grad_norm": 0.29910922050476074, |
|
"learning_rate": 2.7599002258127395e-10, |
|
"loss": 0.0022, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.09251413494348526, |
|
"learning_rate": 0.0, |
|
"loss": 0.0015, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 315, |
|
"total_flos": 32588508020736.0, |
|
"train_loss": 0.11869781007546754, |
|
"train_runtime": 5355.6377, |
|
"train_samples_per_second": 0.934, |
|
"train_steps_per_second": 0.059 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 32588508020736.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|