|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2178649237472767,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002178649237472767,
      "grad_norm": 0.9198092818260193,
      "learning_rate": 1e-05,
      "loss": 2.7151,
      "step": 1
    },
    {
      "epoch": 0.002178649237472767,
      "eval_loss": 1.4438984394073486,
      "eval_runtime": 63.6435,
      "eval_samples_per_second": 6.081,
      "eval_steps_per_second": 0.77,
      "step": 1
    },
    {
      "epoch": 0.004357298474945534,
      "grad_norm": 0.9449625611305237,
      "learning_rate": 2e-05,
      "loss": 2.8151,
      "step": 2
    },
    {
      "epoch": 0.006535947712418301,
      "grad_norm": 0.9119125604629517,
      "learning_rate": 3e-05,
      "loss": 2.9119,
      "step": 3
    },
    {
      "epoch": 0.008714596949891068,
      "grad_norm": 0.9674502015113831,
      "learning_rate": 4e-05,
      "loss": 2.8157,
      "step": 4
    },
    {
      "epoch": 0.010893246187363835,
      "grad_norm": 0.9883757829666138,
      "learning_rate": 5e-05,
      "loss": 2.8768,
      "step": 5
    },
    {
      "epoch": 0.013071895424836602,
      "grad_norm": 0.9771690964698792,
      "learning_rate": 6e-05,
      "loss": 3.1472,
      "step": 6
    },
    {
      "epoch": 0.015250544662309368,
      "grad_norm": 1.2254856824874878,
      "learning_rate": 7e-05,
      "loss": 2.7488,
      "step": 7
    },
    {
      "epoch": 0.017429193899782137,
      "grad_norm": 1.9455372095108032,
      "learning_rate": 8e-05,
      "loss": 2.615,
      "step": 8
    },
    {
      "epoch": 0.0196078431372549,
      "grad_norm": 1.188912034034729,
      "learning_rate": 9e-05,
      "loss": 2.6818,
      "step": 9
    },
    {
      "epoch": 0.0196078431372549,
      "eval_loss": 1.2748327255249023,
      "eval_runtime": 63.6623,
      "eval_samples_per_second": 6.079,
      "eval_steps_per_second": 0.77,
      "step": 9
    },
    {
      "epoch": 0.02178649237472767,
      "grad_norm": 1.1046169996261597,
      "learning_rate": 0.0001,
      "loss": 2.4862,
      "step": 10
    },
    {
      "epoch": 0.023965141612200435,
      "grad_norm": 1.140702247619629,
      "learning_rate": 9.99695413509548e-05,
      "loss": 2.6588,
      "step": 11
    },
    {
      "epoch": 0.026143790849673203,
      "grad_norm": 1.100024938583374,
      "learning_rate": 9.987820251299122e-05,
      "loss": 2.3629,
      "step": 12
    },
    {
      "epoch": 0.02832244008714597,
      "grad_norm": 1.133280873298645,
      "learning_rate": 9.972609476841367e-05,
      "loss": 2.309,
      "step": 13
    },
    {
      "epoch": 0.030501089324618737,
      "grad_norm": 1.1353580951690674,
      "learning_rate": 9.951340343707852e-05,
      "loss": 2.401,
      "step": 14
    },
    {
      "epoch": 0.032679738562091505,
      "grad_norm": 1.2960582971572876,
      "learning_rate": 9.924038765061042e-05,
      "loss": 2.298,
      "step": 15
    },
    {
      "epoch": 0.034858387799564274,
      "grad_norm": 1.2568838596343994,
      "learning_rate": 9.890738003669029e-05,
      "loss": 2.2967,
      "step": 16
    },
    {
      "epoch": 0.037037037037037035,
      "grad_norm": 1.2687550783157349,
      "learning_rate": 9.851478631379982e-05,
      "loss": 1.9983,
      "step": 17
    },
    {
      "epoch": 0.0392156862745098,
      "grad_norm": 1.3108571767807007,
      "learning_rate": 9.806308479691595e-05,
      "loss": 2.1777,
      "step": 18
    },
    {
      "epoch": 0.0392156862745098,
      "eval_loss": 1.0612674951553345,
      "eval_runtime": 63.6208,
      "eval_samples_per_second": 6.083,
      "eval_steps_per_second": 0.77,
      "step": 18
    },
    {
      "epoch": 0.04139433551198257,
      "grad_norm": 1.2421090602874756,
      "learning_rate": 9.755282581475769e-05,
      "loss": 2.1237,
      "step": 19
    },
    {
      "epoch": 0.04357298474945534,
      "grad_norm": 1.2717667818069458,
      "learning_rate": 9.698463103929542e-05,
      "loss": 2.0674,
      "step": 20
    },
    {
      "epoch": 0.0457516339869281,
      "grad_norm": 1.5267056226730347,
      "learning_rate": 9.635919272833938e-05,
      "loss": 2.2134,
      "step": 21
    },
    {
      "epoch": 0.04793028322440087,
      "grad_norm": 1.3219244480133057,
      "learning_rate": 9.567727288213005e-05,
      "loss": 2.0757,
      "step": 22
    },
    {
      "epoch": 0.05010893246187364,
      "grad_norm": 2.458014488220215,
      "learning_rate": 9.493970231495835e-05,
      "loss": 2.1298,
      "step": 23
    },
    {
      "epoch": 0.05228758169934641,
      "grad_norm": 1.3178763389587402,
      "learning_rate": 9.414737964294636e-05,
      "loss": 1.976,
      "step": 24
    },
    {
      "epoch": 0.054466230936819175,
      "grad_norm": 1.464354395866394,
      "learning_rate": 9.330127018922194e-05,
      "loss": 2.1124,
      "step": 25
    },
    {
      "epoch": 0.05664488017429194,
      "grad_norm": 3.1391263008117676,
      "learning_rate": 9.24024048078213e-05,
      "loss": 2.0881,
      "step": 26
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 1.312290906906128,
      "learning_rate": 9.145187862775209e-05,
      "loss": 1.8875,
      "step": 27
    },
    {
      "epoch": 0.058823529411764705,
      "eval_loss": 1.0010426044464111,
      "eval_runtime": 63.6531,
      "eval_samples_per_second": 6.08,
      "eval_steps_per_second": 0.77,
      "step": 27
    },
    {
      "epoch": 0.06100217864923747,
      "grad_norm": 1.45926833152771,
      "learning_rate": 9.045084971874738e-05,
      "loss": 2.1062,
      "step": 28
    },
    {
      "epoch": 0.06318082788671024,
      "grad_norm": 1.319545030593872,
      "learning_rate": 8.940053768033609e-05,
      "loss": 1.8661,
      "step": 29
    },
    {
      "epoch": 0.06535947712418301,
      "grad_norm": 1.4982140064239502,
      "learning_rate": 8.83022221559489e-05,
      "loss": 1.9231,
      "step": 30
    },
    {
      "epoch": 0.06753812636165578,
      "grad_norm": 1.494714379310608,
      "learning_rate": 8.715724127386972e-05,
      "loss": 1.8508,
      "step": 31
    },
    {
      "epoch": 0.06971677559912855,
      "grad_norm": 1.421554684638977,
      "learning_rate": 8.596699001693255e-05,
      "loss": 1.9356,
      "step": 32
    },
    {
      "epoch": 0.0718954248366013,
      "grad_norm": 1.840808391571045,
      "learning_rate": 8.473291852294987e-05,
      "loss": 1.8936,
      "step": 33
    },
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 1.348752737045288,
      "learning_rate": 8.345653031794292e-05,
      "loss": 1.9958,
      "step": 34
    },
    {
      "epoch": 0.07625272331154684,
      "grad_norm": 1.445092797279358,
      "learning_rate": 8.213938048432697e-05,
      "loss": 1.9379,
      "step": 35
    },
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 1.3066115379333496,
      "learning_rate": 8.07830737662829e-05,
      "loss": 2.152,
      "step": 36
    },
    {
      "epoch": 0.0784313725490196,
      "eval_loss": 0.9629685282707214,
      "eval_runtime": 63.6264,
      "eval_samples_per_second": 6.082,
      "eval_steps_per_second": 0.77,
      "step": 36
    },
    {
      "epoch": 0.08061002178649238,
      "grad_norm": 1.3450731039047241,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.8907,
      "step": 37
    },
    {
      "epoch": 0.08278867102396514,
      "grad_norm": 1.2317782640457153,
      "learning_rate": 7.795964517353735e-05,
      "loss": 1.6973,
      "step": 38
    },
    {
      "epoch": 0.08496732026143791,
      "grad_norm": 1.4453401565551758,
      "learning_rate": 7.649596321166024e-05,
      "loss": 1.882,
      "step": 39
    },
    {
      "epoch": 0.08714596949891068,
      "grad_norm": 1.2838209867477417,
      "learning_rate": 7.500000000000001e-05,
      "loss": 2.0322,
      "step": 40
    },
    {
      "epoch": 0.08932461873638345,
      "grad_norm": 1.446647047996521,
      "learning_rate": 7.347357813929454e-05,
      "loss": 1.8212,
      "step": 41
    },
    {
      "epoch": 0.0915032679738562,
      "grad_norm": 1.3220431804656982,
      "learning_rate": 7.191855733945387e-05,
      "loss": 1.9573,
      "step": 42
    },
    {
      "epoch": 0.09368191721132897,
      "grad_norm": 1.4899321794509888,
      "learning_rate": 7.033683215379002e-05,
      "loss": 2.0206,
      "step": 43
    },
    {
      "epoch": 0.09586056644880174,
      "grad_norm": 1.4026318788528442,
      "learning_rate": 6.873032967079561e-05,
      "loss": 1.9114,
      "step": 44
    },
    {
      "epoch": 0.09803921568627451,
      "grad_norm": 1.5946420431137085,
      "learning_rate": 6.710100716628344e-05,
      "loss": 1.8127,
      "step": 45
    },
    {
      "epoch": 0.09803921568627451,
      "eval_loss": 0.936172366142273,
      "eval_runtime": 63.6149,
      "eval_samples_per_second": 6.083,
      "eval_steps_per_second": 0.77,
      "step": 45
    },
    {
      "epoch": 0.10021786492374728,
      "grad_norm": 1.2588008642196655,
      "learning_rate": 6.545084971874738e-05,
      "loss": 1.8878,
      "step": 46
    },
    {
      "epoch": 0.10239651416122005,
      "grad_norm": 1.3478667736053467,
      "learning_rate": 6.378186779084995e-05,
      "loss": 1.9031,
      "step": 47
    },
    {
      "epoch": 0.10457516339869281,
      "grad_norm": 1.408197045326233,
      "learning_rate": 6.209609477998338e-05,
      "loss": 2.0725,
      "step": 48
    },
    {
      "epoch": 0.10675381263616558,
      "grad_norm": 1.3039942979812622,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 1.7085,
      "step": 49
    },
    {
      "epoch": 0.10893246187363835,
      "grad_norm": 1.3145512342453003,
      "learning_rate": 5.868240888334653e-05,
      "loss": 1.8257,
      "step": 50
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 1.3988362550735474,
      "learning_rate": 5.695865504800327e-05,
      "loss": 1.8748,
      "step": 51
    },
    {
      "epoch": 0.11328976034858387,
      "grad_norm": 1.3726601600646973,
      "learning_rate": 5.522642316338268e-05,
      "loss": 2.0158,
      "step": 52
    },
    {
      "epoch": 0.11546840958605664,
      "grad_norm": 1.333004355430603,
      "learning_rate": 5.348782368720626e-05,
      "loss": 1.8631,
      "step": 53
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 1.4668630361557007,
      "learning_rate": 5.174497483512506e-05,
      "loss": 1.8015,
      "step": 54
    },
    {
      "epoch": 0.11764705882352941,
      "eval_loss": 0.9176331162452698,
      "eval_runtime": 63.6089,
      "eval_samples_per_second": 6.084,
      "eval_steps_per_second": 0.77,
      "step": 54
    },
    {
      "epoch": 0.11982570806100218,
      "grad_norm": 1.4083083868026733,
      "learning_rate": 5e-05,
      "loss": 1.8677,
      "step": 55
    },
    {
      "epoch": 0.12200435729847495,
      "grad_norm": 1.4715665578842163,
      "learning_rate": 4.825502516487497e-05,
      "loss": 1.847,
      "step": 56
    },
    {
      "epoch": 0.12418300653594772,
      "grad_norm": 1.4060428142547607,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 1.6689,
      "step": 57
    },
    {
      "epoch": 0.12636165577342048,
      "grad_norm": 1.4863489866256714,
      "learning_rate": 4.477357683661734e-05,
      "loss": 1.7699,
      "step": 58
    },
    {
      "epoch": 0.12854030501089325,
      "grad_norm": 1.3359904289245605,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 1.9346,
      "step": 59
    },
    {
      "epoch": 0.13071895424836602,
      "grad_norm": 1.452951431274414,
      "learning_rate": 4.131759111665349e-05,
      "loss": 1.978,
      "step": 60
    },
    {
      "epoch": 0.1328976034858388,
      "grad_norm": 1.3285267353057861,
      "learning_rate": 3.960441545911204e-05,
      "loss": 1.6478,
      "step": 61
    },
    {
      "epoch": 0.13507625272331156,
      "grad_norm": 1.4526207447052002,
      "learning_rate": 3.790390522001662e-05,
      "loss": 1.8832,
      "step": 62
    },
    {
      "epoch": 0.13725490196078433,
      "grad_norm": 1.3983100652694702,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 1.8115,
      "step": 63
    },
    {
      "epoch": 0.13725490196078433,
      "eval_loss": 0.9005686640739441,
      "eval_runtime": 63.6036,
      "eval_samples_per_second": 6.085,
      "eval_steps_per_second": 0.77,
      "step": 63
    },
    {
      "epoch": 0.1394335511982571,
      "grad_norm": 1.3702726364135742,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 1.9095,
      "step": 64
    },
    {
      "epoch": 0.14161220043572983,
      "grad_norm": 1.2157334089279175,
      "learning_rate": 3.289899283371657e-05,
      "loss": 1.6402,
      "step": 65
    },
    {
      "epoch": 0.1437908496732026,
      "grad_norm": 1.388006567955017,
      "learning_rate": 3.12696703292044e-05,
      "loss": 1.7927,
      "step": 66
    },
    {
      "epoch": 0.14596949891067537,
      "grad_norm": 1.4038636684417725,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 1.7986,
      "step": 67
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 1.4844595193862915,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 1.9496,
      "step": 68
    },
    {
      "epoch": 0.1503267973856209,
      "grad_norm": 1.5678737163543701,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 1.9151,
      "step": 69
    },
    {
      "epoch": 0.15250544662309368,
      "grad_norm": 1.5263280868530273,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.9183,
      "step": 70
    },
    {
      "epoch": 0.15468409586056645,
      "grad_norm": 1.4814729690551758,
      "learning_rate": 2.350403678833976e-05,
      "loss": 1.7335,
      "step": 71
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 1.6117812395095825,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 1.8991,
      "step": 72
    },
    {
      "epoch": 0.1568627450980392,
      "eval_loss": 0.8903294205665588,
      "eval_runtime": 63.6061,
      "eval_samples_per_second": 6.084,
      "eval_steps_per_second": 0.77,
      "step": 72
    },
    {
      "epoch": 0.15904139433551198,
      "grad_norm": 1.336901307106018,
      "learning_rate": 2.061073738537635e-05,
      "loss": 1.5583,
      "step": 73
    },
    {
      "epoch": 0.16122004357298475,
      "grad_norm": 1.5692085027694702,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 1.8606,
      "step": 74
    },
    {
      "epoch": 0.16339869281045752,
      "grad_norm": 1.4867205619812012,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 1.8198,
      "step": 75
    },
    {
      "epoch": 0.1655773420479303,
      "grad_norm": 1.8424410820007324,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 1.7089,
      "step": 76
    },
    {
      "epoch": 0.16775599128540306,
      "grad_norm": 1.5348371267318726,
      "learning_rate": 1.526708147705013e-05,
      "loss": 1.8777,
      "step": 77
    },
    {
      "epoch": 0.16993464052287582,
      "grad_norm": 1.4218686819076538,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 1.8016,
      "step": 78
    },
    {
      "epoch": 0.1721132897603486,
      "grad_norm": 1.4566261768341064,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 1.7037,
      "step": 79
    },
    {
      "epoch": 0.17429193899782136,
      "grad_norm": 1.4800132513046265,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 1.7889,
      "step": 80
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 1.4129917621612549,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 1.8648,
      "step": 81
    },
    {
      "epoch": 0.17647058823529413,
      "eval_loss": 0.884328305721283,
      "eval_runtime": 63.6367,
      "eval_samples_per_second": 6.081,
      "eval_steps_per_second": 0.77,
      "step": 81
    },
    {
      "epoch": 0.1786492374727669,
      "grad_norm": 2.1455793380737305,
      "learning_rate": 9.549150281252633e-06,
      "loss": 1.9025,
      "step": 82
    },
    {
      "epoch": 0.18082788671023964,
      "grad_norm": 1.6186474561691284,
      "learning_rate": 8.548121372247918e-06,
      "loss": 2.0026,
      "step": 83
    },
    {
      "epoch": 0.1830065359477124,
      "grad_norm": 1.6770442724227905,
      "learning_rate": 7.597595192178702e-06,
      "loss": 1.9087,
      "step": 84
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 1.7015310525894165,
      "learning_rate": 6.698729810778065e-06,
      "loss": 1.9845,
      "step": 85
    },
    {
      "epoch": 0.18736383442265794,
      "grad_norm": 1.4738478660583496,
      "learning_rate": 5.852620357053651e-06,
      "loss": 1.8843,
      "step": 86
    },
    {
      "epoch": 0.1895424836601307,
      "grad_norm": 1.548987627029419,
      "learning_rate": 5.060297685041659e-06,
      "loss": 1.9916,
      "step": 87
    },
    {
      "epoch": 0.19172113289760348,
      "grad_norm": 1.4082995653152466,
      "learning_rate": 4.322727117869951e-06,
      "loss": 1.7131,
      "step": 88
    },
    {
      "epoch": 0.19389978213507625,
      "grad_norm": 1.434032678604126,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 1.8378,
      "step": 89
    },
    {
      "epoch": 0.19607843137254902,
      "grad_norm": 1.4525424242019653,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 1.606,
      "step": 90
    },
    {
      "epoch": 0.19607843137254902,
      "eval_loss": 0.8813257217407227,
      "eval_runtime": 63.7104,
      "eval_samples_per_second": 6.074,
      "eval_steps_per_second": 0.769,
      "step": 90
    },
    {
      "epoch": 0.19825708061002179,
      "grad_norm": 1.4912457466125488,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 1.6441,
      "step": 91
    },
    {
      "epoch": 0.20043572984749455,
      "grad_norm": 1.4377028942108154,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 1.8356,
      "step": 92
    },
    {
      "epoch": 0.20261437908496732,
      "grad_norm": 1.555307388305664,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 1.7871,
      "step": 93
    },
    {
      "epoch": 0.2047930283224401,
      "grad_norm": 1.4429875612258911,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 1.716,
      "step": 94
    },
    {
      "epoch": 0.20697167755991286,
      "grad_norm": 1.669837236404419,
      "learning_rate": 7.596123493895991e-07,
      "loss": 1.7517,
      "step": 95
    },
    {
      "epoch": 0.20915032679738563,
      "grad_norm": 1.4851337671279907,
      "learning_rate": 4.865965629214819e-07,
      "loss": 1.7941,
      "step": 96
    },
    {
      "epoch": 0.2113289760348584,
      "grad_norm": 1.3839343786239624,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 1.7239,
      "step": 97
    },
    {
      "epoch": 0.21350762527233116,
      "grad_norm": 1.7010775804519653,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 1.8666,
      "step": 98
    },
    {
      "epoch": 0.21568627450980393,
      "grad_norm": 1.3769611120224,
      "learning_rate": 3.04586490452119e-08,
      "loss": 1.6545,
      "step": 99
    },
    {
      "epoch": 0.21568627450980393,
      "eval_loss": 0.8808857798576355,
      "eval_runtime": 63.6882,
      "eval_samples_per_second": 6.076,
      "eval_steps_per_second": 0.769,
      "step": 99
    },
    {
      "epoch": 0.2178649237472767,
      "grad_norm": 1.4309481382369995,
      "learning_rate": 0.0,
      "loss": 1.7055,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.294949098192896e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|