{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9921259842519685,
  "eval_steps": 500,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.005249343832020997, "grad_norm": 1.1348930782232016, "learning_rate": 1.5000000000000002e-07, "loss": 1.1087, "step": 1},
    {"epoch": 0.010498687664041995, "grad_norm": 1.123696373079589, "learning_rate": 3.0000000000000004e-07, "loss": 1.1356, "step": 2},
    {"epoch": 0.015748031496062992, "grad_norm": 1.0989081863562118, "learning_rate": 4.5e-07, "loss": 1.1158, "step": 3},
    {"epoch": 0.02099737532808399, "grad_norm": 1.0628548113414964, "learning_rate": 6.000000000000001e-07, "loss": 1.0986, "step": 4},
    {"epoch": 0.026246719160104987, "grad_norm": 1.0629069543612368, "learning_rate": 7.5e-07, "loss": 1.0727, "step": 5},
    {"epoch": 0.031496062992125984, "grad_norm": 1.1219311917213644, "learning_rate": 9e-07, "loss": 1.1513, "step": 6},
    {"epoch": 0.03674540682414698, "grad_norm": 1.068318638334139, "learning_rate": 1.05e-06, "loss": 1.0978, "step": 7},
    {"epoch": 0.04199475065616798, "grad_norm": 1.0335025624008565, "learning_rate": 1.2000000000000002e-06, "loss": 1.0932, "step": 8},
    {"epoch": 0.047244094488188976, "grad_norm": 0.9514112971268772, "learning_rate": 1.35e-06, "loss": 1.1046, "step": 9},
    {"epoch": 0.05249343832020997, "grad_norm": 0.8944230714776324, "learning_rate": 1.5e-06, "loss": 1.0638, "step": 10},
    {"epoch": 0.05774278215223097, "grad_norm": 0.8720343077794245, "learning_rate": 1.65e-06, "loss": 1.1132, "step": 11},
    {"epoch": 0.06299212598425197, "grad_norm": 0.7519518665820406, "learning_rate": 1.8e-06, "loss": 1.0788, "step": 12},
    {"epoch": 0.06824146981627296, "grad_norm": 0.7768466543241798, "learning_rate": 1.95e-06, "loss": 1.0795, "step": 13},
    {"epoch": 0.07349081364829396, "grad_norm": 0.7109922479048013, "learning_rate": 2.1e-06, "loss": 1.1012, "step": 14},
    {"epoch": 0.07874015748031496, "grad_norm": 0.6312078880187205, "learning_rate": 2.25e-06, "loss": 1.0851, "step": 15},
    {"epoch": 0.08398950131233596, "grad_norm": 0.5514473048370377, "learning_rate": 2.4000000000000003e-06, "loss": 1.1041, "step": 16},
    {"epoch": 0.08923884514435695, "grad_norm": 0.6271281070432462, "learning_rate": 2.55e-06, "loss": 1.0855, "step": 17},
    {"epoch": 0.09448818897637795, "grad_norm": 0.7059888078645049, "learning_rate": 2.7e-06, "loss": 1.0473, "step": 18},
    {"epoch": 0.09973753280839895, "grad_norm": 0.7226157330393405, "learning_rate": 2.85e-06, "loss": 1.0665, "step": 19},
    {"epoch": 0.10498687664041995, "grad_norm": 0.7244742832208652, "learning_rate": 3e-06, "loss": 1.0604, "step": 20},
    {"epoch": 0.11023622047244094, "grad_norm": 0.7088251146482789, "learning_rate": 3.1500000000000003e-06, "loss": 1.0516, "step": 21},
    {"epoch": 0.11548556430446194, "grad_norm": 0.5987242362229293, "learning_rate": 3.3e-06, "loss": 1.084, "step": 22},
    {"epoch": 0.12073490813648294, "grad_norm": 0.5730637810768702, "learning_rate": 3.45e-06, "loss": 1.0621, "step": 23},
    {"epoch": 0.12598425196850394, "grad_norm": 0.5894968443138215, "learning_rate": 3.6e-06, "loss": 1.0797, "step": 24},
    {"epoch": 0.13123359580052493, "grad_norm": 0.5798124303184627, "learning_rate": 3.75e-06, "loss": 1.0035, "step": 25},
    {"epoch": 0.13648293963254593, "grad_norm": 0.643205751513686, "learning_rate": 3.9e-06, "loss": 1.0455, "step": 26},
    {"epoch": 0.14173228346456693, "grad_norm": 0.5621970774702022, "learning_rate": 4.05e-06, "loss": 1.0576, "step": 27},
    {"epoch": 0.14698162729658792, "grad_norm": 0.5506084571895594, "learning_rate": 4.2e-06, "loss": 1.0298, "step": 28},
    {"epoch": 0.15223097112860892, "grad_norm": 0.48741149421912777, "learning_rate": 4.35e-06, "loss": 1.0018, "step": 29},
    {"epoch": 0.15748031496062992, "grad_norm": 0.46403007703544275, "learning_rate": 4.5e-06, "loss": 0.9872, "step": 30},
    {"epoch": 0.16272965879265092, "grad_norm": 0.4754381818573106, "learning_rate": 4.65e-06, "loss": 1.0271, "step": 31},
    {"epoch": 0.1679790026246719, "grad_norm": 0.9362850890979981, "learning_rate": 4.800000000000001e-06, "loss": 1.0437, "step": 32},
    {"epoch": 0.1732283464566929, "grad_norm": 0.47391181595772164, "learning_rate": 4.95e-06, "loss": 1.0437, "step": 33},
    {"epoch": 0.1784776902887139, "grad_norm": 0.5276920454851337, "learning_rate": 5.1e-06, "loss": 1.0557, "step": 34},
    {"epoch": 0.1837270341207349, "grad_norm": 0.4616075133913133, "learning_rate": 5.2500000000000006e-06, "loss": 1.0465, "step": 35},
    {"epoch": 0.1889763779527559, "grad_norm": 0.4555174555636226, "learning_rate": 5.4e-06, "loss": 1.0588, "step": 36},
    {"epoch": 0.1942257217847769, "grad_norm": 0.5071864534648831, "learning_rate": 5.55e-06, "loss": 1.044, "step": 37},
    {"epoch": 0.1994750656167979, "grad_norm": 0.4851367263882934, "learning_rate": 5.7e-06, "loss": 1.0464, "step": 38},
    {"epoch": 0.2047244094488189, "grad_norm": 0.44188022228811896, "learning_rate": 5.85e-06, "loss": 1.0182, "step": 39},
    {"epoch": 0.2099737532808399, "grad_norm": 0.43420740120454643, "learning_rate": 6e-06, "loss": 1.0188, "step": 40},
    {"epoch": 0.2152230971128609, "grad_norm": 0.4291543441241407, "learning_rate": 5.9998719351101036e-06, "loss": 1.0245, "step": 41},
    {"epoch": 0.2204724409448819, "grad_norm": 0.43326370236005163, "learning_rate": 5.999487751374158e-06, "loss": 1.0238, "step": 42},
    {"epoch": 0.22572178477690288, "grad_norm": 0.427571644972227, "learning_rate": 5.998847481592462e-06, "loss": 1.0311, "step": 43},
    {"epoch": 0.23097112860892388, "grad_norm": 0.4215063088273006, "learning_rate": 5.997951180429069e-06, "loss": 0.9925, "step": 44},
    {"epoch": 0.23622047244094488, "grad_norm": 0.4206536914503675, "learning_rate": 5.996798924407118e-06, "loss": 1.003, "step": 45},
    {"epoch": 0.24146981627296588, "grad_norm": 0.40910969064965136, "learning_rate": 5.995390811902302e-06, "loss": 0.9949, "step": 46},
    {"epoch": 0.24671916010498687, "grad_norm": 0.4165775049327623, "learning_rate": 5.993726963134471e-06, "loss": 0.9734, "step": 47},
    {"epoch": 0.25196850393700787, "grad_norm": 0.3832235501001726, "learning_rate": 5.9918075201573645e-06, "loss": 0.9485, "step": 48},
    {"epoch": 0.2572178477690289, "grad_norm": 0.37002495168808525, "learning_rate": 5.9896326468464835e-06, "loss": 0.9358, "step": 49},
    {"epoch": 0.26246719160104987, "grad_norm": 0.44836853406053057, "learning_rate": 5.987202528885104e-06, "loss": 0.9982, "step": 50},
    {"epoch": 0.2677165354330709, "grad_norm": 0.4080608606117312, "learning_rate": 5.984517373748417e-06, "loss": 1.0129, "step": 51},
    {"epoch": 0.27296587926509186, "grad_norm": 0.4001550595702573, "learning_rate": 5.981577410685822e-06, "loss": 0.9788, "step": 52},
    {"epoch": 0.2782152230971129, "grad_norm": 0.41021488877460305, "learning_rate": 5.978382890701347e-06, "loss": 1.0262, "step": 53},
    {"epoch": 0.28346456692913385, "grad_norm": 0.39997016380492506, "learning_rate": 5.9749340865322284e-06, "loss": 1.0275, "step": 54},
    {"epoch": 0.2887139107611549, "grad_norm": 0.3839823787027912, "learning_rate": 5.971231292625615e-06, "loss": 0.9374, "step": 55},
    {"epoch": 0.29396325459317585, "grad_norm": 0.4125068495663659, "learning_rate": 5.967274825113438e-06, "loss": 0.9954, "step": 56},
    {"epoch": 0.2992125984251969, "grad_norm": 0.3908377197765856, "learning_rate": 5.963065021785414e-06, "loss": 0.9671, "step": 57},
    {"epoch": 0.30446194225721784, "grad_norm": 0.3850488592862481, "learning_rate": 5.958602242060207e-06, "loss": 0.9657, "step": 58},
    {"epoch": 0.30971128608923887, "grad_norm": 0.3877990366088493, "learning_rate": 5.95388686695475e-06, "loss": 0.9678, "step": 59},
    {"epoch": 0.31496062992125984, "grad_norm": 0.40470471194287355, "learning_rate": 5.948919299051706e-06, "loss": 1.0149, "step": 60},
    {"epoch": 0.32020997375328086, "grad_norm": 0.42889495063392963, "learning_rate": 5.943699962465096e-06, "loss": 1.033, "step": 61},
    {"epoch": 0.32545931758530183, "grad_norm": 0.39164358737100274, "learning_rate": 5.9382293028040985e-06, "loss": 0.9761, "step": 62},
    {"epoch": 0.33070866141732286, "grad_norm": 0.3869342590567232, "learning_rate": 5.9325077871349975e-06, "loss": 0.9982, "step": 63},
    {"epoch": 0.3359580052493438, "grad_norm": 0.39264627926569035, "learning_rate": 5.9265359039413105e-06, "loss": 0.9667, "step": 64},
    {"epoch": 0.34120734908136485, "grad_norm": 0.3887717698297268, "learning_rate": 5.920314163082079e-06, "loss": 0.9806, "step": 65},
    {"epoch": 0.3464566929133858, "grad_norm": 0.40896336915084297, "learning_rate": 5.913843095748342e-06, "loss": 1.0135, "step": 66},
    {"epoch": 0.35170603674540685, "grad_norm": 0.3610209560875707, "learning_rate": 5.907123254417783e-06, "loss": 0.956, "step": 67},
    {"epoch": 0.3569553805774278, "grad_norm": 0.38154744815823505, "learning_rate": 5.9001552128075625e-06, "loss": 1.0045, "step": 68},
    {"epoch": 0.36220472440944884, "grad_norm": 0.4094826396119445, "learning_rate": 5.892939565825335e-06, "loss": 1.0069, "step": 69},
    {"epoch": 0.3674540682414698, "grad_norm": 0.39129138622932325, "learning_rate": 5.885476929518457e-06, "loss": 0.9525, "step": 70},
    {"epoch": 0.37270341207349084, "grad_norm": 0.3712890701175899, "learning_rate": 5.8777679410213956e-06, "loss": 0.9792, "step": 71},
    {"epoch": 0.3779527559055118, "grad_norm": 0.4086264062600148, "learning_rate": 5.869813258501323e-06, "loss": 0.9926, "step": 72},
    {"epoch": 0.38320209973753283, "grad_norm": 0.368975878599487, "learning_rate": 5.861613561101934e-06, "loss": 0.9643, "step": 73},
    {"epoch": 0.3884514435695538, "grad_norm": 0.36792811629461203, "learning_rate": 5.853169548885461e-06, "loss": 0.9867, "step": 74},
    {"epoch": 0.3937007874015748, "grad_norm": 0.3566251893981936, "learning_rate": 5.844481942772898e-06, "loss": 1.0069, "step": 75},
    {"epoch": 0.3989501312335958, "grad_norm": 0.4578529359685586, "learning_rate": 5.835551484482459e-06, "loss": 1.0173, "step": 76},
    {"epoch": 0.4041994750656168, "grad_norm": 0.3935925285922137, "learning_rate": 5.826378936466249e-06, "loss": 0.9743, "step": 77},
    {"epoch": 0.4094488188976378, "grad_norm": 0.4109939217838428, "learning_rate": 5.81696508184517e-06, "loss": 0.9866, "step": 78},
    {"epoch": 0.4146981627296588, "grad_norm": 0.3839870332489822, "learning_rate": 5.807310724342058e-06, "loss": 0.9516, "step": 79},
    {"epoch": 0.4199475065616798, "grad_norm": 0.3774576797883406, "learning_rate": 5.797416688213067e-06, "loss": 0.9895, "step": 80},
    {"epoch": 0.4251968503937008, "grad_norm": 0.3817468964498129, "learning_rate": 5.787283818177297e-06, "loss": 0.9632, "step": 81},
    {"epoch": 0.4304461942257218, "grad_norm": 0.60843002346461, "learning_rate": 5.776912979344669e-06, "loss": 1.0166, "step": 82},
    {"epoch": 0.4356955380577428, "grad_norm": 0.3858713700245362, "learning_rate": 5.766305057142073e-06, "loss": 0.9976, "step": 83},
    {"epoch": 0.4409448818897638, "grad_norm": 0.3724153436541016, "learning_rate": 5.755460957237769e-06, "loss": 0.9645, "step": 84},
    {"epoch": 0.4461942257217848, "grad_norm": 0.38201105695018567, "learning_rate": 5.744381605464064e-06, "loss": 0.9899, "step": 85},
    {"epoch": 0.45144356955380577, "grad_norm": 0.38383930861007165, "learning_rate": 5.7330679477382655e-06, "loss": 0.9919, "step": 86},
    {"epoch": 0.4566929133858268, "grad_norm": 0.4078870418259581, "learning_rate": 5.7215209499819296e-06, "loss": 0.9797, "step": 87},
    {"epoch": 0.46194225721784776, "grad_norm": 0.38463767466523974, "learning_rate": 5.709741598038387e-06, "loss": 0.9597, "step": 88},
    {"epoch": 0.4671916010498688, "grad_norm": 0.36309855116472584, "learning_rate": 5.697730897588577e-06, "loss": 0.9737, "step": 89},
    {"epoch": 0.47244094488188976, "grad_norm": 0.4106701446638758, "learning_rate": 5.685489874065187e-06, "loss": 0.9683, "step": 90},
    {"epoch": 0.4776902887139108, "grad_norm": 0.37110409255145443, "learning_rate": 5.673019572565103e-06, "loss": 1.0418, "step": 91},
    {"epoch": 0.48293963254593175, "grad_norm": 0.3558357783330656, "learning_rate": 5.660321057760186e-06, "loss": 1.0055, "step": 92},
    {"epoch": 0.4881889763779528, "grad_norm": 0.40499489938404787, "learning_rate": 5.6473954138063674e-06, "loss": 1.0113, "step": 93},
    {"epoch": 0.49343832020997375, "grad_norm": 0.39428526462199764, "learning_rate": 5.634243744251094e-06, "loss": 0.9875, "step": 94},
    {"epoch": 0.49868766404199477, "grad_norm": 0.3711741011240413, "learning_rate": 5.620867171939109e-06, "loss": 0.9749, "step": 95},
    {"epoch": 0.5039370078740157, "grad_norm": 0.3961340085644134, "learning_rate": 5.607266838916585e-06, "loss": 0.982, "step": 96},
    {"epoch": 0.5091863517060368, "grad_norm": 0.3784646685814138, "learning_rate": 5.593443906333624e-06, "loss": 0.9957, "step": 97},
    {"epoch": 0.5144356955380578, "grad_norm": 0.3750460397069026, "learning_rate": 5.579399554345118e-06, "loss": 0.9755, "step": 98},
    {"epoch": 0.5196850393700787, "grad_norm": 0.3746718538274792, "learning_rate": 5.565134982009994e-06, "loss": 0.9736, "step": 99},
    {"epoch": 0.5249343832020997, "grad_norm": 0.38418890409196027, "learning_rate": 5.550651407188843e-06, "loss": 0.9506, "step": 100},
    {"epoch": 0.5301837270341208, "grad_norm": 0.422976375435725, "learning_rate": 5.535950066439941e-06, "loss": 1.0141, "step": 101},
    {"epoch": 0.5354330708661418, "grad_norm": 0.38354451243133536, "learning_rate": 5.521032214913679e-06, "loss": 0.9618, "step": 102},
    {"epoch": 0.5406824146981627, "grad_norm": 0.38257660011773076, "learning_rate": 5.505899126245397e-06, "loss": 0.939, "step": 103},
    {"epoch": 0.5459317585301837, "grad_norm": 0.3768438915225408, "learning_rate": 5.490552092446652e-06, "loss": 0.9675, "step": 104},
    {"epoch": 0.5511811023622047, "grad_norm": 0.3749655286727107, "learning_rate": 5.474992423794907e-06, "loss": 0.9592, "step": 105},
    {"epoch": 0.5564304461942258, "grad_norm": 0.38461916993489687, "learning_rate": 5.459221448721664e-06, "loss": 0.9623, "step": 106},
    {"epoch": 0.5616797900262467, "grad_norm": 0.35648642966931204, "learning_rate": 5.443240513699045e-06, "loss": 0.985, "step": 107},
    {"epoch": 0.5669291338582677, "grad_norm": 0.4051560712719681, "learning_rate": 5.427050983124842e-06, "loss": 0.9407, "step": 108},
    {"epoch": 0.5721784776902887, "grad_norm": 0.3769879713701903, "learning_rate": 5.410654239206021e-06, "loss": 0.968, "step": 109},
    {"epoch": 0.5774278215223098, "grad_norm": 0.3746822083724367, "learning_rate": 5.394051681840719e-06, "loss": 0.9497, "step": 110},
    {"epoch": 0.5826771653543307, "grad_norm": 0.3987231911136733, "learning_rate": 5.3772447284987216e-06, "loss": 0.961, "step": 111},
    {"epoch": 0.5879265091863517, "grad_norm": 0.37848222525971176, "learning_rate": 5.36023481410045e-06, "loss": 0.9707, "step": 112},
    {"epoch": 0.5931758530183727, "grad_norm": 0.3794904855253974, "learning_rate": 5.343023390894446e-06, "loss": 0.9714, "step": 113},
    {"epoch": 0.5984251968503937, "grad_norm": 0.37452267525256994, "learning_rate": 5.325611928333389e-06, "loss": 0.9406, "step": 114},
    {"epoch": 0.6036745406824147, "grad_norm": 0.39474437059829304, "learning_rate": 5.308001912948637e-06, "loss": 0.9626, "step": 115},
    {"epoch": 0.6089238845144357, "grad_norm": 0.4023921986663554, "learning_rate": 5.290194848223309e-06, "loss": 0.9889, "step": 116},
    {"epoch": 0.6141732283464567, "grad_norm": 0.39963771712171875, "learning_rate": 5.272192254463929e-06, "loss": 0.9639, "step": 117},
    {"epoch": 0.6194225721784777, "grad_norm": 0.3893586064595733, "learning_rate": 5.2539956686706205e-06, "loss": 0.9469, "step": 118},
    {"epoch": 0.6246719160104987, "grad_norm": 0.4651495625439333, "learning_rate": 5.2356066444058875e-06, "loss": 0.9658, "step": 119},
    {"epoch": 0.6299212598425197, "grad_norm": 0.39599728107932586, "learning_rate": 5.217026751661978e-06, "loss": 1.0137, "step": 120},
    {"epoch": 0.6351706036745407, "grad_norm": 0.406988761369817, "learning_rate": 5.198257576726835e-06, "loss": 0.9306, "step": 121},
    {"epoch": 0.6404199475065617, "grad_norm": 0.3611939094322339, "learning_rate": 5.179300722048673e-06, "loss": 0.9462, "step": 122},
    {"epoch": 0.6456692913385826, "grad_norm": 0.3809841775392484, "learning_rate": 5.1601578060991645e-06, "loss": 0.953, "step": 123},
    {"epoch": 0.6509186351706037, "grad_norm": 0.46022843064705843, "learning_rate": 5.1408304632352575e-06, "loss": 0.9422, "step": 124},
    {"epoch": 0.6561679790026247, "grad_norm": 0.3979704646560941, "learning_rate": 5.1213203435596425e-06, "loss": 0.9751, "step": 125},
    {"epoch": 0.6614173228346457, "grad_norm": 0.39388496260457084, "learning_rate": 5.101629112779873e-06, "loss": 0.9722, "step": 126},
    {"epoch": 0.6666666666666666, "grad_norm": 0.3899148438115094, "learning_rate": 5.08175845206615e-06, "loss": 0.9652, "step": 127},
    {"epoch": 0.6719160104986877, "grad_norm": 0.37391882787694275, "learning_rate": 5.061710057907788e-06, "loss": 0.9621, "step": 128},
    {"epoch": 0.6771653543307087, "grad_norm": 0.39500875865406576, "learning_rate": 5.041485641968385e-06, "loss": 0.9899, "step": 129},
    {"epoch": 0.6824146981627297, "grad_norm": 0.37540362490802714, "learning_rate": 5.021086930939672e-06, "loss": 0.9472, "step": 130},
    {"epoch": 0.6876640419947506, "grad_norm": 0.3940788728379769, "learning_rate": 5.000515666394105e-06, "loss": 0.9479, "step": 131},
    {"epoch": 0.6929133858267716, "grad_norm": 0.3919125365655477, "learning_rate": 4.979773604636169e-06, "loss": 0.9624, "step": 132},
    {"epoch": 0.6981627296587927, "grad_norm": 0.3804552314744538, "learning_rate": 4.958862516552433e-06, "loss": 0.9806, "step": 133},
    {"epoch": 0.7034120734908137, "grad_norm": 0.3674434286105591, "learning_rate": 4.937784187460362e-06, "loss": 0.9511, "step": 134},
    {"epoch": 0.7086614173228346, "grad_norm": 0.4109777494732396, "learning_rate": 4.916540416955884e-06, "loss": 0.9943, "step": 135},
    {"epoch": 0.7139107611548556, "grad_norm": 0.40231567788837497, "learning_rate": 4.895133018759753e-06, "loss": 0.9798, "step": 136},
    {"epoch": 0.7191601049868767, "grad_norm": 0.3721834479908975, "learning_rate": 4.873563820562698e-06, "loss": 0.9504, "step": 137},
    {"epoch": 0.7244094488188977, "grad_norm": 0.36127526200518306, "learning_rate": 4.851834663869379e-06, "loss": 0.9517, "step": 138},
    {"epoch": 0.7296587926509186, "grad_norm": 0.3513827139135777, "learning_rate": 4.82994740384117e-06, "loss": 0.9835, "step": 139},
    {"epoch": 0.7349081364829396, "grad_norm": 0.36760728272750326, "learning_rate": 4.80790390913777e-06, "loss": 0.9503, "step": 140},
    {"epoch": 0.7401574803149606, "grad_norm": 0.36275280721999276, "learning_rate": 4.785706061757656e-06, "loss": 0.9743, "step": 141},
    {"epoch": 0.7454068241469817, "grad_norm": 0.3733380512329921, "learning_rate": 4.763355756877419e-06, "loss": 0.9384, "step": 142},
    {"epoch": 0.7506561679790026, "grad_norm": 0.3801691027568987, "learning_rate": 4.740854902689947e-06, "loss": 0.9296, "step": 143},
    {"epoch": 0.7559055118110236, "grad_norm": 0.39053906811778566, "learning_rate": 4.718205420241516e-06, "loss": 0.9488, "step": 144},
    {"epoch": 0.7611548556430446, "grad_norm": 0.3923993707534958, "learning_rate": 4.695409243267776e-06, "loss": 0.9383, "step": 145},
    {"epoch": 0.7664041994750657, "grad_norm": 0.364792552828712, "learning_rate": 4.672468318028657e-06, "loss": 0.9193, "step": 146},
    {"epoch": 0.7716535433070866, "grad_norm": 0.35070825551906964, "learning_rate": 4.649384603142202e-06, "loss": 0.9164, "step": 147},
    {"epoch": 0.7769028871391076, "grad_norm": 0.37099778180795795, "learning_rate": 4.626160069417348e-06, "loss": 0.9425, "step": 148},
    {"epoch": 0.7821522309711286, "grad_norm": 0.36954118968922517, "learning_rate": 4.602796699685665e-06, "loss": 0.9265, "step": 149},
    {"epoch": 0.7874015748031497, "grad_norm": 0.4076466706382121, "learning_rate": 4.579296488632067e-06, "loss": 1.0133, "step": 150},
    {"epoch": 0.7926509186351706, "grad_norm": 0.4015334925568992, "learning_rate": 4.5556614426245165e-06, "loss": 0.9486, "step": 151},
    {"epoch": 0.7979002624671916, "grad_norm": 0.39628644809730684, "learning_rate": 4.5318935795427206e-06, "loss": 0.9605, "step": 152},
    {"epoch": 0.8031496062992126, "grad_norm": 0.36792154742540445, "learning_rate": 4.507994928605862e-06, "loss": 0.9287, "step": 153},
    {"epoch": 0.8083989501312336, "grad_norm": 0.3887839296706913, "learning_rate": 4.483967530199337e-06, "loss": 0.951, "step": 154},
    {"epoch": 0.8136482939632546, "grad_norm": 0.36716852968968616, "learning_rate": 4.459813435700569e-06, "loss": 0.9702, "step": 155},
    {"epoch": 0.8188976377952756, "grad_norm": 0.3533521076976156, "learning_rate": 4.4355347073038595e-06, "loss": 0.9612, "step": 156},
    {"epoch": 0.8241469816272966, "grad_norm": 0.3499649930079787, "learning_rate": 4.411133417844328e-06, "loss": 0.9599, "step": 157},
    {"epoch": 0.8293963254593176, "grad_norm": 0.38582146832565867, "learning_rate": 4.38661165062094e-06, "loss": 0.9894, "step": 158},
    {"epoch": 0.8346456692913385, "grad_norm": 0.39040836855795735, "learning_rate": 4.36197149921864e-06, "loss": 0.9747, "step": 159},
    {"epoch": 0.8398950131233596, "grad_norm": 0.3798580758700489, "learning_rate": 4.3372150673296155e-06, "loss": 0.9654, "step": 160},
    {"epoch": 0.8451443569553806, "grad_norm": 0.3764456540061034, "learning_rate": 4.3123444685736795e-06, "loss": 0.9823, "step": 161},
    {"epoch": 0.8503937007874016, "grad_norm": 0.3771195417830333, "learning_rate": 4.287361826317827e-06, "loss": 0.9456, "step": 162},
    {"epoch": 0.8556430446194225, "grad_norm": 0.37650137746409273, "learning_rate": 4.262269273494946e-06, "loss": 1.0022, "step": 163},
    {"epoch": 0.8608923884514436, "grad_norm": 0.38148353077474145, "learning_rate": 4.237068952421711e-06, "loss": 0.964, "step": 164},
    {"epoch": 0.8661417322834646, "grad_norm": 0.3982519128695332, "learning_rate": 4.2117630146156845e-06, "loss": 0.9673, "step": 165},
    {"epoch": 0.8713910761154856, "grad_norm": 0.36000775624632003, "learning_rate": 4.186353620611627e-06, "loss": 0.9359, "step": 166},
    {"epoch": 0.8766404199475065, "grad_norm": 0.36850454735662447, "learning_rate": 4.160842939777036e-06, "loss": 0.9422, "step": 167},
    {"epoch": 0.8818897637795275, "grad_norm": 0.37804115639757085, "learning_rate": 4.135233150126931e-06, "loss": 0.9454, "step": 168},
    {"epoch": 0.8871391076115486, "grad_norm": 0.3689383402086321, "learning_rate": 4.109526438137908e-06, "loss": 0.9455, "step": 169},
    {"epoch": 0.8923884514435696, "grad_norm": 0.46527154775209717, "learning_rate": 4.08372499856146e-06, "loss": 0.9386, "step": 170},
    {"epoch": 0.8976377952755905, "grad_norm": 0.45653306710128705, "learning_rate": 4.0578310342365975e-06, "loss": 0.9616, "step": 171},
    {"epoch": 0.9028871391076115, "grad_norm": 0.3773630567359451, "learning_rate": 4.031846755901785e-06, "loss": 0.9285, "step": 172},
    {"epoch": 0.9081364829396326, "grad_norm": 0.3644595191521506, "learning_rate": 4.005774382006182e-06, "loss": 0.9663, "step": 173},
    {"epoch": 0.9133858267716536, "grad_norm": 0.3539767481135477, "learning_rate": 3.97961613852025e-06, "loss": 0.9564, "step": 174},
    {"epoch": 0.9186351706036745, "grad_norm": 0.3819676152776953, "learning_rate": 3.953374258745705e-06, "loss": 0.9607, "step": 175},
    {"epoch": 0.9238845144356955, "grad_norm": 0.38397675786726637, "learning_rate": 3.927050983124842e-06, "loss": 0.9539, "step": 176},
    {"epoch": 0.9291338582677166, "grad_norm": 0.3979084367711538, "learning_rate": 3.900648559049258e-06, "loss": 0.9505, "step": 177},
    {"epoch": 0.9343832020997376, "grad_norm": 0.3756154385935223, "learning_rate": 3.874169240667974e-06, "loss": 0.9519, "step": 178},
    {"epoch": 0.9396325459317585, "grad_norm": 0.40551973597201274, "learning_rate": 3.847615288694985e-06, "loss": 0.9727, "step": 179},
    {"epoch": 0.9448818897637795, "grad_norm": 0.4149625851710124, "learning_rate": 3.820988970216249e-06, "loss": 0.9464, "step": 180},
    {"epoch": 0.9501312335958005, "grad_norm": 0.35739115830542967, "learning_rate": 3.7942925584961272e-06, "loss": 0.9427, "step": 181},
    {"epoch": 0.9553805774278216, "grad_norm": 0.3759540038847051, "learning_rate": 3.767528332783307e-06, "loss": 0.9679, "step": 182},
    {"epoch": 0.9606299212598425, "grad_norm": 0.3525867658299593, "learning_rate": 3.740698578116199e-06, "loss": 0.9183, "step": 183},
    {"epoch": 0.9658792650918635, "grad_norm": 0.3557123352774738, "learning_rate": 3.7138055851278564e-06, "loss": 0.9383, "step": 184},
    {"epoch": 0.9711286089238845, "grad_norm": 0.3623514252763418, "learning_rate": 3.6868516498504025e-06, "loss": 0.9246, "step": 185},
    {"epoch": 0.9763779527559056, "grad_norm": 0.38495496418054853, "learning_rate": 3.6598390735190066e-06, "loss": 0.9612, "step": 186},
    {"epoch": 0.9816272965879265, "grad_norm": 0.3648599004428126, "learning_rate": 3.63277016237541e-06, "loss": 0.9293, "step": 187},
    {"epoch": 0.9868766404199475, "grad_norm": 0.38871547084803876, "learning_rate": 3.6056472274710305e-06, "loss": 0.9973, "step": 188},
    {"epoch": 0.9921259842519685, "grad_norm": 0.38590844403642666, "learning_rate": 3.578472584469651e-06, "loss": 0.9457, "step": 189},
    {"epoch": 0.9973753280839895, "grad_norm": 0.3872507088649178, "learning_rate": 3.5512485534497116e-06, "loss": 0.9462, "step": 190},
    {"epoch": 1.0, "grad_norm": 0.3872507088649178, "learning_rate": 3.523977458706237e-06, "loss": 0.9693, "step": 191},
    {"epoch": 1.005249343832021, "grad_norm": 0.6232728744646114, "learning_rate": 3.49666162855239e-06, "loss": 0.887, "step": 192},
    {"epoch": 1.010498687664042, "grad_norm": 0.4149641950734625, "learning_rate": 3.469303395120693e-06, "loss": 0.8826, "step": 193},
    {"epoch": 1.015748031496063, "grad_norm": 0.37273340109017755, "learning_rate": 3.441905094163913e-06, "loss": 0.8893, "step": 194},
    {"epoch": 1.020997375328084, "grad_norm": 0.4113832689982837, "learning_rate": 3.414469064855647e-06, "loss": 0.9205, "step": 195},
    {"epoch": 1.026246719160105, "grad_norm": 0.49485155842511663, "learning_rate": 3.3869976495906104e-06, "loss": 0.9074, "step": 196},
    {"epoch": 1.031496062992126, "grad_norm": 0.3736781934252868, "learning_rate": 3.3594931937846498e-06, "loss": 0.8966, "step": 197},
    {"epoch": 1.036745406824147, "grad_norm": 0.3758650059773124, "learning_rate": 3.3319580456745023e-06, "loss": 0.8759, "step": 198},
    {"epoch": 1.041994750656168, "grad_norm": 0.4056031624712629, "learning_rate": 3.3043945561173092e-06, "loss": 0.8788, "step": 199},
    {"epoch": 1.047244094488189, "grad_norm": 0.36344982085137467, "learning_rate": 3.2768050783899063e-06, "loss": 0.873, "step": 200},
    {"epoch": 1.05249343832021, "grad_norm": 0.3760103676246, "learning_rate": 3.249191967987912e-06, "loss": 0.899, "step": 201},
    {"epoch": 1.057742782152231, "grad_norm": 0.39433477834527153, "learning_rate": 3.221557582424622e-06, "loss": 0.9019, "step": 202},
    {"epoch": 1.0629921259842519, "grad_norm": 0.3595753440791428, "learning_rate": 3.1939042810297328e-06, "loss": 0.8781, "step": 203},
    {"epoch": 1.068241469816273, "grad_norm": 0.3743448170598354, "learning_rate": 3.16623442474791e-06, "loss": 0.8689, "step": 204},
    {"epoch": 1.073490813648294, "grad_norm": 0.3618551186966609, "learning_rate": 3.138550375937219e-06, "loss": 0.9094, "step": 205},
    {"epoch": 1.078740157480315, "grad_norm": 0.36577516842050983, "learning_rate": 3.1108544981674356e-06, "loss": 0.8668, "step": 206},
    {"epoch": 1.083989501312336, "grad_norm": 0.3985134455319658, "learning_rate": 3.0831491560182495e-06, "loss": 0.9016, "step": 207},
    {"epoch": 1.0892388451443569, "grad_norm": 0.37808489525197075, "learning_rate": 3.0554367148773897e-06, "loss": 0.895, "step": 208},
    {"epoch": 1.094488188976378, "grad_norm": 0.4112784941005797, "learning_rate": 3.027719540738673e-06, "loss": 0.859, "step": 209},
    {"epoch": 1.099737532808399, "grad_norm": 0.3830296759827936, "learning_rate": 3e-06, "loss": 0.8569, "step": 210},
    {"epoch": 1.10498687664042, "grad_norm": 0.3930755503999148, "learning_rate": 2.972280459261328e-06, "loss": 0.8774, "step": 211},
    {"epoch": 1.110236220472441, "grad_norm": 0.36738851637178116, "learning_rate": 2.944563285122611e-06, "loss": 0.9086, "step": 212},
    {"epoch": 1.1154855643044619, "grad_norm": 0.3897160841039193, "learning_rate": 2.9168508439817515e-06, "loss": 0.889, "step": 213},
    {"epoch": 1.120734908136483, "grad_norm": 0.39858146379374537, "learning_rate": 2.889145501832566e-06, "loss": 0.8964, "step": 214},
    {"epoch": 1.125984251968504, "grad_norm": 0.3739395525411432, "learning_rate": 2.861449624062782e-06, "loss": 0.8884, "step": 215},
    {"epoch": 1.1312335958005248, "grad_norm": 0.3755768464864809, "learning_rate": 2.83376557525209e-06, "loss": 0.851, "step": 216},
    {"epoch": 1.136482939632546, "grad_norm": 0.38260315757882735, "learning_rate": 2.8060957189702674e-06, "loss": 0.9152, "step": 217},
    {"epoch": 1.141732283464567, "grad_norm": 0.4205379839527009, "learning_rate": 2.7784424175753784e-06, "loss": 0.8683, "step": 218},
    {"epoch": 1.1469816272965878, "grad_norm": 0.38325260941818995, "learning_rate": 2.7508080320120888e-06, "loss": 0.8943, "step": 219},
    {"epoch": 1.152230971128609, "grad_norm": 0.3763198826603672, "learning_rate": 2.7231949216100943e-06, "loss": 0.8676, "step": 220},
    {"epoch": 1.1574803149606299, "grad_norm": 0.3767162287387105, "learning_rate": 2.6956054438826918e-06, "loss": 0.8482, "step": 221},
    {"epoch": 1.162729658792651, "grad_norm": 0.3486273740901837, "learning_rate": 2.668041954325498e-06, "loss": 0.8879, "step": 222},
    {"epoch": 1.167979002624672, "grad_norm": 0.39084218665366566, "learning_rate": 2.640506806215351e-06, "loss": 0.8679, "step": 223},
    {"epoch": 1.1732283464566928, "grad_norm": 0.3538552501730603, "learning_rate": 2.613002350409391e-06, "loss": 0.8871, "step": 224},
    {"epoch": 1.178477690288714, "grad_norm": 0.36544200913577, "learning_rate": 2.585530935144354e-06, "loss": 0.8616, "step": 225},
    {"epoch": 1.1837270341207349, "grad_norm": 0.3985990462573467, "learning_rate": 2.558094905836087e-06, "loss": 0.8917, "step": 226},
    {"epoch": 1.188976377952756, "grad_norm": 0.42608518999556655, "learning_rate": 2.5306966048793067e-06, "loss": 0.8817, "step": 227},
    {"epoch": 1.194225721784777, "grad_norm": 0.37952769789031354, "learning_rate": 2.5033383714476097e-06, "loss": 0.8985, "step": 228},
    {"epoch": 1.1994750656167978, "grad_norm": 0.40804864076806885, "learning_rate": 2.4760225412937633e-06, "loss": 0.9073, "step": 229},
    {"epoch": 1.204724409448819, "grad_norm": 0.4167713152946991, "learning_rate": 2.4487514465502885e-06, "loss": 0.8566, "step": 230},
    {"epoch": 1.20997375328084, "grad_norm": 0.4022153540631621, "learning_rate": 2.42152741553035e-06, "loss": 0.8713, "step": 231},
    {"epoch": 1.2152230971128608, "grad_norm": 0.4222065137992956, "learning_rate": 2.39435277252897e-06, "loss": 0.9035, "step": 232},
    {"epoch": 1.220472440944882, "grad_norm": 0.3666365807384159, "learning_rate": 2.3672298376245908e-06, "loss": 0.8637, "step": 233},
    {"epoch": 1.2257217847769029, "grad_norm": 0.3976853335036615, "learning_rate": 2.3401609264809953e-06, "loss": 0.9398, "step": 234},
    {"epoch": 1.2309711286089238, "grad_norm": 0.37956934109451046, "learning_rate": 2.3131483501495985e-06, "loss": 0.8353, "step": 235},
    {"epoch": 1.236220472440945, "grad_norm": 0.33722056538083744, "learning_rate": 2.2861944148721446e-06, "loss": 0.8786, "step": 236},
    {"epoch": 1.2414698162729658, "grad_norm": 0.49777382093647954, "learning_rate": 2.2593014218838e-06, "loss": 0.8834, "step": 237},
    {"epoch": 1.246719160104987, "grad_norm": 0.35315516410389436, "learning_rate": 2.232471667216693e-06, "loss": 0.8442, "step": 238},
    {"epoch": 1.2519685039370079, "grad_norm": 0.3816124424363711, "learning_rate": 2.2057074415038725e-06, "loss": 0.8573, "step": 239},
    {"epoch": 1.257217847769029, "grad_norm": 0.36319142999803095, "learning_rate": 2.1790110297837514e-06, "loss": 0.8481, "step": 240},
    {"epoch": 1.26246719160105, "grad_norm": 0.34672889281207053, "learning_rate": 2.152384711305015e-06, "loss": 0.8623, "step": 241},
    {"epoch": 1.2677165354330708, "grad_norm": 0.37448151544392105, "learning_rate": 2.1258307593320262e-06, "loss": 0.8751, "step": 242},
    {"epoch": 1.272965879265092, "grad_norm": 0.37082567424502005, "learning_rate": 2.099351440950742e-06, "loss": 0.8914, "step": 243},
    {"epoch": 1.2782152230971129, "grad_norm": 0.39074992783073415, "learning_rate": 2.072949016875158e-06, "loss": 0.9222, "step": 244},
    {"epoch": 1.2834645669291338, "grad_norm": 0.4150437401629804, "learning_rate": 2.046625741254295e-06, "loss": 0.9475, "step": 245},
    {"epoch": 1.288713910761155, "grad_norm": 0.4504166670407193, "learning_rate": 2.0203838614797505e-06, "loss": 0.9026, "step": 246},
    {"epoch": 1.2939632545931758, "grad_norm": 0.38345958484903814, "learning_rate": 1.994225617993819e-06, "loss": 0.9074, "step": 247},
    {"epoch": 1.2992125984251968, "grad_norm": 0.37086048031752866, "learning_rate": 1.9681532440982154e-06, "loss": 0.8755, "step": 248},
    {"epoch": 1.304461942257218, "grad_norm": 0.3775524407980251, "learning_rate": 1.942168965763402e-06, "loss": 0.8986, "step": 249},
    {"epoch": 1.3097112860892388, "grad_norm": 0.364796377340789, "learning_rate": 1.916275001438541e-06, "loss": 0.867, "step": 250},
    {"epoch": 1.3149606299212597, "grad_norm": 0.3705604843330414, "learning_rate": 1.8904735618620928e-06, "loss": 0.8875, "step": 251},
    {"epoch": 1.3202099737532809, "grad_norm": 0.3847344001283667, "learning_rate": 1.8647668498730693e-06, "loss": 0.8678, "step": 252},
    {"epoch": 1.3254593175853018, "grad_norm": 0.3507183610862785, "learning_rate": 1.8391570602229647e-06, "loss": 0.8895, "step": 253},
    {"epoch": 1.330708661417323, "grad_norm": 0.34464955572346173, "learning_rate": 1.8136463793883725e-06, "loss": 0.9112, "step": 254},
    {"epoch": 1.3359580052493438, "grad_norm": 0.3804540728076062, "learning_rate": 1.7882369853843155e-06, "loss": 0.8818, "step": 255},
    {"epoch": 1.341207349081365, "grad_norm": 0.38671544491057547, "learning_rate": 1.76293104757829e-06, "loss": 0.8712, "step": 256},
    {"epoch": 1.3464566929133859, "grad_norm": 0.35028636565033566, "learning_rate": 1.7377307265050559e-06, "loss": 0.8795, "step": 257},
    {"epoch": 1.3517060367454068, "grad_norm": 0.3596694021401425, "learning_rate": 1.7126381736821732e-06, "loss": 0.8791, "step": 258},
    {"epoch": 1.356955380577428, "grad_norm": 0.3833574983214166, "learning_rate": 1.6876555314263213e-06, "loss": 0.9108, "step": 259},
    {"epoch": 1.3622047244094488, "grad_norm": 0.3701840047085969, "learning_rate": 1.6627849326703855e-06, "loss": 0.8695, "step": 260},
    {"epoch": 1.3674540682414698, "grad_norm": 0.36098816535443995, "learning_rate": 1.6380285007813598e-06, "loss": 0.876, "step": 261},
    {"epoch": 1.372703412073491, "grad_norm": 0.3900890284585014, "learning_rate": 1.6133883493790609e-06, "loss": 0.8498, "step": 262},
    {"epoch": 1.3779527559055118, "grad_norm": 0.34906551126755136, "learning_rate": 1.5888665821556724e-06, "loss": 0.8513, "step": 263},
    {"epoch": 1.3832020997375327, "grad_norm": 0.3753732283477496, "learning_rate": 1.5644652926961407e-06, "loss": 0.8714, "step": 264},
    {"epoch": 1.3884514435695539, "grad_norm": 0.34748864593560347, "learning_rate": 1.5401865642994315e-06, "loss": 0.9124, "step": 265},
    {"epoch": 1.3937007874015748, "grad_norm": 0.36698053817770165, "learning_rate": 1.5160324698006642e-06, "loss": 0.8814, "step": 266},
    {"epoch": 1.3989501312335957, "grad_norm": 0.4000964153653425, "learning_rate": 1.4920050713941398e-06, "loss": 0.9082, "step": 267},
    {"epoch": 1.4041994750656168, "grad_norm": 0.3985391177875817, "learning_rate": 1.4681064204572798e-06, "loss": 0.8749, "step": 268},
    {"epoch": 1.4094488188976377, "grad_norm": 0.3578122677174226, "learning_rate": 1.4443385573754837e-06, "loss": 0.8608, "step": 269},
    {"epoch": 1.4146981627296589, "grad_norm": 0.3576093239254431, "learning_rate": 1.4207035113679322e-06, "loss": 0.8798, "step": 270},
    {"epoch": 1.4199475065616798, "grad_norm": 0.35299639204379674, "learning_rate": 1.3972033003143348e-06, "loss": 0.8972, "step": 271},
    {"epoch": 1.425196850393701, "grad_norm": 0.3937775289907907, "learning_rate": 1.3738399305826516e-06, "loss": 0.8736, "step": 272},
    {"epoch": 1.4304461942257218, "grad_norm": 0.3691998032129419, "learning_rate": 1.3506153968577983e-06, "loss": 0.8667, "step": 273},
    {"epoch": 1.4356955380577427, "grad_norm": 0.35764876894907843, "learning_rate": 1.3275316819713435e-06, "loss": 0.882, "step": 274},
    {"epoch": 1.4409448818897639, "grad_norm": 0.3859579688778526, "learning_rate": 1.3045907567322243e-06, "loss": 0.844, "step": 275},
    {"epoch": 1.4461942257217848, "grad_norm": 0.3736621084680505, "learning_rate": 1.2817945797584844e-06, "loss": 0.8525, "step": 276},
    {"epoch": 1.4514435695538057, "grad_norm": 0.36602372507940695, "learning_rate": 1.2591450973100532e-06, "loss": 0.8577, "step": 277},
    {"epoch": 1.4566929133858268, "grad_norm": 0.37926054124030645, "learning_rate": 1.236644243122581e-06, "loss": 0.8837, "step": 278},
    {"epoch": 1.4619422572178478, "grad_norm": 0.3680022216795608, "learning_rate": 1.214293938242344e-06, "loss": 0.8984, "step": 279},
    {"epoch": 1.4671916010498687, "grad_norm": 0.37824901927870175, "learning_rate": 1.1920960908622313e-06, "loss": 0.8745, "step": 280},
    {"epoch": 1.4724409448818898, "grad_norm": 0.3489273490529577, "learning_rate": 1.17005259615883e-06, "loss": 0.8628, "step": 281},
    {"epoch": 1.4776902887139107, "grad_norm": 0.3735770062938505, "learning_rate": 1.1481653361306215e-06, "loss": 0.8619, "step": 282},
    {"epoch": 1.4829396325459316, "grad_norm": 0.3458041443504503, "learning_rate": 1.1264361794373032e-06, "loss": 0.8761, "step": 283},
    {"epoch": 1.4881889763779528, "grad_norm": 0.35998420937846626, "learning_rate": 1.104866981240248e-06, "loss": 0.8844, "step": 284},
    {"epoch": 1.4934383202099737, "grad_norm": 0.4029178073367971, "learning_rate": 1.0834595830441168e-06, "loss": 0.8511, "step": 285},
    {"epoch": 1.4986876640419948, "grad_norm": 0.3763622650814437, "learning_rate": 1.0622158125396387e-06, "loss": 0.8599, "step": 286},
    {"epoch": 1.5039370078740157, "grad_norm": 0.3845965137728459, "learning_rate": 1.0411374834475678e-06, "loss": 0.9062, "step": 287},
    {"epoch": 1.5091863517060369, "grad_norm": 0.34964825506869784, "learning_rate": 1.020226395363833e-06, "loss": 0.8525, "step": 288},
    {"epoch": 1.5144356955380578, "grad_norm": 0.38214019455395715, "learning_rate": 9.994843336058968e-07, "loss": 0.8686, "step": 289},
    {"epoch": 1.5196850393700787, "grad_norm": 0.3808975526218143, "learning_rate": 9.789130690603284e-07, "loss": 0.8537, "step": 290},
    {"epoch": 1.5249343832020998, "grad_norm": 0.3761982373529746, "learning_rate": 9.585143580316153e-07, "loss": 0.8826, "step": 291},
    {"epoch": 1.5301837270341208, "grad_norm": 0.351389916026518, "learning_rate": 9.382899420922119e-07, "loss": 0.8683, "step": 292},
    {"epoch": 1.5354330708661417, "grad_norm": 0.3711139029247798, "learning_rate": 9.182415479338512e-07, "loss": 0.8878, "step": 293},
    {"epoch": 1.5406824146981628, "grad_norm": 0.3717732780588312, "learning_rate": 8.983708872201271e-07, "loss": 0.8585, "step": 294},
    {"epoch": 1.5459317585301837, "grad_norm": 0.3742271193984993, "learning_rate": 8.786796564403577e-07, "loss": 0.8579, "step": 295},
    {"epoch": 1.5511811023622046, "grad_norm": 0.42523434659053005, "learning_rate": 8.591695367647433e-07, "loss": 0.8746, "step": 296},
    {"epoch": 1.5564304461942258, "grad_norm": 0.3794388162880317, "learning_rate": 8.398421939008367e-07, "loss": 0.8479, "step": 297},
    {"epoch": 1.5616797900262467, "grad_norm": 0.3588910082794427, "learning_rate": 8.206992779513281e-07, "loss": 0.8329, "step": 298},
    {"epoch": 1.5669291338582676, "grad_norm": 0.37000389491476643, "learning_rate": 8.017424232731664e-07, "loss": 0.8693, "step": 299},
    {"epoch": 1.5721784776902887, "grad_norm": 0.4003207798760719, "learning_rate": 7.829732483380231e-07, "loss": 0.8886, "step": 300},
    {"epoch": 1.5774278215223099, "grad_norm": 0.37170546863230536, "learning_rate": 7.64393355594112e-07, "loss": 0.9035, "step": 301},
    {"epoch": 1.5826771653543306, "grad_norm": 0.35766484669954807, "learning_rate": 7.4600433132938e-07, "loss": 0.848, "step": 302},
    {"epoch": 1.5879265091863517, "grad_norm": 0.33788894124632585, "learning_rate": 7.278077455360717e-07, "loss": 0.8545, "step": 303},
    {"epoch": 1.5931758530183728, "grad_norm": 0.36604275227388566, "learning_rate": 7.09805151776691e-07, "loss": 0.8415, "step": 304},
    {"epoch": 1.5984251968503937, "grad_norm": 0.33845675028801603, "learning_rate": 6.919980870513633e-07, "loss": 0.8472, "step": 305},
    {"epoch": 1.6036745406824147, "grad_norm": 0.35701657873038517, "learning_rate": 6.743880716666104e-07, "loss": 0.8496, "step": 306},
    {"epoch": 1.6089238845144358, "grad_norm": 0.3779107207471187, "learning_rate": 6.569766091055539e-07, "loss": 0.8495, "step": 307},
    {"epoch": 1.6141732283464567, "grad_norm": 0.3872432739805792, "learning_rate": 6.397651858995504e-07, "loss": 0.851, "step": 308},
    {"epoch": 1.6194225721784776, "grad_norm": 0.3595059488828886, "learning_rate": 6.227552715012785e-07, "loss": 0.8855, "step": 309},
    {"epoch": 1.6246719160104988, "grad_norm": 0.37515207797149636, "learning_rate": 6.059483181592815e-07, "loss": 0.8858, "step": 310},
    {"epoch": 1.6299212598425197, "grad_norm": 0.3629206169447269, "learning_rate": 5.893457607939788e-07, "loss": 0.8807, "step": 311},
    {"epoch": 1.6351706036745406, "grad_norm": 0.3545486266254271, "learning_rate": 5.72949016875158e-07, "loss": 0.8955, "step": 312},
    {"epoch": 1.6404199475065617, "grad_norm": 0.3432909080087375, "learning_rate": 5.56759486300955e-07, "loss": 0.8681, "step": 313},
    {"epoch": 1.6456692913385826, "grad_norm": 0.3639385301913205, "learning_rate": 5.40778551278337e-07, "loss": 0.8733, "step": 314},
    {"epoch": 1.6509186351706036, "grad_norm": 0.38920489291790045, "learning_rate": 5.250075762050935e-07, "loss": 0.8745, "step": 315},
    {"epoch": 1.6561679790026247, "grad_norm": 0.3618641411341515, "learning_rate": 5.094479075533486e-07, "loss": 0.8832, "step": 316},
    {"epoch": 1.6614173228346458, "grad_norm": 0.38194003303963936, "learning_rate": 4.941008737546039e-07, "loss": 0.882, "step": 317},
    {"epoch": 1.6666666666666665, "grad_norm": 0.36004508832846943, "learning_rate": 4.789677850863222e-07, "loss": 0.8754, "step": 318},
    {"epoch": 1.6719160104986877, "grad_norm": 0.3713147294795857, "learning_rate": 4.6404993356005967e-07, "loss": 0.8496, "step": 319},
    {"epoch": 1.6771653543307088, "grad_norm": 0.40332844918244803, "learning_rate": 4.4934859281115804e-07, "loss": 0.8985, "step": 320},
    {"epoch": 1.6824146981627297, "grad_norm": 0.37460256078858306, "learning_rate": 4.34865017990007e-07, "loss": 0.8997, "step": 321},
    {"epoch": 1.6876640419947506, "grad_norm": 0.34681859755129757, "learning_rate": 4.2060044565488264e-07, "loss": 0.8596, "step": 322},
    {"epoch": 1.6929133858267718, "grad_norm": 0.36407790059697526, "learning_rate": 4.0655609366637635e-07, "loss": 0.8891, "step": 323},
    {"epoch": 1.6981627296587927, "grad_norm": 0.38031424057937346, "learning_rate": 3.9273316108341493e-07, "loss": 0.9026, "step": 324},
    {"epoch": 1.7034120734908136, "grad_norm": 0.37078158732866173, "learning_rate": 3.791328280608916e-07, "loss": 0.8676, "step": 325},
    {"epoch": 1.7086614173228347, "grad_norm": 0.3744777105843697, "learning_rate": 3.657562557489063e-07, "loss": 0.8692, "step": 326},
    {"epoch": 1.7139107611548556, "grad_norm": 0.35275476224983093, "learning_rate": 3.52604586193633e-07, "loss": 0.878, "step": 327},
    {"epoch": 1.7191601049868765, "grad_norm": 0.3845721220969486, "learning_rate": 3.396789422398143e-07, "loss": 0.8715, "step": 328},
    {"epoch": 1.7244094488188977, "grad_norm": 0.38567935425060995, "learning_rate": 3.269804274348966e-07, "loss": 0.8552, "step": 329},
    {"epoch": 1.7296587926509186, "grad_norm": 0.36984914245968326, "learning_rate": 3.145101259348133e-07, "loss": 0.8905, "step": 330},
    {"epoch": 1.7349081364829395, "grad_norm": 0.3862126346234947, "learning_rate": 3.022691024114234e-07, "loss": 0.8759, "step": 331},
    {"epoch": 1.7401574803149606, "grad_norm": 0.37276644156643624, "learning_rate": 2.9025840196161345e-07, "loss": 0.8996, "step": 332},
    {"epoch": 1.7454068241469818, "grad_norm": 0.3619714746103851, "learning_rate": 2.784790500180704e-07, "loss": 0.8734, "step": 333},
    {"epoch": 1.7506561679790025, "grad_norm": 0.3489848842196673, "learning_rate": 2.6693205226173466e-07, "loss": 0.852, "step": 334},
    {"epoch": 1.7559055118110236, "grad_norm": 0.47292345630417715, "learning_rate": 2.556183945359369e-07, "loss": 0.8524, "step": 335},
    {"epoch": 1.7611548556430447, "grad_norm": 0.3454751472880757, "learning_rate": 2.4453904276223093e-07, "loss": 0.8639, "step": 336},
    {"epoch": 1.7664041994750657, "grad_norm": 0.39520326916346893, "learning_rate": 2.3369494285792648e-07, "loss": 0.9011, "step": 337},
    {"epoch": 1.7716535433070866, "grad_norm": 0.36215879824858, "learning_rate": 2.2308702065533138e-07, "loss": 0.8475, "step": 338},
    {"epoch": 1.7769028871391077, "grad_norm": 0.3785025816595213, "learning_rate": 2.1271618182270402e-07, "loss": 0.8785, "step": 339},
    {"epoch": 1.7821522309711286, "grad_norm": 0.35017564001831825, "learning_rate": 2.0258331178693291e-07, "loss": 0.9251, "step": 340},
    {"epoch": 1.7874015748031495, "grad_norm": 0.3736191583691154, "learning_rate": 1.926892756579427e-07, "loss": 0.8638, "step": 341},
    {"epoch": 1.7926509186351707, "grad_norm": 0.3542533426451256, "learning_rate": 1.8303491815483076e-07, "loss": 0.8501, "step": 342},
    {"epoch": 1.7979002624671916, "grad_norm": 0.3653513060765524, "learning_rate": 1.7362106353375107e-07, "loss": 0.8704, "step": 343},
    {"epoch": 1.8031496062992125, "grad_norm": 0.3699565968914539, "learning_rate": 1.6444851551754158e-07, "loss": 0.8659, "step": 344},
    {"epoch": 1.8083989501312336, "grad_norm": 0.357867752615946, "learning_rate": 1.5551805722710245e-07, "loss": 0.8802, "step": 345},
    {"epoch": 1.8136482939632546, "grad_norm": 0.37125992932731333, "learning_rate": 1.4683045111453941e-07, "loss": 0.8368, "step": 346},
    {"epoch": 1.8188976377952755, "grad_norm": 0.3685836112400432, "learning_rate": 1.3838643889806568e-07, "loss": 0.9235, "step": 347},
    {"epoch": 1.8241469816272966, "grad_norm": 0.36408877164595227, "learning_rate": 1.3018674149867782e-07, "loss": 0.8799, "step": 348},
    {"epoch": 1.8293963254593177, "grad_norm": 0.3642144586766023, "learning_rate": 1.2223205897860533e-07, "loss": 0.8777, "step": 349},
    {"epoch": 1.8346456692913384, "grad_norm": 0.39073442531405206, "learning_rate": 1.1452307048154286e-07, "loss": 0.8797, "step": 350},
    {"epoch": 1.8398950131233596, "grad_norm": 0.3810371400467593, "learning_rate": 1.0706043417466549e-07, "loss": 0.91, "step": 351},
    {"epoch": 1.8451443569553807, "grad_norm": 0.35959663657652996, "learning_rate": 9.984478719243772e-08, "loss": 0.8714, "step": 352},
    {"epoch": 1.8503937007874016, "grad_norm": 0.36819474514599226, "learning_rate": 9.287674558221737e-08, "loss": 0.9048, "step": 353},
    {"epoch": 1.8556430446194225, "grad_norm": 0.3492451150451855, "learning_rate": 8.615690425165823e-08, "loss": 0.8589, "step": 354},
    {"epoch": 1.8608923884514437, "grad_norm": 0.3692991236824256, "learning_rate": 7.968583691792142e-08, "loss": 0.8502, "step": 355},
    {"epoch": 1.8661417322834646, "grad_norm": 0.36506034919430097, "learning_rate": 7.34640960586902e-08, "loss": 0.8948, "step": 356},
    {"epoch": 1.8713910761154855, "grad_norm": 0.39656772129331486, "learning_rate": 6.749221286500273e-08, "loss": 0.872, "step": 357},
    {"epoch": 1.8766404199475066, "grad_norm": 0.3766497135471153, "learning_rate": 6.177069719590234e-08, "loss": 0.8459, "step": 358},
    {"epoch": 1.8818897637795275, "grad_norm": 0.3420155942064905, "learning_rate": 5.6300037534904644e-08, "loss": 0.8797, "step": 359},
    {"epoch": 1.8871391076115485, "grad_norm": 0.3790798456525195, "learning_rate": 5.108070094829465e-08, "loss": 0.8374, "step": 360},
    {"epoch": 1.8923884514435696, "grad_norm": 0.36574760138577367, "learning_rate": 4.6113133045249225e-08, "loss": 0.8507, "step": 361},
    {"epoch": 1.8976377952755905, "grad_norm": 0.39215283711659366, "learning_rate": 4.139775793979228e-08, "loss": 0.9416, "step": 362},
    {"epoch": 1.9028871391076114, "grad_norm": 0.33743123097312766, "learning_rate": 3.693497821458702e-08, "loss": 0.8469, "step": 363},
    {"epoch": 1.9081364829396326, "grad_norm": 0.365685642660726, "learning_rate": 3.272517488656213e-08, "loss": 0.8809, "step": 364},
    {"epoch": 1.9133858267716537, "grad_norm": 0.35701737992686006, "learning_rate": 2.876870737438475e-08, "loss": 0.8576, "step": 365},
    {"epoch": 1.9186351706036744, "grad_norm": 0.4717726718121241, "learning_rate": 2.506591346777176e-08, "loss": 0.8882, "step": 366},
    {"epoch": 1.9238845144356955, "grad_norm": 0.3644123714802389, "learning_rate": 2.1617109298653126e-08, "loss": 0.8806, "step": 367},
    {"epoch": 1.9291338582677167, "grad_norm": 0.37039268056716995, "learning_rate": 1.842258931417917e-08, "loss": 0.8646, "step": 368},
    {"epoch": 1.9343832020997376, "grad_norm": 0.38307646079911417, "learning_rate": 1.5482626251583364e-08, "loss": 0.8605, "step": 369},
    {"epoch": 1.9396325459317585, "grad_norm": 0.34939756521186505, "learning_rate": 1.2797471114896598e-08, "loss": 0.8605, "step": 370},
    {"epoch": 1.9448818897637796, "grad_norm": 0.3630644212614912, "learning_rate": 1.0367353153516335e-08, "loss": 0.8874, "step": 371},
    {"epoch": 1.9501312335958005, "grad_norm": 0.36486914850748925, "learning_rate": 8.192479842635937e-09, "loss": 0.8488, "step": 372},
    {"epoch": 1.9553805774278215, "grad_norm": 0.3477602485063963, "learning_rate": 6.273036865529158e-09, "loss": 0.8865, "step": 373},
    {"epoch": 1.9606299212598426, "grad_norm": 0.3699792979427579, "learning_rate": 4.6091880976981695e-09, "loss": 0.8552, "step": 374},
    {"epoch": 1.9658792650918635, "grad_norm": 0.36569117211522567, "learning_rate": 3.201075592882741e-09, "loss": 0.8771, "step": 375},
    {"epoch": 1.9711286089238844, "grad_norm": 0.3673522439384638, "learning_rate": 2.0488195709312816e-09, "loss": 0.8316, "step": 376},
    {"epoch": 1.9763779527559056, "grad_norm": 0.40741119157495514, "learning_rate": 1.152518407537717e-09, "loss": 0.8686, "step": 377},
    {"epoch": 1.9816272965879265, "grad_norm": 0.3922247762663656, "learning_rate": 5.122486258418713e-10, "loss": 0.88, "step": 378},
    {"epoch": 1.9868766404199474, "grad_norm": 0.36724366518491103, "learning_rate": 1.2806488989636211e-10, "loss": 0.863, "step": 379},
    {"epoch": 1.9921259842519685, "grad_norm": 0.37347664771694333, "learning_rate": 0.0, "loss": 0.8553, "step": 380}
  ],
  "logging_steps": 1,
  "max_steps": 380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 95,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.333503833071944e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}