|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3840639082343302, |
|
"eval_steps": 500, |
|
"global_step": 420000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004572189383742026, |
|
"grad_norm": 187.55120849609375, |
|
"learning_rate": 5e-06, |
|
"loss": 7.0189, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0009144378767484053, |
|
"grad_norm": 8.421695709228516, |
|
"learning_rate": 1e-05, |
|
"loss": 3.7862, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0013716568151226078, |
|
"grad_norm": 5.949154853820801, |
|
"learning_rate": 9.995423625806358e-06, |
|
"loss": 2.3315, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0018288757534968105, |
|
"grad_norm": 6.021435260772705, |
|
"learning_rate": 9.990847251612715e-06, |
|
"loss": 1.7553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.002286094691871013, |
|
"grad_norm": 6.2066168785095215, |
|
"learning_rate": 9.986270877419073e-06, |
|
"loss": 1.46, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0027433136302452157, |
|
"grad_norm": 5.304969310760498, |
|
"learning_rate": 9.98169450322543e-06, |
|
"loss": 1.2973, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0032005325686194183, |
|
"grad_norm": 5.2138566970825195, |
|
"learning_rate": 9.977118129031787e-06, |
|
"loss": 1.1809, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.003657751506993621, |
|
"grad_norm": 6.513549327850342, |
|
"learning_rate": 9.972541754838143e-06, |
|
"loss": 1.0803, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.004114970445367823, |
|
"grad_norm": 4.748714923858643, |
|
"learning_rate": 9.967965380644502e-06, |
|
"loss": 1.0187, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.004572189383742026, |
|
"grad_norm": 3.788388252258301, |
|
"learning_rate": 9.963389006450857e-06, |
|
"loss": 0.95, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.005029408322116229, |
|
"grad_norm": 4.110034465789795, |
|
"learning_rate": 9.958812632257215e-06, |
|
"loss": 0.9065, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.005486627260490431, |
|
"grad_norm": 4.105165481567383, |
|
"learning_rate": 9.954236258063572e-06, |
|
"loss": 0.8702, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.005943846198864634, |
|
"grad_norm": 4.9001145362854, |
|
"learning_rate": 9.94965988386993e-06, |
|
"loss": 0.8165, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.006401065137238837, |
|
"grad_norm": 3.8835108280181885, |
|
"learning_rate": 9.945083509676286e-06, |
|
"loss": 0.7864, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.006858284075613039, |
|
"grad_norm": 4.042098522186279, |
|
"learning_rate": 9.940507135482644e-06, |
|
"loss": 0.7639, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.007315503013987242, |
|
"grad_norm": 3.520855188369751, |
|
"learning_rate": 9.935930761289001e-06, |
|
"loss": 0.7305, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.007772721952361445, |
|
"grad_norm": 3.5829412937164307, |
|
"learning_rate": 9.931354387095358e-06, |
|
"loss": 0.7138, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.008229940890735647, |
|
"grad_norm": 3.618142604827881, |
|
"learning_rate": 9.926778012901714e-06, |
|
"loss": 0.6868, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.00868715982910985, |
|
"grad_norm": 4.708620548248291, |
|
"learning_rate": 9.922201638708073e-06, |
|
"loss": 0.6671, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.009144378767484052, |
|
"grad_norm": 5.403913974761963, |
|
"learning_rate": 9.917625264514428e-06, |
|
"loss": 0.6627, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.009601597705858255, |
|
"grad_norm": 3.972954511642456, |
|
"learning_rate": 9.913048890320787e-06, |
|
"loss": 0.6406, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.010058816644232457, |
|
"grad_norm": 3.42512583732605, |
|
"learning_rate": 9.908472516127143e-06, |
|
"loss": 0.6206, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.01051603558260666, |
|
"grad_norm": 3.6385998725891113, |
|
"learning_rate": 9.903896141933502e-06, |
|
"loss": 0.6068, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.010973254520980863, |
|
"grad_norm": 3.1945295333862305, |
|
"learning_rate": 9.899319767739857e-06, |
|
"loss": 0.6005, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.011430473459355064, |
|
"grad_norm": 2.8585703372955322, |
|
"learning_rate": 9.894743393546215e-06, |
|
"loss": 0.5833, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.011887692397729268, |
|
"grad_norm": 3.1953206062316895, |
|
"learning_rate": 9.890167019352572e-06, |
|
"loss": 0.5754, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.01234491133610347, |
|
"grad_norm": 3.1728343963623047, |
|
"learning_rate": 9.88559064515893e-06, |
|
"loss": 0.5657, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.012802130274477673, |
|
"grad_norm": 3.0139970779418945, |
|
"learning_rate": 9.881014270965287e-06, |
|
"loss": 0.5565, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.013259349212851875, |
|
"grad_norm": 2.9499995708465576, |
|
"learning_rate": 9.876437896771644e-06, |
|
"loss": 0.5455, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.013716568151226079, |
|
"grad_norm": 3.5701255798339844, |
|
"learning_rate": 9.871861522578e-06, |
|
"loss": 0.5314, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.01417378708960028, |
|
"grad_norm": 3.3928213119506836, |
|
"learning_rate": 9.867285148384358e-06, |
|
"loss": 0.5349, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.014631006027974484, |
|
"grad_norm": 3.076819896697998, |
|
"learning_rate": 9.862708774190714e-06, |
|
"loss": 0.518, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.015088224966348686, |
|
"grad_norm": 2.7929725646972656, |
|
"learning_rate": 9.858132399997073e-06, |
|
"loss": 0.5181, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.01554544390472289, |
|
"grad_norm": 2.8731892108917236, |
|
"learning_rate": 9.853556025803429e-06, |
|
"loss": 0.5187, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.016002662843097093, |
|
"grad_norm": 3.5447003841400146, |
|
"learning_rate": 9.848979651609786e-06, |
|
"loss": 0.5065, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.016459881781471293, |
|
"grad_norm": 2.36434006690979, |
|
"learning_rate": 9.844403277416143e-06, |
|
"loss": 0.5, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.016917100719845497, |
|
"grad_norm": 2.4048261642456055, |
|
"learning_rate": 9.8398269032225e-06, |
|
"loss": 0.5015, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.0173743196582197, |
|
"grad_norm": 3.1334474086761475, |
|
"learning_rate": 9.835250529028858e-06, |
|
"loss": 0.4876, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.0178315385965939, |
|
"grad_norm": 2.134216070175171, |
|
"learning_rate": 9.830674154835215e-06, |
|
"loss": 0.487, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.018288757534968104, |
|
"grad_norm": 2.5839178562164307, |
|
"learning_rate": 9.826097780641572e-06, |
|
"loss": 0.4716, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.018745976473342307, |
|
"grad_norm": 2.95695424079895, |
|
"learning_rate": 9.82152140644793e-06, |
|
"loss": 0.4778, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.01920319541171651, |
|
"grad_norm": 3.2409121990203857, |
|
"learning_rate": 9.816945032254285e-06, |
|
"loss": 0.4702, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.01966041435009071, |
|
"grad_norm": 3.0505309104919434, |
|
"learning_rate": 9.812368658060644e-06, |
|
"loss": 0.4737, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.020117633288464914, |
|
"grad_norm": 2.630138397216797, |
|
"learning_rate": 9.807792283867e-06, |
|
"loss": 0.4732, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.020574852226839118, |
|
"grad_norm": 2.780930995941162, |
|
"learning_rate": 9.803215909673357e-06, |
|
"loss": 0.4616, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.02103207116521332, |
|
"grad_norm": 2.8004393577575684, |
|
"learning_rate": 9.798639535479714e-06, |
|
"loss": 0.4586, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.02148929010358752, |
|
"grad_norm": 2.394951581954956, |
|
"learning_rate": 9.794063161286071e-06, |
|
"loss": 0.4527, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.021946509041961725, |
|
"grad_norm": 2.5440549850463867, |
|
"learning_rate": 9.789486787092429e-06, |
|
"loss": 0.4465, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.02240372798033593, |
|
"grad_norm": 2.5639050006866455, |
|
"learning_rate": 9.784910412898786e-06, |
|
"loss": 0.4545, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.02286094691871013, |
|
"grad_norm": 3.256699562072754, |
|
"learning_rate": 9.780334038705143e-06, |
|
"loss": 0.4467, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.023318165857084332, |
|
"grad_norm": 2.7148571014404297, |
|
"learning_rate": 9.7757576645115e-06, |
|
"loss": 0.4377, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.023775384795458536, |
|
"grad_norm": 2.497065544128418, |
|
"learning_rate": 9.771181290317858e-06, |
|
"loss": 0.4348, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.02423260373383274, |
|
"grad_norm": 2.3831052780151367, |
|
"learning_rate": 9.766604916124215e-06, |
|
"loss": 0.4312, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.02468982267220694, |
|
"grad_norm": 2.513948917388916, |
|
"learning_rate": 9.762028541930572e-06, |
|
"loss": 0.4245, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.025147041610581143, |
|
"grad_norm": 2.2912256717681885, |
|
"learning_rate": 9.75745216773693e-06, |
|
"loss": 0.4239, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.025604260548955347, |
|
"grad_norm": 3.100677490234375, |
|
"learning_rate": 9.752875793543285e-06, |
|
"loss": 0.4341, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.02606147948732955, |
|
"grad_norm": 4.546727180480957, |
|
"learning_rate": 9.748299419349643e-06, |
|
"loss": 0.4177, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.02651869842570375, |
|
"grad_norm": 2.070556163787842, |
|
"learning_rate": 9.743723045156e-06, |
|
"loss": 0.4152, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.026975917364077954, |
|
"grad_norm": 2.3387291431427, |
|
"learning_rate": 9.739146670962357e-06, |
|
"loss": 0.4208, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.027433136302452157, |
|
"grad_norm": 2.6462035179138184, |
|
"learning_rate": 9.734570296768714e-06, |
|
"loss": 0.4096, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.02789035524082636, |
|
"grad_norm": 2.098785400390625, |
|
"learning_rate": 9.729993922575072e-06, |
|
"loss": 0.4124, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.02834757417920056, |
|
"grad_norm": 2.7251081466674805, |
|
"learning_rate": 9.725417548381429e-06, |
|
"loss": 0.4131, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.028804793117574765, |
|
"grad_norm": 2.221843957901001, |
|
"learning_rate": 9.720841174187786e-06, |
|
"loss": 0.4099, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.029262012055948968, |
|
"grad_norm": 1.7978463172912598, |
|
"learning_rate": 9.716264799994143e-06, |
|
"loss": 0.4065, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.029719230994323168, |
|
"grad_norm": 2.299729824066162, |
|
"learning_rate": 9.7116884258005e-06, |
|
"loss": 0.403, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.030176449932697372, |
|
"grad_norm": 2.307136058807373, |
|
"learning_rate": 9.707112051606858e-06, |
|
"loss": 0.3997, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.030633668871071575, |
|
"grad_norm": 2.1159164905548096, |
|
"learning_rate": 9.702535677413214e-06, |
|
"loss": 0.3986, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.03109088780944578, |
|
"grad_norm": 2.6387250423431396, |
|
"learning_rate": 9.697959303219573e-06, |
|
"loss": 0.3887, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.03154810674781998, |
|
"grad_norm": 2.5297632217407227, |
|
"learning_rate": 9.693382929025928e-06, |
|
"loss": 0.3902, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.032005325686194186, |
|
"grad_norm": 3.11338472366333, |
|
"learning_rate": 9.688806554832287e-06, |
|
"loss": 0.3879, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.032462544624568386, |
|
"grad_norm": 2.4520089626312256, |
|
"learning_rate": 9.684230180638643e-06, |
|
"loss": 0.3914, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.032919763562942586, |
|
"grad_norm": 2.3968985080718994, |
|
"learning_rate": 9.679653806445e-06, |
|
"loss": 0.3859, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.03337698250131679, |
|
"grad_norm": 1.8716310262680054, |
|
"learning_rate": 9.675077432251357e-06, |
|
"loss": 0.3838, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.03383420143969099, |
|
"grad_norm": 2.634087324142456, |
|
"learning_rate": 9.670501058057715e-06, |
|
"loss": 0.3798, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.03429142037806519, |
|
"grad_norm": 2.2868430614471436, |
|
"learning_rate": 9.665924683864072e-06, |
|
"loss": 0.3781, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.0347486393164394, |
|
"grad_norm": 2.1440744400024414, |
|
"learning_rate": 9.661348309670429e-06, |
|
"loss": 0.3893, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.0352058582548136, |
|
"grad_norm": 4.374706268310547, |
|
"learning_rate": 9.656771935476785e-06, |
|
"loss": 0.3765, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.0356630771931878, |
|
"grad_norm": 2.3860931396484375, |
|
"learning_rate": 9.652195561283144e-06, |
|
"loss": 0.376, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.03612029613156201, |
|
"grad_norm": 5.505861282348633, |
|
"learning_rate": 9.6476191870895e-06, |
|
"loss": 0.3767, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.03657751506993621, |
|
"grad_norm": 2.61763858795166, |
|
"learning_rate": 9.643042812895858e-06, |
|
"loss": 0.3808, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.037034734008310415, |
|
"grad_norm": 2.1524641513824463, |
|
"learning_rate": 9.638466438702214e-06, |
|
"loss": 0.3801, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.037491952946684615, |
|
"grad_norm": 1.7687675952911377, |
|
"learning_rate": 9.633890064508571e-06, |
|
"loss": 0.373, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.037949171885058815, |
|
"grad_norm": 1.924752116203308, |
|
"learning_rate": 9.629313690314928e-06, |
|
"loss": 0.3747, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.03840639082343302, |
|
"grad_norm": 2.0824227333068848, |
|
"learning_rate": 9.624737316121286e-06, |
|
"loss": 0.3665, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.03886360976180722, |
|
"grad_norm": 1.825997233390808, |
|
"learning_rate": 9.620160941927643e-06, |
|
"loss": 0.3678, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.03932082870018142, |
|
"grad_norm": 1.9416835308074951, |
|
"learning_rate": 9.615584567734e-06, |
|
"loss": 0.3727, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.03977804763855563, |
|
"grad_norm": 2.4522104263305664, |
|
"learning_rate": 9.611008193540357e-06, |
|
"loss": 0.3679, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.04023526657692983, |
|
"grad_norm": 2.46500825881958, |
|
"learning_rate": 9.606431819346715e-06, |
|
"loss": 0.3643, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.04069248551530403, |
|
"grad_norm": 2.2443718910217285, |
|
"learning_rate": 9.60185544515307e-06, |
|
"loss": 0.3619, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.041149704453678236, |
|
"grad_norm": 1.5836185216903687, |
|
"learning_rate": 9.59727907095943e-06, |
|
"loss": 0.3648, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.041606923392052436, |
|
"grad_norm": 1.7099242210388184, |
|
"learning_rate": 9.592702696765785e-06, |
|
"loss": 0.3627, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.04206414233042664, |
|
"grad_norm": 2.4821577072143555, |
|
"learning_rate": 9.588126322572144e-06, |
|
"loss": 0.3611, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.04252136126880084, |
|
"grad_norm": 2.2654361724853516, |
|
"learning_rate": 9.5835499483785e-06, |
|
"loss": 0.3608, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.04297858020717504, |
|
"grad_norm": 2.3293862342834473, |
|
"learning_rate": 9.578973574184857e-06, |
|
"loss": 0.368, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.04343579914554925, |
|
"grad_norm": 2.0562539100646973, |
|
"learning_rate": 9.574397199991214e-06, |
|
"loss": 0.3589, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.04389301808392345, |
|
"grad_norm": 2.5742366313934326, |
|
"learning_rate": 9.569820825797571e-06, |
|
"loss": 0.3524, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.04435023702229765, |
|
"grad_norm": 2.3970110416412354, |
|
"learning_rate": 9.565244451603928e-06, |
|
"loss": 0.3608, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.04480745596067186, |
|
"grad_norm": 1.7910202741622925, |
|
"learning_rate": 9.560668077410286e-06, |
|
"loss": 0.3447, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.04526467489904606, |
|
"grad_norm": 1.8098151683807373, |
|
"learning_rate": 9.556091703216643e-06, |
|
"loss": 0.3515, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.04572189383742026, |
|
"grad_norm": 1.9009671211242676, |
|
"learning_rate": 9.551515329023e-06, |
|
"loss": 0.3547, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.046179112775794465, |
|
"grad_norm": 1.8881592750549316, |
|
"learning_rate": 9.546938954829358e-06, |
|
"loss": 0.3542, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.046636331714168665, |
|
"grad_norm": 2.637274742126465, |
|
"learning_rate": 9.542362580635715e-06, |
|
"loss": 0.3489, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.04709355065254287, |
|
"grad_norm": 1.9252406358718872, |
|
"learning_rate": 9.53778620644207e-06, |
|
"loss": 0.3522, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.04755076959091707, |
|
"grad_norm": 1.7176941633224487, |
|
"learning_rate": 9.533209832248428e-06, |
|
"loss": 0.3425, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.04800798852929127, |
|
"grad_norm": 1.9770063161849976, |
|
"learning_rate": 9.528633458054785e-06, |
|
"loss": 0.3469, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.04846520746766548, |
|
"grad_norm": 2.2667782306671143, |
|
"learning_rate": 9.524057083861142e-06, |
|
"loss": 0.3498, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.04892242640603968, |
|
"grad_norm": 2.002631902694702, |
|
"learning_rate": 9.5194807096675e-06, |
|
"loss": 0.3486, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.04937964534441388, |
|
"grad_norm": 1.749894380569458, |
|
"learning_rate": 9.514904335473857e-06, |
|
"loss": 0.344, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.049836864282788086, |
|
"grad_norm": 2.701767921447754, |
|
"learning_rate": 9.510327961280214e-06, |
|
"loss": 0.3403, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.050294083221162286, |
|
"grad_norm": 1.895792007446289, |
|
"learning_rate": 9.505751587086571e-06, |
|
"loss": 0.3387, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.050751302159536486, |
|
"grad_norm": 2.478041410446167, |
|
"learning_rate": 9.501175212892929e-06, |
|
"loss": 0.3387, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.05120852109791069, |
|
"grad_norm": 2.3234288692474365, |
|
"learning_rate": 9.496598838699286e-06, |
|
"loss": 0.3426, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.05166574003628489, |
|
"grad_norm": 2.3493385314941406, |
|
"learning_rate": 9.492022464505643e-06, |
|
"loss": 0.3348, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.0521229589746591, |
|
"grad_norm": 2.0325398445129395, |
|
"learning_rate": 9.487446090311999e-06, |
|
"loss": 0.34, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.0525801779130333, |
|
"grad_norm": 2.0757031440734863, |
|
"learning_rate": 9.482869716118358e-06, |
|
"loss": 0.3396, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.0530373968514075, |
|
"grad_norm": 2.193401575088501, |
|
"learning_rate": 9.478293341924713e-06, |
|
"loss": 0.3352, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.05349461578978171, |
|
"grad_norm": 2.415004253387451, |
|
"learning_rate": 9.47371696773107e-06, |
|
"loss": 0.332, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.05395183472815591, |
|
"grad_norm": 1.8097025156021118, |
|
"learning_rate": 9.469140593537428e-06, |
|
"loss": 0.3395, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.05440905366653011, |
|
"grad_norm": 1.9246598482131958, |
|
"learning_rate": 9.464564219343785e-06, |
|
"loss": 0.3368, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.054866272604904315, |
|
"grad_norm": 1.8323681354522705, |
|
"learning_rate": 9.459987845150142e-06, |
|
"loss": 0.34, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.055323491543278515, |
|
"grad_norm": 2.6949360370635986, |
|
"learning_rate": 9.4554114709565e-06, |
|
"loss": 0.3398, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.05578071048165272, |
|
"grad_norm": 1.7276109457015991, |
|
"learning_rate": 9.450835096762857e-06, |
|
"loss": 0.3325, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.05623792942002692, |
|
"grad_norm": 1.6375492811203003, |
|
"learning_rate": 9.446258722569214e-06, |
|
"loss": 0.3323, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.05669514835840112, |
|
"grad_norm": 7.265068531036377, |
|
"learning_rate": 9.441682348375572e-06, |
|
"loss": 0.3333, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.05715236729677533, |
|
"grad_norm": 1.784020185470581, |
|
"learning_rate": 9.437105974181929e-06, |
|
"loss": 0.3357, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.05760958623514953, |
|
"grad_norm": 2.0214955806732178, |
|
"learning_rate": 9.432529599988284e-06, |
|
"loss": 0.3303, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.05806680517352373, |
|
"grad_norm": 2.3039133548736572, |
|
"learning_rate": 9.427953225794643e-06, |
|
"loss": 0.3292, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.058524024111897936, |
|
"grad_norm": 2.4076285362243652, |
|
"learning_rate": 9.423376851600999e-06, |
|
"loss": 0.3301, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.058981243050272136, |
|
"grad_norm": 1.872653603553772, |
|
"learning_rate": 9.418800477407358e-06, |
|
"loss": 0.3237, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.059438461988646336, |
|
"grad_norm": 1.858178973197937, |
|
"learning_rate": 9.414224103213713e-06, |
|
"loss": 0.3305, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.05989568092702054, |
|
"grad_norm": 2.2404658794403076, |
|
"learning_rate": 9.40964772902007e-06, |
|
"loss": 0.3282, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.060352899865394743, |
|
"grad_norm": 1.943448781967163, |
|
"learning_rate": 9.405071354826428e-06, |
|
"loss": 0.3245, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.06081011880376895, |
|
"grad_norm": 1.7627453804016113, |
|
"learning_rate": 9.400494980632785e-06, |
|
"loss": 0.3263, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.06126733774214315, |
|
"grad_norm": 2.1200695037841797, |
|
"learning_rate": 9.395918606439143e-06, |
|
"loss": 0.3207, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.06172455668051735, |
|
"grad_norm": 2.522911310195923, |
|
"learning_rate": 9.3913422322455e-06, |
|
"loss": 0.326, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.06218177561889156, |
|
"grad_norm": 2.193539619445801, |
|
"learning_rate": 9.386765858051855e-06, |
|
"loss": 0.3223, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.06263899455726575, |
|
"grad_norm": 2.4491043090820312, |
|
"learning_rate": 9.382189483858214e-06, |
|
"loss": 0.3213, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.06309621349563996, |
|
"grad_norm": 1.5971205234527588, |
|
"learning_rate": 9.37761310966457e-06, |
|
"loss": 0.3223, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.06355343243401416, |
|
"grad_norm": 2.126255750656128, |
|
"learning_rate": 9.373036735470929e-06, |
|
"loss": 0.3188, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.06401065137238837, |
|
"grad_norm": 2.074056625366211, |
|
"learning_rate": 9.368460361277285e-06, |
|
"loss": 0.3193, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.06446787031076257, |
|
"grad_norm": 1.6855189800262451, |
|
"learning_rate": 9.363883987083642e-06, |
|
"loss": 0.3133, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.06492508924913677, |
|
"grad_norm": 2.1474835872650146, |
|
"learning_rate": 9.359307612889999e-06, |
|
"loss": 0.3163, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.06538230818751098, |
|
"grad_norm": 1.9591755867004395, |
|
"learning_rate": 9.354731238696356e-06, |
|
"loss": 0.325, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.06583952712588517, |
|
"grad_norm": 1.7906707525253296, |
|
"learning_rate": 9.350154864502714e-06, |
|
"loss": 0.3207, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.06629674606425938, |
|
"grad_norm": 1.956648588180542, |
|
"learning_rate": 9.345578490309071e-06, |
|
"loss": 0.32, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.06675396500263359, |
|
"grad_norm": 2.537899971008301, |
|
"learning_rate": 9.341002116115428e-06, |
|
"loss": 0.3202, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.06721118394100778, |
|
"grad_norm": 2.68613600730896, |
|
"learning_rate": 9.336425741921785e-06, |
|
"loss": 0.3123, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.06766840287938199, |
|
"grad_norm": 1.742925763130188, |
|
"learning_rate": 9.331849367728141e-06, |
|
"loss": 0.3196, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.0681256218177562, |
|
"grad_norm": 1.493833065032959, |
|
"learning_rate": 9.3272729935345e-06, |
|
"loss": 0.3177, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.06858284075613039, |
|
"grad_norm": 2.0670220851898193, |
|
"learning_rate": 9.322696619340856e-06, |
|
"loss": 0.3194, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.0690400596945046, |
|
"grad_norm": 1.7943044900894165, |
|
"learning_rate": 9.318120245147213e-06, |
|
"loss": 0.3142, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.0694972786328788, |
|
"grad_norm": 2.0750091075897217, |
|
"learning_rate": 9.31354387095357e-06, |
|
"loss": 0.3121, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.069954497571253, |
|
"grad_norm": 2.5226950645446777, |
|
"learning_rate": 9.308967496759927e-06, |
|
"loss": 0.3167, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.0704117165096272, |
|
"grad_norm": 1.6280384063720703, |
|
"learning_rate": 9.304391122566285e-06, |
|
"loss": 0.3171, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.07086893544800141, |
|
"grad_norm": 1.8891403675079346, |
|
"learning_rate": 9.299814748372642e-06, |
|
"loss": 0.3161, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.0713261543863756, |
|
"grad_norm": 2.048211097717285, |
|
"learning_rate": 9.295238374179e-06, |
|
"loss": 0.3115, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.07178337332474981, |
|
"grad_norm": 1.7160500288009644, |
|
"learning_rate": 9.290661999985357e-06, |
|
"loss": 0.3189, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.07224059226312401, |
|
"grad_norm": 1.8395957946777344, |
|
"learning_rate": 9.286085625791714e-06, |
|
"loss": 0.3096, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.07269781120149822, |
|
"grad_norm": 1.92539381980896, |
|
"learning_rate": 9.281509251598071e-06, |
|
"loss": 0.3144, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.07315503013987242, |
|
"grad_norm": 2.474168300628662, |
|
"learning_rate": 9.276932877404428e-06, |
|
"loss": 0.3099, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.07361224907824662, |
|
"grad_norm": 2.2422871589660645, |
|
"learning_rate": 9.272356503210786e-06, |
|
"loss": 0.3129, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.07406946801662083, |
|
"grad_norm": 1.5611120462417603, |
|
"learning_rate": 9.267780129017141e-06, |
|
"loss": 0.3075, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.07452668695499502, |
|
"grad_norm": 1.408894658088684, |
|
"learning_rate": 9.263203754823499e-06, |
|
"loss": 0.3017, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.07498390589336923, |
|
"grad_norm": 1.664436936378479, |
|
"learning_rate": 9.258627380629856e-06, |
|
"loss": 0.3074, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.07544112483174344, |
|
"grad_norm": 1.3899191617965698, |
|
"learning_rate": 9.254051006436213e-06, |
|
"loss": 0.3061, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.07589834377011763, |
|
"grad_norm": 1.7736977338790894, |
|
"learning_rate": 9.24947463224257e-06, |
|
"loss": 0.3075, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.07635556270849184, |
|
"grad_norm": 1.743217945098877, |
|
"learning_rate": 9.244898258048928e-06, |
|
"loss": 0.3066, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.07681278164686604, |
|
"grad_norm": 2.4577653408050537, |
|
"learning_rate": 9.240321883855285e-06, |
|
"loss": 0.3083, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.07727000058524024, |
|
"grad_norm": 1.7819156646728516, |
|
"learning_rate": 9.235745509661642e-06, |
|
"loss": 0.3016, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.07772721952361444, |
|
"grad_norm": 1.5945593118667603, |
|
"learning_rate": 9.231169135468e-06, |
|
"loss": 0.3053, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.07818443846198865, |
|
"grad_norm": 3.3662831783294678, |
|
"learning_rate": 9.226592761274357e-06, |
|
"loss": 0.3008, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.07864165740036284, |
|
"grad_norm": 1.748854637145996, |
|
"learning_rate": 9.222016387080714e-06, |
|
"loss": 0.3026, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.07909887633873705, |
|
"grad_norm": 2.074263334274292, |
|
"learning_rate": 9.21744001288707e-06, |
|
"loss": 0.3045, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.07955609527711126, |
|
"grad_norm": 6.21075439453125, |
|
"learning_rate": 9.212863638693429e-06, |
|
"loss": 0.3078, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.08001331421548545, |
|
"grad_norm": 1.6198980808258057, |
|
"learning_rate": 9.208287264499784e-06, |
|
"loss": 0.3019, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.08047053315385966, |
|
"grad_norm": 1.760921835899353, |
|
"learning_rate": 9.203710890306143e-06, |
|
"loss": 0.2948, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.08092775209223387, |
|
"grad_norm": 2.7417385578155518, |
|
"learning_rate": 9.199134516112499e-06, |
|
"loss": 0.3054, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.08138497103060806, |
|
"grad_norm": 1.767946481704712, |
|
"learning_rate": 9.194558141918856e-06, |
|
"loss": 0.3405, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.08184218996898227, |
|
"grad_norm": 1.6789219379425049, |
|
"learning_rate": 9.189981767725213e-06, |
|
"loss": 0.3024, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.08229940890735647, |
|
"grad_norm": 1.6791198253631592, |
|
"learning_rate": 9.18540539353157e-06, |
|
"loss": 0.2987, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.08275662784573068, |
|
"grad_norm": 1.9289532899856567, |
|
"learning_rate": 9.180829019337928e-06, |
|
"loss": 0.3007, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.08321384678410487, |
|
"grad_norm": 1.9767258167266846, |
|
"learning_rate": 9.176252645144285e-06, |
|
"loss": 0.309, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.08367106572247908, |
|
"grad_norm": 2.790158271789551, |
|
"learning_rate": 9.17167627095064e-06, |
|
"loss": 0.3001, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.08412828466085329, |
|
"grad_norm": 2.0886495113372803, |
|
"learning_rate": 9.167099896757e-06, |
|
"loss": 0.2948, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.08458550359922748, |
|
"grad_norm": 1.426714539527893, |
|
"learning_rate": 9.162523522563355e-06, |
|
"loss": 0.2945, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.08504272253760169, |
|
"grad_norm": 1.887513279914856, |
|
"learning_rate": 9.157947148369714e-06, |
|
"loss": 0.2991, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.0854999414759759, |
|
"grad_norm": 2.1559338569641113, |
|
"learning_rate": 9.15337077417607e-06, |
|
"loss": 0.2984, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.08595716041435009, |
|
"grad_norm": 1.6978403329849243, |
|
"learning_rate": 9.148794399982429e-06, |
|
"loss": 0.3042, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.0864143793527243, |
|
"grad_norm": 1.7569996118545532, |
|
"learning_rate": 9.144218025788784e-06, |
|
"loss": 0.2929, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.0868715982910985, |
|
"grad_norm": 1.8148245811462402, |
|
"learning_rate": 9.139641651595142e-06, |
|
"loss": 0.2992, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.0873288172294727, |
|
"grad_norm": 2.9660353660583496, |
|
"learning_rate": 9.135065277401499e-06, |
|
"loss": 0.2967, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.0877860361678469, |
|
"grad_norm": 1.5390568971633911, |
|
"learning_rate": 9.130488903207856e-06, |
|
"loss": 0.2973, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.08824325510622111, |
|
"grad_norm": 2.3900351524353027, |
|
"learning_rate": 9.125912529014213e-06, |
|
"loss": 0.2954, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.0887004740445953, |
|
"grad_norm": 1.924519419670105, |
|
"learning_rate": 9.12133615482057e-06, |
|
"loss": 0.2989, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.08915769298296951, |
|
"grad_norm": 2.075025796890259, |
|
"learning_rate": 9.116759780626926e-06, |
|
"loss": 0.2974, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.08961491192134372, |
|
"grad_norm": 1.8780020475387573, |
|
"learning_rate": 9.112183406433285e-06, |
|
"loss": 0.2972, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.09007213085971791, |
|
"grad_norm": 1.8856852054595947, |
|
"learning_rate": 9.10760703223964e-06, |
|
"loss": 0.2951, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.09052934979809212, |
|
"grad_norm": 1.982252597808838, |
|
"learning_rate": 9.103030658046e-06, |
|
"loss": 0.2983, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.09098656873646632, |
|
"grad_norm": 1.7523550987243652, |
|
"learning_rate": 9.098454283852355e-06, |
|
"loss": 0.2936, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.09144378767484052, |
|
"grad_norm": 1.9436618089675903, |
|
"learning_rate": 9.093877909658713e-06, |
|
"loss": 0.2891, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.09190100661321472, |
|
"grad_norm": 1.929366946220398, |
|
"learning_rate": 9.08930153546507e-06, |
|
"loss": 0.2889, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.09235822555158893, |
|
"grad_norm": 2.2968223094940186, |
|
"learning_rate": 9.084725161271427e-06, |
|
"loss": 0.2951, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.09281544448996314, |
|
"grad_norm": 1.944568157196045, |
|
"learning_rate": 9.080148787077784e-06, |
|
"loss": 0.2966, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.09327266342833733, |
|
"grad_norm": 1.3778146505355835, |
|
"learning_rate": 9.075572412884142e-06, |
|
"loss": 0.2906, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.09372988236671154, |
|
"grad_norm": 1.755247712135315, |
|
"learning_rate": 9.070996038690499e-06, |
|
"loss": 0.2893, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.09418710130508574, |
|
"grad_norm": 1.6563775539398193, |
|
"learning_rate": 9.066419664496856e-06, |
|
"loss": 0.2952, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.09464432024345994, |
|
"grad_norm": 1.7801234722137451, |
|
"learning_rate": 9.061843290303214e-06, |
|
"loss": 0.2925, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.09510153918183414, |
|
"grad_norm": 2.3495497703552246, |
|
"learning_rate": 9.05726691610957e-06, |
|
"loss": 0.2928, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.09555875812020835, |
|
"grad_norm": 1.450566053390503, |
|
"learning_rate": 9.052690541915926e-06, |
|
"loss": 0.2845, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.09601597705858254, |
|
"grad_norm": 1.4703044891357422, |
|
"learning_rate": 9.048114167722284e-06, |
|
"loss": 0.2875, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.09647319599695675, |
|
"grad_norm": 1.6310155391693115, |
|
"learning_rate": 9.043537793528641e-06, |
|
"loss": 0.2975, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.09693041493533096, |
|
"grad_norm": 2.081167459487915, |
|
"learning_rate": 9.038961419334998e-06, |
|
"loss": 0.2935, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.09738763387370515, |
|
"grad_norm": 1.8510127067565918, |
|
"learning_rate": 9.034385045141356e-06, |
|
"loss": 0.2916, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.09784485281207936, |
|
"grad_norm": 2.0282094478607178, |
|
"learning_rate": 9.029808670947713e-06, |
|
"loss": 0.2894, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.09830207175045357, |
|
"grad_norm": 1.4554340839385986, |
|
"learning_rate": 9.02523229675407e-06, |
|
"loss": 0.2918, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.09875929068882776, |
|
"grad_norm": 1.4794038534164429, |
|
"learning_rate": 9.020655922560427e-06, |
|
"loss": 0.292, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.09921650962720197, |
|
"grad_norm": 1.5430374145507812, |
|
"learning_rate": 9.016079548366785e-06, |
|
"loss": 0.282, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.09967372856557617, |
|
"grad_norm": 2.4614310264587402, |
|
"learning_rate": 9.011503174173142e-06, |
|
"loss": 0.2941, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.10013094750395037, |
|
"grad_norm": 1.9759284257888794, |
|
"learning_rate": 9.0069267999795e-06, |
|
"loss": 0.2854, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.10058816644232457, |
|
"grad_norm": 1.8766002655029297, |
|
"learning_rate": 9.002350425785855e-06, |
|
"loss": 0.2894, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.10104538538069878, |
|
"grad_norm": 1.603816270828247, |
|
"learning_rate": 8.997774051592214e-06, |
|
"loss": 0.2871, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.10150260431907297, |
|
"grad_norm": 1.4415063858032227, |
|
"learning_rate": 8.99319767739857e-06, |
|
"loss": 0.2892, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.10195982325744718, |
|
"grad_norm": 2.01898193359375, |
|
"learning_rate": 8.988621303204927e-06, |
|
"loss": 0.286, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.10241704219582139, |
|
"grad_norm": 1.7956452369689941, |
|
"learning_rate": 8.984044929011284e-06, |
|
"loss": 0.2876, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.1028742611341956, |
|
"grad_norm": 1.8005551099777222, |
|
"learning_rate": 8.979468554817641e-06, |
|
"loss": 0.2859, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.10333148007256979, |
|
"grad_norm": 1.5132607221603394, |
|
"learning_rate": 8.974892180623998e-06, |
|
"loss": 0.2828, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.103788699010944, |
|
"grad_norm": 1.9613267183303833, |
|
"learning_rate": 8.970315806430356e-06, |
|
"loss": 0.2814, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.1042459179493182, |
|
"grad_norm": 2.240898370742798, |
|
"learning_rate": 8.965739432236713e-06, |
|
"loss": 0.286, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.1047031368876924, |
|
"grad_norm": 1.7905975580215454, |
|
"learning_rate": 8.96116305804307e-06, |
|
"loss": 0.2864, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.1051603558260666, |
|
"grad_norm": 2.4146153926849365, |
|
"learning_rate": 8.956586683849428e-06, |
|
"loss": 0.2823, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.10561757476444081, |
|
"grad_norm": 2.2988457679748535, |
|
"learning_rate": 8.952010309655785e-06, |
|
"loss": 0.2842, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.106074793702815, |
|
"grad_norm": 2.073253631591797, |
|
"learning_rate": 8.94743393546214e-06, |
|
"loss": 0.2811, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.10653201264118921, |
|
"grad_norm": 1.5774530172348022, |
|
"learning_rate": 8.9428575612685e-06, |
|
"loss": 0.2843, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.10698923157956342, |
|
"grad_norm": 2.8328728675842285, |
|
"learning_rate": 8.938281187074855e-06, |
|
"loss": 0.2847, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.10744645051793761, |
|
"grad_norm": 1.9653736352920532, |
|
"learning_rate": 8.933704812881214e-06, |
|
"loss": 0.2826, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.10790366945631182, |
|
"grad_norm": 1.9079234600067139, |
|
"learning_rate": 8.92912843868757e-06, |
|
"loss": 0.2799, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.10836088839468602, |
|
"grad_norm": 1.7807742357254028, |
|
"learning_rate": 8.924552064493927e-06, |
|
"loss": 0.2788, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.10881810733306022, |
|
"grad_norm": 1.857607364654541, |
|
"learning_rate": 8.919975690300284e-06, |
|
"loss": 0.2808, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.10927532627143442, |
|
"grad_norm": 1.8199599981307983, |
|
"learning_rate": 8.915399316106641e-06, |
|
"loss": 0.2875, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.10973254520980863, |
|
"grad_norm": 1.4623470306396484, |
|
"learning_rate": 8.910822941912999e-06, |
|
"loss": 0.3283, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.11018976414818282, |
|
"grad_norm": 1.5743190050125122, |
|
"learning_rate": 8.906246567719356e-06, |
|
"loss": 0.284, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.11064698308655703, |
|
"grad_norm": 1.7710552215576172, |
|
"learning_rate": 8.901670193525711e-06, |
|
"loss": 0.2847, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.11110420202493124, |
|
"grad_norm": 1.6554839611053467, |
|
"learning_rate": 8.89709381933207e-06, |
|
"loss": 0.2844, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.11156142096330544, |
|
"grad_norm": 1.8272452354431152, |
|
"learning_rate": 8.892517445138426e-06, |
|
"loss": 0.2842, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.11201863990167964, |
|
"grad_norm": 1.7126985788345337, |
|
"learning_rate": 8.887941070944785e-06, |
|
"loss": 0.2797, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.11247585884005384, |
|
"grad_norm": 2.158935546875, |
|
"learning_rate": 8.88336469675114e-06, |
|
"loss": 0.2771, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.11293307777842805, |
|
"grad_norm": 1.8630131483078003, |
|
"learning_rate": 8.878788322557498e-06, |
|
"loss": 0.2785, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.11339029671680224, |
|
"grad_norm": 1.4368232488632202, |
|
"learning_rate": 8.874211948363855e-06, |
|
"loss": 0.2835, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.11384751565517645, |
|
"grad_norm": 1.773201584815979, |
|
"learning_rate": 8.869635574170212e-06, |
|
"loss": 0.2846, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.11430473459355066, |
|
"grad_norm": 2.004790782928467, |
|
"learning_rate": 8.86505919997657e-06, |
|
"loss": 0.2813, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.11476195353192485, |
|
"grad_norm": 1.8280359506607056, |
|
"learning_rate": 8.860482825782927e-06, |
|
"loss": 0.2794, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.11521917247029906, |
|
"grad_norm": 1.10916268825531, |
|
"learning_rate": 8.855906451589284e-06, |
|
"loss": 0.2742, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.11567639140867327, |
|
"grad_norm": 1.524181604385376, |
|
"learning_rate": 8.851330077395641e-06, |
|
"loss": 0.2778, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.11613361034704746, |
|
"grad_norm": 1.8285144567489624, |
|
"learning_rate": 8.846753703201997e-06, |
|
"loss": 0.2781, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.11659082928542167, |
|
"grad_norm": 2.387599229812622, |
|
"learning_rate": 8.842177329008356e-06, |
|
"loss": 0.2729, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.11704804822379587, |
|
"grad_norm": 1.5542514324188232, |
|
"learning_rate": 8.837600954814712e-06, |
|
"loss": 0.2745, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.11750526716217007, |
|
"grad_norm": 1.4079362154006958, |
|
"learning_rate": 8.83302458062107e-06, |
|
"loss": 0.2815, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.11796248610054427, |
|
"grad_norm": 1.8694310188293457, |
|
"learning_rate": 8.828448206427426e-06, |
|
"loss": 0.277, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.11841970503891848, |
|
"grad_norm": 1.2781902551651, |
|
"learning_rate": 8.823871832233783e-06, |
|
"loss": 0.2804, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.11887692397729267, |
|
"grad_norm": 2.4223721027374268, |
|
"learning_rate": 8.81929545804014e-06, |
|
"loss": 0.282, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.11933414291566688, |
|
"grad_norm": 1.4259532690048218, |
|
"learning_rate": 8.814719083846498e-06, |
|
"loss": 0.2765, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.11979136185404109, |
|
"grad_norm": 4.0724568367004395, |
|
"learning_rate": 8.810142709652855e-06, |
|
"loss": 0.2781, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.12024858079241528, |
|
"grad_norm": 1.7051255702972412, |
|
"learning_rate": 8.805566335459213e-06, |
|
"loss": 0.2791, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.12070579973078949, |
|
"grad_norm": 1.7078741788864136, |
|
"learning_rate": 8.80098996126557e-06, |
|
"loss": 0.2796, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.1211630186691637, |
|
"grad_norm": 1.6474307775497437, |
|
"learning_rate": 8.796413587071927e-06, |
|
"loss": 0.271, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.1216202376075379, |
|
"grad_norm": 1.9740554094314575, |
|
"learning_rate": 8.791837212878284e-06, |
|
"loss": 0.2802, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.1220774565459121, |
|
"grad_norm": 1.4887925386428833, |
|
"learning_rate": 8.787260838684642e-06, |
|
"loss": 0.2707, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.1225346754842863, |
|
"grad_norm": 1.815319538116455, |
|
"learning_rate": 8.782684464490999e-06, |
|
"loss": 0.2751, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.12299189442266051, |
|
"grad_norm": 2.604151487350464, |
|
"learning_rate": 8.778108090297355e-06, |
|
"loss": 0.2779, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.1234491133610347, |
|
"grad_norm": 1.8312991857528687, |
|
"learning_rate": 8.773531716103712e-06, |
|
"loss": 0.2757, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.12390633229940891, |
|
"grad_norm": 2.094054698944092, |
|
"learning_rate": 8.768955341910069e-06, |
|
"loss": 0.2788, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.12436355123778312, |
|
"grad_norm": 1.7696080207824707, |
|
"learning_rate": 8.764378967716426e-06, |
|
"loss": 0.2717, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.12482077017615731, |
|
"grad_norm": 1.6877754926681519, |
|
"learning_rate": 8.759802593522784e-06, |
|
"loss": 0.2712, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.1252779891145315, |
|
"grad_norm": 2.1642048358917236, |
|
"learning_rate": 8.755226219329141e-06, |
|
"loss": 0.2727, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.12573520805290572, |
|
"grad_norm": 2.3550350666046143, |
|
"learning_rate": 8.750649845135498e-06, |
|
"loss": 0.2707, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.12619242699127992, |
|
"grad_norm": 1.6955220699310303, |
|
"learning_rate": 8.746073470941855e-06, |
|
"loss": 0.2699, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.12664964592965414, |
|
"grad_norm": 1.873693823814392, |
|
"learning_rate": 8.741497096748213e-06, |
|
"loss": 0.2679, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.12710686486802833, |
|
"grad_norm": 1.5458048582077026, |
|
"learning_rate": 8.73692072255457e-06, |
|
"loss": 0.2698, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.12756408380640252, |
|
"grad_norm": 2.3633434772491455, |
|
"learning_rate": 8.732344348360926e-06, |
|
"loss": 0.2708, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.12802130274477674, |
|
"grad_norm": 1.4097380638122559, |
|
"learning_rate": 8.727767974167285e-06, |
|
"loss": 0.274, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.12847852168315094, |
|
"grad_norm": 1.7990530729293823, |
|
"learning_rate": 8.72319159997364e-06, |
|
"loss": 0.2706, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.12893574062152513, |
|
"grad_norm": 1.9841113090515137, |
|
"learning_rate": 8.718615225779999e-06, |
|
"loss": 0.2739, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.12939295955989935, |
|
"grad_norm": 1.222854495048523, |
|
"learning_rate": 8.714038851586355e-06, |
|
"loss": 0.2686, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.12985017849827354, |
|
"grad_norm": 1.891701340675354, |
|
"learning_rate": 8.709462477392712e-06, |
|
"loss": 0.2688, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.13030739743664774, |
|
"grad_norm": 1.841719627380371, |
|
"learning_rate": 8.70488610319907e-06, |
|
"loss": 0.2695, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.13076461637502196, |
|
"grad_norm": 1.5631014108657837, |
|
"learning_rate": 8.700309729005426e-06, |
|
"loss": 0.2706, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.13122183531339615, |
|
"grad_norm": 1.9422105550765991, |
|
"learning_rate": 8.695733354811784e-06, |
|
"loss": 0.269, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.13167905425177034, |
|
"grad_norm": 1.475142002105713, |
|
"learning_rate": 8.691156980618141e-06, |
|
"loss": 0.2694, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.13213627319014457, |
|
"grad_norm": 2.2062432765960693, |
|
"learning_rate": 8.686580606424498e-06, |
|
"loss": 0.2695, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.13259349212851876, |
|
"grad_norm": 1.754489541053772, |
|
"learning_rate": 8.682004232230856e-06, |
|
"loss": 0.2743, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.13305071106689295, |
|
"grad_norm": 1.6598039865493774, |
|
"learning_rate": 8.677427858037211e-06, |
|
"loss": 0.269, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.13350793000526717, |
|
"grad_norm": 1.045148253440857, |
|
"learning_rate": 8.67285148384357e-06, |
|
"loss": 0.2662, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.13396514894364137, |
|
"grad_norm": 1.2887623310089111, |
|
"learning_rate": 8.668275109649926e-06, |
|
"loss": 0.2735, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.13442236788201556, |
|
"grad_norm": 1.5989199876785278, |
|
"learning_rate": 8.663698735456285e-06, |
|
"loss": 0.2688, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.13487958682038978, |
|
"grad_norm": 1.9200626611709595, |
|
"learning_rate": 8.65912236126264e-06, |
|
"loss": 0.2712, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.13533680575876397, |
|
"grad_norm": 1.7635419368743896, |
|
"learning_rate": 8.654545987068998e-06, |
|
"loss": 0.2672, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.13579402469713817, |
|
"grad_norm": 1.6450468301773071, |
|
"learning_rate": 8.649969612875355e-06, |
|
"loss": 0.2656, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.1362512436355124, |
|
"grad_norm": 2.2584726810455322, |
|
"learning_rate": 8.645393238681712e-06, |
|
"loss": 0.2677, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.13670846257388658, |
|
"grad_norm": 1.372758388519287, |
|
"learning_rate": 8.64081686448807e-06, |
|
"loss": 0.2726, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.13716568151226077, |
|
"grad_norm": 1.8561943769454956, |
|
"learning_rate": 8.636240490294427e-06, |
|
"loss": 0.2721, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.137622900450635, |
|
"grad_norm": 1.548618197441101, |
|
"learning_rate": 8.631664116100782e-06, |
|
"loss": 0.2676, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.1380801193890092, |
|
"grad_norm": 1.3110601902008057, |
|
"learning_rate": 8.627087741907141e-06, |
|
"loss": 0.2661, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.13853733832738338, |
|
"grad_norm": 1.4244693517684937, |
|
"learning_rate": 8.622511367713497e-06, |
|
"loss": 0.2712, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.1389945572657576, |
|
"grad_norm": 2.187041759490967, |
|
"learning_rate": 8.617934993519856e-06, |
|
"loss": 0.2679, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.1394517762041318, |
|
"grad_norm": 1.7944238185882568, |
|
"learning_rate": 8.613358619326211e-06, |
|
"loss": 0.2682, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.139908995142506, |
|
"grad_norm": 1.7159152030944824, |
|
"learning_rate": 8.608782245132569e-06, |
|
"loss": 0.2689, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.1403662140808802, |
|
"grad_norm": 1.8711001873016357, |
|
"learning_rate": 8.604205870938926e-06, |
|
"loss": 0.2685, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.1408234330192544, |
|
"grad_norm": 1.7059112787246704, |
|
"learning_rate": 8.599629496745283e-06, |
|
"loss": 0.2695, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.1412806519576286, |
|
"grad_norm": 1.720859408378601, |
|
"learning_rate": 8.59505312255164e-06, |
|
"loss": 0.2703, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.14173787089600282, |
|
"grad_norm": 1.665474772453308, |
|
"learning_rate": 8.590476748357998e-06, |
|
"loss": 0.269, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.142195089834377, |
|
"grad_norm": 1.6061115264892578, |
|
"learning_rate": 8.585900374164355e-06, |
|
"loss": 0.2659, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.1426523087727512, |
|
"grad_norm": 1.6262190341949463, |
|
"learning_rate": 8.581323999970712e-06, |
|
"loss": 0.2652, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.14310952771112542, |
|
"grad_norm": 1.9662021398544312, |
|
"learning_rate": 8.57674762577707e-06, |
|
"loss": 0.2659, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.14356674664949962, |
|
"grad_norm": 1.2154645919799805, |
|
"learning_rate": 8.572171251583427e-06, |
|
"loss": 0.2686, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.1440239655878738, |
|
"grad_norm": 1.8387107849121094, |
|
"learning_rate": 8.567594877389782e-06, |
|
"loss": 0.2668, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.14448118452624803, |
|
"grad_norm": 1.4331964254379272, |
|
"learning_rate": 8.56301850319614e-06, |
|
"loss": 0.2635, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.14493840346462222, |
|
"grad_norm": 1.503548502922058, |
|
"learning_rate": 8.558442129002497e-06, |
|
"loss": 0.2681, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.14539562240299644, |
|
"grad_norm": 2.2931318283081055, |
|
"learning_rate": 8.553865754808854e-06, |
|
"loss": 0.2657, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.14585284134137064, |
|
"grad_norm": 1.415092945098877, |
|
"learning_rate": 8.549289380615212e-06, |
|
"loss": 0.2563, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.14631006027974483, |
|
"grad_norm": 1.3481783866882324, |
|
"learning_rate": 8.544713006421569e-06, |
|
"loss": 0.2615, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.14676727921811905, |
|
"grad_norm": 2.6668007373809814, |
|
"learning_rate": 8.540136632227926e-06, |
|
"loss": 0.2689, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.14722449815649324, |
|
"grad_norm": 1.9730263948440552, |
|
"learning_rate": 8.535560258034283e-06, |
|
"loss": 0.2625, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.14768171709486744, |
|
"grad_norm": 1.5329406261444092, |
|
"learning_rate": 8.53098388384064e-06, |
|
"loss": 0.2583, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.14813893603324166, |
|
"grad_norm": 1.8120336532592773, |
|
"learning_rate": 8.526407509646998e-06, |
|
"loss": 0.2626, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.14859615497161585, |
|
"grad_norm": 1.5694791078567505, |
|
"learning_rate": 8.521831135453355e-06, |
|
"loss": 0.2638, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.14905337390999004, |
|
"grad_norm": 1.6131516695022583, |
|
"learning_rate": 8.517254761259712e-06, |
|
"loss": 0.2616, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.14951059284836427, |
|
"grad_norm": 1.7939931154251099, |
|
"learning_rate": 8.51267838706607e-06, |
|
"loss": 0.2632, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.14996781178673846, |
|
"grad_norm": 1.0342079401016235, |
|
"learning_rate": 8.508102012872425e-06, |
|
"loss": 0.2646, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.15042503072511265, |
|
"grad_norm": 1.1683495044708252, |
|
"learning_rate": 8.503525638678784e-06, |
|
"loss": 0.2607, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.15088224966348687, |
|
"grad_norm": 1.189745306968689, |
|
"learning_rate": 8.49894926448514e-06, |
|
"loss": 0.2643, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.15133946860186107, |
|
"grad_norm": 1.996500015258789, |
|
"learning_rate": 8.494372890291497e-06, |
|
"loss": 0.2603, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.15179668754023526, |
|
"grad_norm": 1.9063647985458374, |
|
"learning_rate": 8.489796516097854e-06, |
|
"loss": 0.2697, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.15225390647860948, |
|
"grad_norm": 1.3559688329696655, |
|
"learning_rate": 8.485220141904212e-06, |
|
"loss": 0.2626, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.15271112541698367, |
|
"grad_norm": 1.9531289339065552, |
|
"learning_rate": 8.480643767710569e-06, |
|
"loss": 0.2557, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.15316834435535787, |
|
"grad_norm": 1.3879919052124023, |
|
"learning_rate": 8.476067393516926e-06, |
|
"loss": 0.258, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.1536255632937321, |
|
"grad_norm": 1.7489395141601562, |
|
"learning_rate": 8.471491019323284e-06, |
|
"loss": 0.2579, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.15408278223210628, |
|
"grad_norm": 1.183287501335144, |
|
"learning_rate": 8.46691464512964e-06, |
|
"loss": 0.263, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.15454000117048047, |
|
"grad_norm": 1.538761019706726, |
|
"learning_rate": 8.462338270935996e-06, |
|
"loss": 0.2596, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.1549972201088547, |
|
"grad_norm": 1.6584478616714478, |
|
"learning_rate": 8.457761896742355e-06, |
|
"loss": 0.2594, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.1554544390472289, |
|
"grad_norm": 1.4705157279968262, |
|
"learning_rate": 8.453185522548711e-06, |
|
"loss": 0.2537, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.15591165798560308, |
|
"grad_norm": 2.3619368076324463, |
|
"learning_rate": 8.44860914835507e-06, |
|
"loss": 0.2595, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.1563688769239773, |
|
"grad_norm": 1.5578237771987915, |
|
"learning_rate": 8.444032774161425e-06, |
|
"loss": 0.2611, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.1568260958623515, |
|
"grad_norm": 1.4956451654434204, |
|
"learning_rate": 8.439456399967783e-06, |
|
"loss": 0.2661, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.1572833148007257, |
|
"grad_norm": 1.7658261060714722, |
|
"learning_rate": 8.43488002577414e-06, |
|
"loss": 0.2618, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.1577405337390999, |
|
"grad_norm": 1.9475387334823608, |
|
"learning_rate": 8.430303651580497e-06, |
|
"loss": 0.2584, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.1581977526774741, |
|
"grad_norm": 1.3033366203308105, |
|
"learning_rate": 8.425727277386855e-06, |
|
"loss": 0.2619, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.1586549716158483, |
|
"grad_norm": 1.1210391521453857, |
|
"learning_rate": 8.421150903193212e-06, |
|
"loss": 0.2598, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.15911219055422252, |
|
"grad_norm": 2.0735795497894287, |
|
"learning_rate": 8.416574528999567e-06, |
|
"loss": 0.259, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.1595694094925967, |
|
"grad_norm": 1.4574170112609863, |
|
"learning_rate": 8.411998154805926e-06, |
|
"loss": 0.2606, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.1600266284309709, |
|
"grad_norm": 1.5683772563934326, |
|
"learning_rate": 8.407421780612282e-06, |
|
"loss": 0.2609, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.16048384736934512, |
|
"grad_norm": 1.9865988492965698, |
|
"learning_rate": 8.402845406418641e-06, |
|
"loss": 0.2613, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.16094106630771932, |
|
"grad_norm": 1.9525185823440552, |
|
"learning_rate": 8.398269032224997e-06, |
|
"loss": 0.2546, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.1613982852460935, |
|
"grad_norm": 1.6674350500106812, |
|
"learning_rate": 8.393692658031354e-06, |
|
"loss": 0.256, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.16185550418446773, |
|
"grad_norm": 2.0394787788391113, |
|
"learning_rate": 8.389116283837711e-06, |
|
"loss": 0.2629, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.16231272312284192, |
|
"grad_norm": 2.1897048950195312, |
|
"learning_rate": 8.384539909644068e-06, |
|
"loss": 0.2559, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.16276994206121612, |
|
"grad_norm": 1.0547202825546265, |
|
"learning_rate": 8.379963535450426e-06, |
|
"loss": 0.2593, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.16322716099959034, |
|
"grad_norm": 1.8409370183944702, |
|
"learning_rate": 8.375387161256783e-06, |
|
"loss": 0.2621, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.16368437993796453, |
|
"grad_norm": 1.753064513206482, |
|
"learning_rate": 8.37081078706314e-06, |
|
"loss": 0.2597, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.16414159887633872, |
|
"grad_norm": 1.866620421409607, |
|
"learning_rate": 8.366234412869497e-06, |
|
"loss": 0.2602, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.16459881781471294, |
|
"grad_norm": 1.6045613288879395, |
|
"learning_rate": 8.361658038675855e-06, |
|
"loss": 0.2585, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.16505603675308714, |
|
"grad_norm": 1.262148380279541, |
|
"learning_rate": 8.357081664482212e-06, |
|
"loss": 0.2605, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.16551325569146136, |
|
"grad_norm": 1.3324670791625977, |
|
"learning_rate": 8.352505290288568e-06, |
|
"loss": 0.259, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.16597047462983555, |
|
"grad_norm": 1.5552209615707397, |
|
"learning_rate": 8.347928916094927e-06, |
|
"loss": 0.2536, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.16642769356820974, |
|
"grad_norm": 2.5258872509002686, |
|
"learning_rate": 8.343352541901282e-06, |
|
"loss": 0.256, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.16688491250658397, |
|
"grad_norm": 1.462498664855957, |
|
"learning_rate": 8.33877616770764e-06, |
|
"loss": 0.2574, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.16734213144495816, |
|
"grad_norm": 1.5125452280044556, |
|
"learning_rate": 8.334199793513997e-06, |
|
"loss": 0.2567, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.16779935038333235, |
|
"grad_norm": 1.6528276205062866, |
|
"learning_rate": 8.329623419320354e-06, |
|
"loss": 0.2674, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.16825656932170657, |
|
"grad_norm": 1.1524349451065063, |
|
"learning_rate": 8.325047045126711e-06, |
|
"loss": 0.257, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.16871378826008077, |
|
"grad_norm": 1.5361084938049316, |
|
"learning_rate": 8.320470670933069e-06, |
|
"loss": 0.2617, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.16917100719845496, |
|
"grad_norm": 1.7371759414672852, |
|
"learning_rate": 8.315894296739426e-06, |
|
"loss": 0.257, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.16962822613682918, |
|
"grad_norm": 2.3449254035949707, |
|
"learning_rate": 8.311317922545783e-06, |
|
"loss": 0.2527, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.17008544507520337, |
|
"grad_norm": 1.259590983390808, |
|
"learning_rate": 8.30674154835214e-06, |
|
"loss": 0.2518, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.17054266401357757, |
|
"grad_norm": 1.6850295066833496, |
|
"learning_rate": 8.302165174158498e-06, |
|
"loss": 0.2545, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.1709998829519518, |
|
"grad_norm": 1.8006367683410645, |
|
"learning_rate": 8.297588799964855e-06, |
|
"loss": 0.2569, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.17145710189032598, |
|
"grad_norm": 1.3569294214248657, |
|
"learning_rate": 8.29301242577121e-06, |
|
"loss": 0.2554, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.17191432082870017, |
|
"grad_norm": 1.310188889503479, |
|
"learning_rate": 8.288436051577568e-06, |
|
"loss": 0.2593, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.1723715397670744, |
|
"grad_norm": 1.741705298423767, |
|
"learning_rate": 8.283859677383925e-06, |
|
"loss": 0.252, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.1728287587054486, |
|
"grad_norm": 1.834928035736084, |
|
"learning_rate": 8.279283303190282e-06, |
|
"loss": 0.2516, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.17328597764382278, |
|
"grad_norm": 1.4775325059890747, |
|
"learning_rate": 8.27470692899664e-06, |
|
"loss": 0.2567, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.173743196582197, |
|
"grad_norm": 1.818657398223877, |
|
"learning_rate": 8.270130554802997e-06, |
|
"loss": 0.26, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.1742004155205712, |
|
"grad_norm": 1.9210857152938843, |
|
"learning_rate": 8.265554180609354e-06, |
|
"loss": 0.2496, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.1746576344589454, |
|
"grad_norm": 1.676413893699646, |
|
"learning_rate": 8.260977806415711e-06, |
|
"loss": 0.2539, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.1751148533973196, |
|
"grad_norm": 2.254531145095825, |
|
"learning_rate": 8.256401432222069e-06, |
|
"loss": 0.2552, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.1755720723356938, |
|
"grad_norm": 1.4928869009017944, |
|
"learning_rate": 8.251825058028426e-06, |
|
"loss": 0.2565, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.176029291274068, |
|
"grad_norm": 1.4001063108444214, |
|
"learning_rate": 8.247248683834782e-06, |
|
"loss": 0.2547, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.17648651021244222, |
|
"grad_norm": 1.8143495321273804, |
|
"learning_rate": 8.24267230964114e-06, |
|
"loss": 0.2563, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.1769437291508164, |
|
"grad_norm": 1.865336537361145, |
|
"learning_rate": 8.238095935447496e-06, |
|
"loss": 0.2568, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.1774009480891906, |
|
"grad_norm": 1.7321306467056274, |
|
"learning_rate": 8.233519561253855e-06, |
|
"loss": 0.2561, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.17785816702756482, |
|
"grad_norm": 1.6060725450515747, |
|
"learning_rate": 8.22894318706021e-06, |
|
"loss": 0.252, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.17831538596593902, |
|
"grad_norm": 1.4754799604415894, |
|
"learning_rate": 8.22436681286657e-06, |
|
"loss": 0.247, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.1787726049043132, |
|
"grad_norm": 1.8268160820007324, |
|
"learning_rate": 8.219790438672925e-06, |
|
"loss": 0.2558, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.17922982384268743, |
|
"grad_norm": 1.5629231929779053, |
|
"learning_rate": 8.215214064479282e-06, |
|
"loss": 0.2578, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.17968704278106162, |
|
"grad_norm": 1.7426457405090332, |
|
"learning_rate": 8.21063769028564e-06, |
|
"loss": 0.2569, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.18014426171943582, |
|
"grad_norm": 1.6766743659973145, |
|
"learning_rate": 8.206061316091997e-06, |
|
"loss": 0.2528, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.18060148065781004, |
|
"grad_norm": 1.3292638063430786, |
|
"learning_rate": 8.201484941898354e-06, |
|
"loss": 0.2485, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.18105869959618423, |
|
"grad_norm": 2.073800563812256, |
|
"learning_rate": 8.196908567704712e-06, |
|
"loss": 0.2538, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.18151591853455842, |
|
"grad_norm": 1.4113343954086304, |
|
"learning_rate": 8.192332193511067e-06, |
|
"loss": 0.2536, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.18197313747293264, |
|
"grad_norm": 2.1124043464660645, |
|
"learning_rate": 8.187755819317426e-06, |
|
"loss": 0.2564, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.18243035641130684, |
|
"grad_norm": 1.423259973526001, |
|
"learning_rate": 8.183179445123782e-06, |
|
"loss": 0.2553, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.18288757534968103, |
|
"grad_norm": 1.9814764261245728, |
|
"learning_rate": 8.17860307093014e-06, |
|
"loss": 0.2521, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.18334479428805525, |
|
"grad_norm": 1.2298426628112793, |
|
"learning_rate": 8.174026696736496e-06, |
|
"loss": 0.2539, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.18380201322642944, |
|
"grad_norm": 1.2353808879852295, |
|
"learning_rate": 8.169450322542854e-06, |
|
"loss": 0.2589, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.18425923216480367, |
|
"grad_norm": 1.585706114768982, |
|
"learning_rate": 8.16487394834921e-06, |
|
"loss": 0.2535, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.18471645110317786, |
|
"grad_norm": 1.6619884967803955, |
|
"learning_rate": 8.160297574155568e-06, |
|
"loss": 0.2523, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.18517367004155205, |
|
"grad_norm": 1.504461407661438, |
|
"learning_rate": 8.155721199961925e-06, |
|
"loss": 0.2508, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.18563088897992627, |
|
"grad_norm": 1.1175047159194946, |
|
"learning_rate": 8.151144825768283e-06, |
|
"loss": 0.2554, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.18608810791830047, |
|
"grad_norm": 1.6364964246749878, |
|
"learning_rate": 8.146568451574638e-06, |
|
"loss": 0.2573, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.18654532685667466, |
|
"grad_norm": 1.436776876449585, |
|
"learning_rate": 8.141992077380997e-06, |
|
"loss": 0.2527, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.18700254579504888, |
|
"grad_norm": 1.684793472290039, |
|
"learning_rate": 8.137415703187353e-06, |
|
"loss": 0.2556, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.18745976473342307, |
|
"grad_norm": 2.135289430618286, |
|
"learning_rate": 8.132839328993712e-06, |
|
"loss": 0.2536, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.18791698367179727, |
|
"grad_norm": 1.6975624561309814, |
|
"learning_rate": 8.128262954800067e-06, |
|
"loss": 0.2452, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.1883742026101715, |
|
"grad_norm": 1.3779131174087524, |
|
"learning_rate": 8.123686580606425e-06, |
|
"loss": 0.2519, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.18883142154854568, |
|
"grad_norm": 2.1386914253234863, |
|
"learning_rate": 8.119110206412782e-06, |
|
"loss": 0.2521, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.18928864048691987, |
|
"grad_norm": 2.1056151390075684, |
|
"learning_rate": 8.11453383221914e-06, |
|
"loss": 0.2519, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.1897458594252941, |
|
"grad_norm": 1.797166109085083, |
|
"learning_rate": 8.109957458025496e-06, |
|
"loss": 0.2498, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.1902030783636683, |
|
"grad_norm": 1.8904006481170654, |
|
"learning_rate": 8.105381083831854e-06, |
|
"loss": 0.2537, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.19066029730204248, |
|
"grad_norm": 2.1598122119903564, |
|
"learning_rate": 8.100804709638211e-06, |
|
"loss": 0.2539, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.1911175162404167, |
|
"grad_norm": 1.544722318649292, |
|
"learning_rate": 8.096228335444568e-06, |
|
"loss": 0.2486, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.1915747351787909, |
|
"grad_norm": 1.8575553894042969, |
|
"learning_rate": 8.091651961250926e-06, |
|
"loss": 0.2531, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.1920319541171651, |
|
"grad_norm": 0.9131256341934204, |
|
"learning_rate": 8.087075587057283e-06, |
|
"loss": 0.2485, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.1924891730555393, |
|
"grad_norm": 2.0034356117248535, |
|
"learning_rate": 8.08249921286364e-06, |
|
"loss": 0.2522, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.1929463919939135, |
|
"grad_norm": 1.5028212070465088, |
|
"learning_rate": 8.077922838669996e-06, |
|
"loss": 0.2462, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.1934036109322877, |
|
"grad_norm": 1.4713739156723022, |
|
"learning_rate": 8.073346464476353e-06, |
|
"loss": 0.2483, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.19386082987066192, |
|
"grad_norm": 1.6516448259353638, |
|
"learning_rate": 8.06877009028271e-06, |
|
"loss": 0.2446, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.1943180488090361, |
|
"grad_norm": 1.0185027122497559, |
|
"learning_rate": 8.064193716089068e-06, |
|
"loss": 0.2465, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.1947752677474103, |
|
"grad_norm": 1.6575361490249634, |
|
"learning_rate": 8.059617341895425e-06, |
|
"loss": 0.248, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.19523248668578452, |
|
"grad_norm": 1.0781890153884888, |
|
"learning_rate": 8.055040967701782e-06, |
|
"loss": 0.2543, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.19568970562415872, |
|
"grad_norm": 1.0661412477493286, |
|
"learning_rate": 8.05046459350814e-06, |
|
"loss": 0.2482, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.1961469245625329, |
|
"grad_norm": 2.0978198051452637, |
|
"learning_rate": 8.045888219314497e-06, |
|
"loss": 0.2479, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.19660414350090713, |
|
"grad_norm": 1.5128875970840454, |
|
"learning_rate": 8.041311845120854e-06, |
|
"loss": 0.2482, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.19706136243928132, |
|
"grad_norm": 1.4031188488006592, |
|
"learning_rate": 8.036735470927211e-06, |
|
"loss": 0.2505, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.19751858137765552, |
|
"grad_norm": 1.6590416431427002, |
|
"learning_rate": 8.032159096733568e-06, |
|
"loss": 0.2487, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.19797580031602974, |
|
"grad_norm": 1.5777417421340942, |
|
"learning_rate": 8.027582722539926e-06, |
|
"loss": 0.2464, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.19843301925440393, |
|
"grad_norm": 1.3186599016189575, |
|
"learning_rate": 8.023006348346281e-06, |
|
"loss": 0.2469, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.19889023819277812, |
|
"grad_norm": 1.8318928480148315, |
|
"learning_rate": 8.01842997415264e-06, |
|
"loss": 0.2418, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.19934745713115234, |
|
"grad_norm": 1.4368090629577637, |
|
"learning_rate": 8.013853599958996e-06, |
|
"loss": 0.2483, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.19980467606952654, |
|
"grad_norm": 1.7631844282150269, |
|
"learning_rate": 8.009277225765353e-06, |
|
"loss": 0.2517, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.20026189500790073, |
|
"grad_norm": 1.421195387840271, |
|
"learning_rate": 8.00470085157171e-06, |
|
"loss": 0.2506, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.20071911394627495, |
|
"grad_norm": 2.1690146923065186, |
|
"learning_rate": 8.000124477378068e-06, |
|
"loss": 0.2459, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.20117633288464914, |
|
"grad_norm": 1.6307331323623657, |
|
"learning_rate": 7.995548103184425e-06, |
|
"loss": 0.2499, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.20163355182302334, |
|
"grad_norm": 1.4969900846481323, |
|
"learning_rate": 7.990971728990782e-06, |
|
"loss": 0.2504, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.20209077076139756, |
|
"grad_norm": 1.8687270879745483, |
|
"learning_rate": 7.98639535479714e-06, |
|
"loss": 0.2429, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.20254798969977175, |
|
"grad_norm": 1.7077059745788574, |
|
"learning_rate": 7.981818980603497e-06, |
|
"loss": 0.2428, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.20300520863814595, |
|
"grad_norm": 2.0460216999053955, |
|
"learning_rate": 7.977242606409852e-06, |
|
"loss": 0.2521, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.20346242757652017, |
|
"grad_norm": 1.2996711730957031, |
|
"learning_rate": 7.972666232216211e-06, |
|
"loss": 0.2484, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.20391964651489436, |
|
"grad_norm": 1.2837764024734497, |
|
"learning_rate": 7.968089858022567e-06, |
|
"loss": 0.2473, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.20437686545326858, |
|
"grad_norm": 1.495692253112793, |
|
"learning_rate": 7.963513483828926e-06, |
|
"loss": 0.2557, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.20483408439164277, |
|
"grad_norm": 1.4509683847427368, |
|
"learning_rate": 7.958937109635281e-06, |
|
"loss": 0.2475, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.20529130333001697, |
|
"grad_norm": 1.1807700395584106, |
|
"learning_rate": 7.954360735441639e-06, |
|
"loss": 0.2467, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.2057485222683912, |
|
"grad_norm": 3.423560619354248, |
|
"learning_rate": 7.949784361247996e-06, |
|
"loss": 0.2477, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.20620574120676538, |
|
"grad_norm": 1.9667267799377441, |
|
"learning_rate": 7.945207987054353e-06, |
|
"loss": 0.2473, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.20666296014513957, |
|
"grad_norm": 1.695909023284912, |
|
"learning_rate": 7.94063161286071e-06, |
|
"loss": 0.2485, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.2071201790835138, |
|
"grad_norm": 1.5767865180969238, |
|
"learning_rate": 7.936055238667068e-06, |
|
"loss": 0.2462, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.207577398021888, |
|
"grad_norm": 1.427411675453186, |
|
"learning_rate": 7.931478864473423e-06, |
|
"loss": 0.2495, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.20803461696026218, |
|
"grad_norm": 1.1181446313858032, |
|
"learning_rate": 7.926902490279782e-06, |
|
"loss": 0.2444, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.2084918358986364, |
|
"grad_norm": 1.3804079294204712, |
|
"learning_rate": 7.922326116086138e-06, |
|
"loss": 0.2459, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.2089490548370106, |
|
"grad_norm": 1.2145448923110962, |
|
"learning_rate": 7.917749741892497e-06, |
|
"loss": 0.2458, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.2094062737753848, |
|
"grad_norm": 1.2149016857147217, |
|
"learning_rate": 7.913173367698853e-06, |
|
"loss": 0.2392, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.209863492713759, |
|
"grad_norm": 1.4271708726882935, |
|
"learning_rate": 7.908596993505211e-06, |
|
"loss": 0.2439, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.2103207116521332, |
|
"grad_norm": 1.336596965789795, |
|
"learning_rate": 7.904020619311567e-06, |
|
"loss": 0.2481, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.2107779305905074, |
|
"grad_norm": 1.6744037866592407, |
|
"learning_rate": 7.899444245117924e-06, |
|
"loss": 0.2442, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.21123514952888162, |
|
"grad_norm": 1.5563931465148926, |
|
"learning_rate": 7.894867870924282e-06, |
|
"loss": 0.2498, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.2116923684672558, |
|
"grad_norm": 1.8821616172790527, |
|
"learning_rate": 7.890291496730639e-06, |
|
"loss": 0.2443, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.21214958740563, |
|
"grad_norm": 2.037843704223633, |
|
"learning_rate": 7.885715122536996e-06, |
|
"loss": 0.2434, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.21260680634400422, |
|
"grad_norm": 1.0804463624954224, |
|
"learning_rate": 7.881138748343353e-06, |
|
"loss": 0.2509, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.21306402528237842, |
|
"grad_norm": 1.5283472537994385, |
|
"learning_rate": 7.87656237414971e-06, |
|
"loss": 0.2436, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.2135212442207526, |
|
"grad_norm": 1.7273632287979126, |
|
"learning_rate": 7.871985999956068e-06, |
|
"loss": 0.2477, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.21397846315912683, |
|
"grad_norm": 1.5856326818466187, |
|
"learning_rate": 7.867409625762424e-06, |
|
"loss": 0.2406, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.21443568209750102, |
|
"grad_norm": 1.1935285329818726, |
|
"learning_rate": 7.862833251568783e-06, |
|
"loss": 0.2474, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.21489290103587522, |
|
"grad_norm": 1.7221565246582031, |
|
"learning_rate": 7.858256877375138e-06, |
|
"loss": 0.2411, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.21535011997424944, |
|
"grad_norm": 1.7638108730316162, |
|
"learning_rate": 7.853680503181495e-06, |
|
"loss": 0.2487, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.21580733891262363, |
|
"grad_norm": 1.392970085144043, |
|
"learning_rate": 7.849104128987853e-06, |
|
"loss": 0.2475, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.21626455785099782, |
|
"grad_norm": 1.30288565158844, |
|
"learning_rate": 7.84452775479421e-06, |
|
"loss": 0.2485, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.21672177678937204, |
|
"grad_norm": 1.2558834552764893, |
|
"learning_rate": 7.839951380600567e-06, |
|
"loss": 0.246, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.21717899572774624, |
|
"grad_norm": 9.420547485351562, |
|
"learning_rate": 7.835375006406925e-06, |
|
"loss": 0.2465, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.21763621466612043, |
|
"grad_norm": 1.3113701343536377, |
|
"learning_rate": 7.830798632213282e-06, |
|
"loss": 0.2479, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.21809343360449465, |
|
"grad_norm": 1.3305801153182983, |
|
"learning_rate": 7.826222258019639e-06, |
|
"loss": 0.2454, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.21855065254286885, |
|
"grad_norm": 1.7414227724075317, |
|
"learning_rate": 7.821645883825996e-06, |
|
"loss": 0.2419, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.21900787148124304, |
|
"grad_norm": 2.2423360347747803, |
|
"learning_rate": 7.817069509632354e-06, |
|
"loss": 0.245, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.21946509041961726, |
|
"grad_norm": 1.4997841119766235, |
|
"learning_rate": 7.812493135438711e-06, |
|
"loss": 0.2454, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.21992230935799145, |
|
"grad_norm": 1.442734718322754, |
|
"learning_rate": 7.807916761245066e-06, |
|
"loss": 0.2411, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.22037952829636565, |
|
"grad_norm": 1.715790033340454, |
|
"learning_rate": 7.803340387051424e-06, |
|
"loss": 0.2453, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.22083674723473987, |
|
"grad_norm": 1.3321577310562134, |
|
"learning_rate": 7.798764012857781e-06, |
|
"loss": 0.2493, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.22129396617311406, |
|
"grad_norm": 1.7420936822891235, |
|
"learning_rate": 7.794187638664138e-06, |
|
"loss": 0.2388, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.22175118511148825, |
|
"grad_norm": 1.81510329246521, |
|
"learning_rate": 7.789611264470496e-06, |
|
"loss": 0.2473, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.22220840404986247, |
|
"grad_norm": 1.5320991277694702, |
|
"learning_rate": 7.785034890276853e-06, |
|
"loss": 0.245, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.22266562298823667, |
|
"grad_norm": 1.9116175174713135, |
|
"learning_rate": 7.78045851608321e-06, |
|
"loss": 0.2387, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.2231228419266109, |
|
"grad_norm": 1.2568988800048828, |
|
"learning_rate": 7.775882141889567e-06, |
|
"loss": 0.2426, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.22358006086498508, |
|
"grad_norm": 1.2286899089813232, |
|
"learning_rate": 7.771305767695925e-06, |
|
"loss": 0.237, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.22403727980335927, |
|
"grad_norm": 1.5561753511428833, |
|
"learning_rate": 7.766729393502282e-06, |
|
"loss": 0.241, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.2244944987417335, |
|
"grad_norm": 1.5937217473983765, |
|
"learning_rate": 7.76215301930864e-06, |
|
"loss": 0.2478, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.2249517176801077, |
|
"grad_norm": 1.533897042274475, |
|
"learning_rate": 7.757576645114997e-06, |
|
"loss": 0.2402, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.22540893661848188, |
|
"grad_norm": 1.7771514654159546, |
|
"learning_rate": 7.753000270921352e-06, |
|
"loss": 0.2472, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.2258661555568561, |
|
"grad_norm": 1.8437062501907349, |
|
"learning_rate": 7.748423896727711e-06, |
|
"loss": 0.2441, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.2263233744952303, |
|
"grad_norm": 0.9731696844100952, |
|
"learning_rate": 7.743847522534067e-06, |
|
"loss": 0.2422, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.2267805934336045, |
|
"grad_norm": 1.3486838340759277, |
|
"learning_rate": 7.739271148340426e-06, |
|
"loss": 0.2404, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.2272378123719787, |
|
"grad_norm": 1.1618529558181763, |
|
"learning_rate": 7.734694774146781e-06, |
|
"loss": 0.2431, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.2276950313103529, |
|
"grad_norm": 1.7412848472595215, |
|
"learning_rate": 7.730118399953138e-06, |
|
"loss": 0.2398, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.2281522502487271, |
|
"grad_norm": 1.6766868829727173, |
|
"learning_rate": 7.725542025759496e-06, |
|
"loss": 0.2409, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.22860946918710132, |
|
"grad_norm": 1.1682603359222412, |
|
"learning_rate": 7.720965651565853e-06, |
|
"loss": 0.2429, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.2290666881254755, |
|
"grad_norm": 1.1686651706695557, |
|
"learning_rate": 7.71638927737221e-06, |
|
"loss": 0.247, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.2295239070638497, |
|
"grad_norm": 1.0763442516326904, |
|
"learning_rate": 7.711812903178568e-06, |
|
"loss": 0.2405, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.22998112600222392, |
|
"grad_norm": 1.3527404069900513, |
|
"learning_rate": 7.707236528984923e-06, |
|
"loss": 0.2397, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.23043834494059812, |
|
"grad_norm": 1.5660016536712646, |
|
"learning_rate": 7.702660154791282e-06, |
|
"loss": 0.2465, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.2308955638789723, |
|
"grad_norm": 1.876938819885254, |
|
"learning_rate": 7.698083780597638e-06, |
|
"loss": 0.2413, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.23135278281734653, |
|
"grad_norm": 1.446905255317688, |
|
"learning_rate": 7.693507406403997e-06, |
|
"loss": 0.2422, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.23181000175572072, |
|
"grad_norm": 1.2305630445480347, |
|
"learning_rate": 7.688931032210352e-06, |
|
"loss": 0.2435, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.23226722069409492, |
|
"grad_norm": 1.6017937660217285, |
|
"learning_rate": 7.68435465801671e-06, |
|
"loss": 0.2375, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.23272443963246914, |
|
"grad_norm": 1.593798041343689, |
|
"learning_rate": 7.679778283823067e-06, |
|
"loss": 0.2413, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.23318165857084333, |
|
"grad_norm": 1.7218447923660278, |
|
"learning_rate": 7.675201909629424e-06, |
|
"loss": 0.2406, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.23363887750921752, |
|
"grad_norm": 1.631316900253296, |
|
"learning_rate": 7.670625535435781e-06, |
|
"loss": 0.2391, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.23409609644759174, |
|
"grad_norm": 1.3699698448181152, |
|
"learning_rate": 7.666049161242139e-06, |
|
"loss": 0.2406, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.23455331538596594, |
|
"grad_norm": 1.853630542755127, |
|
"learning_rate": 7.661472787048494e-06, |
|
"loss": 0.2396, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.23501053432434013, |
|
"grad_norm": 1.54131019115448, |
|
"learning_rate": 7.656896412854853e-06, |
|
"loss": 0.2435, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.23546775326271435, |
|
"grad_norm": 1.9329149723052979, |
|
"learning_rate": 7.652320038661209e-06, |
|
"loss": 0.2384, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.23592497220108855, |
|
"grad_norm": 1.2017878293991089, |
|
"learning_rate": 7.647743664467568e-06, |
|
"loss": 0.2427, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.23638219113946274, |
|
"grad_norm": 1.0747284889221191, |
|
"learning_rate": 7.643167290273923e-06, |
|
"loss": 0.2381, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.23683941007783696, |
|
"grad_norm": 1.9844415187835693, |
|
"learning_rate": 7.63859091608028e-06, |
|
"loss": 0.2357, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.23729662901621115, |
|
"grad_norm": 1.74272620677948, |
|
"learning_rate": 7.634014541886638e-06, |
|
"loss": 0.2429, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.23775384795458535, |
|
"grad_norm": 1.6719539165496826, |
|
"learning_rate": 7.629438167692995e-06, |
|
"loss": 0.237, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.23821106689295957, |
|
"grad_norm": 1.420264720916748, |
|
"learning_rate": 7.624861793499353e-06, |
|
"loss": 0.2392, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.23866828583133376, |
|
"grad_norm": 1.3896255493164062, |
|
"learning_rate": 7.62028541930571e-06, |
|
"loss": 0.239, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.23912550476970795, |
|
"grad_norm": 1.210802674293518, |
|
"learning_rate": 7.615709045112066e-06, |
|
"loss": 0.2407, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.23958272370808217, |
|
"grad_norm": 1.5185495615005493, |
|
"learning_rate": 7.611132670918424e-06, |
|
"loss": 0.2363, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.24003994264645637, |
|
"grad_norm": 1.4552907943725586, |
|
"learning_rate": 7.606556296724781e-06, |
|
"loss": 0.2423, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.24049716158483056, |
|
"grad_norm": 1.2917208671569824, |
|
"learning_rate": 7.601979922531139e-06, |
|
"loss": 0.2423, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.24095438052320478, |
|
"grad_norm": 1.4719345569610596, |
|
"learning_rate": 7.597403548337495e-06, |
|
"loss": 0.2375, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.24141159946157897, |
|
"grad_norm": 1.5045033693313599, |
|
"learning_rate": 7.592827174143853e-06, |
|
"loss": 0.2404, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.24186881839995317, |
|
"grad_norm": 1.222699761390686, |
|
"learning_rate": 7.58825079995021e-06, |
|
"loss": 0.2428, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.2423260373383274, |
|
"grad_norm": 1.681038498878479, |
|
"learning_rate": 7.583674425756566e-06, |
|
"loss": 0.2424, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.24278325627670158, |
|
"grad_norm": 1.6437132358551025, |
|
"learning_rate": 7.579098051562924e-06, |
|
"loss": 0.2363, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.2432404752150758, |
|
"grad_norm": 1.6310714483261108, |
|
"learning_rate": 7.574521677369281e-06, |
|
"loss": 0.2334, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.24369769415345, |
|
"grad_norm": 0.9880791902542114, |
|
"learning_rate": 7.569945303175639e-06, |
|
"loss": 0.2398, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.2441549130918242, |
|
"grad_norm": 1.508718729019165, |
|
"learning_rate": 7.565368928981995e-06, |
|
"loss": 0.2388, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.2446121320301984, |
|
"grad_norm": 2.1801445484161377, |
|
"learning_rate": 7.560792554788352e-06, |
|
"loss": 0.2414, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.2450693509685726, |
|
"grad_norm": 1.4477598667144775, |
|
"learning_rate": 7.55621618059471e-06, |
|
"loss": 0.2414, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.2455265699069468, |
|
"grad_norm": 1.3772883415222168, |
|
"learning_rate": 7.551639806401066e-06, |
|
"loss": 0.2358, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.24598378884532102, |
|
"grad_norm": 1.6583518981933594, |
|
"learning_rate": 7.5470634322074244e-06, |
|
"loss": 0.2402, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.2464410077836952, |
|
"grad_norm": 1.6297439336776733, |
|
"learning_rate": 7.542487058013781e-06, |
|
"loss": 0.2406, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.2468982267220694, |
|
"grad_norm": 1.544605016708374, |
|
"learning_rate": 7.537910683820137e-06, |
|
"loss": 0.2401, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.24735544566044362, |
|
"grad_norm": 2.170027256011963, |
|
"learning_rate": 7.533334309626495e-06, |
|
"loss": 0.2433, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.24781266459881782, |
|
"grad_norm": 1.6215145587921143, |
|
"learning_rate": 7.528757935432852e-06, |
|
"loss": 0.2361, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.248269883537192, |
|
"grad_norm": 1.4996685981750488, |
|
"learning_rate": 7.52418156123921e-06, |
|
"loss": 0.2366, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.24872710247556623, |
|
"grad_norm": 1.610382080078125, |
|
"learning_rate": 7.519605187045566e-06, |
|
"loss": 0.2429, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.24918432141394042, |
|
"grad_norm": 1.7235709428787231, |
|
"learning_rate": 7.515028812851924e-06, |
|
"loss": 0.2395, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.24964154035231462, |
|
"grad_norm": 1.6454797983169556, |
|
"learning_rate": 7.510452438658281e-06, |
|
"loss": 0.2352, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.2500987592906888, |
|
"grad_norm": 1.6150950193405151, |
|
"learning_rate": 7.505876064464637e-06, |
|
"loss": 0.2431, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.250555978229063, |
|
"grad_norm": 1.4403808116912842, |
|
"learning_rate": 7.5012996902709955e-06, |
|
"loss": 0.2377, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.25101319716743725, |
|
"grad_norm": 7.061529636383057, |
|
"learning_rate": 7.496723316077352e-06, |
|
"loss": 0.2444, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.25147041610581145, |
|
"grad_norm": 1.3562450408935547, |
|
"learning_rate": 7.492146941883709e-06, |
|
"loss": 0.2329, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.25192763504418564, |
|
"grad_norm": 1.495060920715332, |
|
"learning_rate": 7.4875705676900665e-06, |
|
"loss": 0.2419, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.25238485398255983, |
|
"grad_norm": 1.5048260688781738, |
|
"learning_rate": 7.482994193496424e-06, |
|
"loss": 0.2341, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.252842072920934, |
|
"grad_norm": 1.3745087385177612, |
|
"learning_rate": 7.478417819302781e-06, |
|
"loss": 0.239, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.2532992918593083, |
|
"grad_norm": 1.4182616472244263, |
|
"learning_rate": 7.473841445109138e-06, |
|
"loss": 0.2339, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.25375651079768247, |
|
"grad_norm": 1.4499032497406006, |
|
"learning_rate": 7.469265070915495e-06, |
|
"loss": 0.2418, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.25421372973605666, |
|
"grad_norm": 1.2812670469284058, |
|
"learning_rate": 7.464688696721852e-06, |
|
"loss": 0.2341, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.25467094867443085, |
|
"grad_norm": 1.7163888216018677, |
|
"learning_rate": 7.460112322528209e-06, |
|
"loss": 0.239, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.25512816761280505, |
|
"grad_norm": 1.596152424812317, |
|
"learning_rate": 7.4555359483345666e-06, |
|
"loss": 0.2364, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.25558538655117924, |
|
"grad_norm": 1.5450259447097778, |
|
"learning_rate": 7.450959574140924e-06, |
|
"loss": 0.2417, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.2560426054895535, |
|
"grad_norm": 1.5092369318008423, |
|
"learning_rate": 7.446383199947281e-06, |
|
"loss": 0.239, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.2564998244279277, |
|
"grad_norm": 1.8400509357452393, |
|
"learning_rate": 7.441806825753638e-06, |
|
"loss": 0.239, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.2569570433663019, |
|
"grad_norm": 1.4208296537399292, |
|
"learning_rate": 7.437230451559995e-06, |
|
"loss": 0.2384, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.25741426230467607, |
|
"grad_norm": 1.839404821395874, |
|
"learning_rate": 7.432654077366352e-06, |
|
"loss": 0.2413, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.25787148124305026, |
|
"grad_norm": 1.3527752161026, |
|
"learning_rate": 7.428077703172709e-06, |
|
"loss": 0.2366, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.25832870018142445, |
|
"grad_norm": 1.7706711292266846, |
|
"learning_rate": 7.423501328979067e-06, |
|
"loss": 0.2387, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.2587859191197987, |
|
"grad_norm": 1.1660232543945312, |
|
"learning_rate": 7.418924954785424e-06, |
|
"loss": 0.2381, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.2592431380581729, |
|
"grad_norm": 1.6995941400527954, |
|
"learning_rate": 7.41434858059178e-06, |
|
"loss": 0.2333, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.2597003569965471, |
|
"grad_norm": 1.5616917610168457, |
|
"learning_rate": 7.4097722063981385e-06, |
|
"loss": 0.2354, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.2601575759349213, |
|
"grad_norm": 1.7792470455169678, |
|
"learning_rate": 7.405195832204495e-06, |
|
"loss": 0.2338, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.2606147948732955, |
|
"grad_norm": 1.0877039432525635, |
|
"learning_rate": 7.400619458010852e-06, |
|
"loss": 0.231, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.26107201381166967, |
|
"grad_norm": 1.5051804780960083, |
|
"learning_rate": 7.3960430838172095e-06, |
|
"loss": 0.2325, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.2615292327500439, |
|
"grad_norm": 1.912229061126709, |
|
"learning_rate": 7.391466709623566e-06, |
|
"loss": 0.2329, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.2619864516884181, |
|
"grad_norm": 1.576975703239441, |
|
"learning_rate": 7.386890335429924e-06, |
|
"loss": 0.2341, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.2624436706267923, |
|
"grad_norm": 1.5463943481445312, |
|
"learning_rate": 7.3823139612362804e-06, |
|
"loss": 0.2363, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.2629008895651665, |
|
"grad_norm": 1.4972643852233887, |
|
"learning_rate": 7.3777375870426386e-06, |
|
"loss": 0.2358, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.2633581085035407, |
|
"grad_norm": 1.4807320833206177, |
|
"learning_rate": 7.373161212848995e-06, |
|
"loss": 0.2372, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.2638153274419149, |
|
"grad_norm": 1.362641453742981, |
|
"learning_rate": 7.368584838655351e-06, |
|
"loss": 0.2369, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.26427254638028913, |
|
"grad_norm": 2.3555381298065186, |
|
"learning_rate": 7.3640084644617095e-06, |
|
"loss": 0.2376, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.2647297653186633, |
|
"grad_norm": 1.4322718381881714, |
|
"learning_rate": 7.359432090268066e-06, |
|
"loss": 0.2344, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.2651869842570375, |
|
"grad_norm": 1.7021692991256714, |
|
"learning_rate": 7.354855716074424e-06, |
|
"loss": 0.2383, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.2656442031954117, |
|
"grad_norm": 1.3686518669128418, |
|
"learning_rate": 7.3502793418807805e-06, |
|
"loss": 0.233, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.2661014221337859, |
|
"grad_norm": 1.8416357040405273, |
|
"learning_rate": 7.345702967687137e-06, |
|
"loss": 0.2354, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.2665586410721601, |
|
"grad_norm": 1.4981660842895508, |
|
"learning_rate": 7.341126593493495e-06, |
|
"loss": 0.2382, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.26701586001053434, |
|
"grad_norm": 1.022232174873352, |
|
"learning_rate": 7.3365502192998515e-06, |
|
"loss": 0.2325, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.26747307894890854, |
|
"grad_norm": 1.6213542222976685, |
|
"learning_rate": 7.33197384510621e-06, |
|
"loss": 0.2357, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.26793029788728273, |
|
"grad_norm": 1.7134053707122803, |
|
"learning_rate": 7.327397470912566e-06, |
|
"loss": 0.2387, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.2683875168256569, |
|
"grad_norm": 1.051689863204956, |
|
"learning_rate": 7.3228210967189225e-06, |
|
"loss": 0.2318, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.2688447357640311, |
|
"grad_norm": 1.5515960454940796, |
|
"learning_rate": 7.318244722525281e-06, |
|
"loss": 0.2377, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.2693019547024053, |
|
"grad_norm": 1.414265513420105, |
|
"learning_rate": 7.313668348331637e-06, |
|
"loss": 0.2368, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.26975917364077956, |
|
"grad_norm": 3.989739418029785, |
|
"learning_rate": 7.309091974137995e-06, |
|
"loss": 0.2372, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.27021639257915375, |
|
"grad_norm": 1.0414005517959595, |
|
"learning_rate": 7.304515599944352e-06, |
|
"loss": 0.2398, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.27067361151752795, |
|
"grad_norm": 2.2172224521636963, |
|
"learning_rate": 7.29993922575071e-06, |
|
"loss": 0.2375, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.27113083045590214, |
|
"grad_norm": 1.6848254203796387, |
|
"learning_rate": 7.295362851557066e-06, |
|
"loss": 0.2349, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.27158804939427633, |
|
"grad_norm": 1.2511268854141235, |
|
"learning_rate": 7.2907864773634226e-06, |
|
"loss": 0.2331, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.2720452683326506, |
|
"grad_norm": 1.4679317474365234, |
|
"learning_rate": 7.286210103169781e-06, |
|
"loss": 0.2452, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.2725024872710248, |
|
"grad_norm": 1.6700774431228638, |
|
"learning_rate": 7.281633728976137e-06, |
|
"loss": 0.2329, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.27295970620939897, |
|
"grad_norm": 1.0634691715240479, |
|
"learning_rate": 7.277057354782495e-06, |
|
"loss": 0.2339, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.27341692514777316, |
|
"grad_norm": 1.706181287765503, |
|
"learning_rate": 7.272480980588852e-06, |
|
"loss": 0.2382, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.27387414408614735, |
|
"grad_norm": 1.1611428260803223, |
|
"learning_rate": 7.267904606395209e-06, |
|
"loss": 0.2377, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.27433136302452155, |
|
"grad_norm": 1.2169976234436035, |
|
"learning_rate": 7.263328232201566e-06, |
|
"loss": 0.2326, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.2747885819628958, |
|
"grad_norm": 1.947888970375061, |
|
"learning_rate": 7.258751858007923e-06, |
|
"loss": 0.2316, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.27524580090127, |
|
"grad_norm": 1.2794651985168457, |
|
"learning_rate": 7.254175483814281e-06, |
|
"loss": 0.2373, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.2757030198396442, |
|
"grad_norm": 1.6995222568511963, |
|
"learning_rate": 7.249599109620637e-06, |
|
"loss": 0.2338, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.2761602387780184, |
|
"grad_norm": 1.4030494689941406, |
|
"learning_rate": 7.2450227354269945e-06, |
|
"loss": 0.2353, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.27661745771639257, |
|
"grad_norm": 1.529697299003601, |
|
"learning_rate": 7.240446361233352e-06, |
|
"loss": 0.2318, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.27707467665476676, |
|
"grad_norm": 1.3057571649551392, |
|
"learning_rate": 7.235869987039709e-06, |
|
"loss": 0.2331, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.277531895593141, |
|
"grad_norm": 1.6574506759643555, |
|
"learning_rate": 7.231293612846066e-06, |
|
"loss": 0.2373, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.2779891145315152, |
|
"grad_norm": 1.468015432357788, |
|
"learning_rate": 7.226717238652423e-06, |
|
"loss": 0.2339, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.2784463334698894, |
|
"grad_norm": 1.5622738599777222, |
|
"learning_rate": 7.22214086445878e-06, |
|
"loss": 0.239, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.2789035524082636, |
|
"grad_norm": 1.5072553157806396, |
|
"learning_rate": 7.217564490265137e-06, |
|
"loss": 0.2345, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.2793607713466378, |
|
"grad_norm": 1.3062992095947266, |
|
"learning_rate": 7.2129881160714946e-06, |
|
"loss": 0.2327, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.279817990285012, |
|
"grad_norm": 1.2529741525650024, |
|
"learning_rate": 7.208411741877852e-06, |
|
"loss": 0.2317, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.2802752092233862, |
|
"grad_norm": 1.3761204481124878, |
|
"learning_rate": 7.203835367684209e-06, |
|
"loss": 0.2323, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.2807324281617604, |
|
"grad_norm": 1.5438259840011597, |
|
"learning_rate": 7.1992589934905655e-06, |
|
"loss": 0.2314, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.2811896471001346, |
|
"grad_norm": 1.3885058164596558, |
|
"learning_rate": 7.194682619296924e-06, |
|
"loss": 0.2381, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.2816468660385088, |
|
"grad_norm": 2.722839117050171, |
|
"learning_rate": 7.19010624510328e-06, |
|
"loss": 0.2352, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.282104084976883, |
|
"grad_norm": 1.6473603248596191, |
|
"learning_rate": 7.185529870909637e-06, |
|
"loss": 0.2267, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.2825613039152572, |
|
"grad_norm": 4.843954086303711, |
|
"learning_rate": 7.180953496715995e-06, |
|
"loss": 0.2323, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.28301852285363144, |
|
"grad_norm": 1.541217565536499, |
|
"learning_rate": 7.176377122522351e-06, |
|
"loss": 0.2249, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.28347574179200563, |
|
"grad_norm": 3.621250867843628, |
|
"learning_rate": 7.171800748328709e-06, |
|
"loss": 0.2354, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.2839329607303798, |
|
"grad_norm": 1.7025402784347534, |
|
"learning_rate": 7.167224374135066e-06, |
|
"loss": 0.2367, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.284390179668754, |
|
"grad_norm": 1.3267154693603516, |
|
"learning_rate": 7.162647999941424e-06, |
|
"loss": 0.2346, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.2848473986071282, |
|
"grad_norm": 1.163634181022644, |
|
"learning_rate": 7.15807162574778e-06, |
|
"loss": 0.2317, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.2853046175455024, |
|
"grad_norm": 1.2446917295455933, |
|
"learning_rate": 7.153495251554137e-06, |
|
"loss": 0.2336, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.28576183648387665, |
|
"grad_norm": 1.2674944400787354, |
|
"learning_rate": 7.148918877360495e-06, |
|
"loss": 0.235, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.28621905542225085, |
|
"grad_norm": 1.5385478734970093, |
|
"learning_rate": 7.144342503166851e-06, |
|
"loss": 0.2352, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.28667627436062504, |
|
"grad_norm": 1.1245741844177246, |
|
"learning_rate": 7.139766128973209e-06, |
|
"loss": 0.2357, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.28713349329899923, |
|
"grad_norm": 1.405200719833374, |
|
"learning_rate": 7.135189754779566e-06, |
|
"loss": 0.2317, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.2875907122373734, |
|
"grad_norm": 1.4755611419677734, |
|
"learning_rate": 7.130613380585924e-06, |
|
"loss": 0.2333, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.2880479311757476, |
|
"grad_norm": 1.551849603652954, |
|
"learning_rate": 7.12603700639228e-06, |
|
"loss": 0.2349, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.28850515011412187, |
|
"grad_norm": 1.5056345462799072, |
|
"learning_rate": 7.121460632198637e-06, |
|
"loss": 0.2324, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.28896236905249606, |
|
"grad_norm": 1.2897391319274902, |
|
"learning_rate": 7.116884258004995e-06, |
|
"loss": 0.2348, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.28941958799087025, |
|
"grad_norm": 4.2867560386657715, |
|
"learning_rate": 7.112307883811351e-06, |
|
"loss": 0.2302, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.28987680692924445, |
|
"grad_norm": 1.673755407333374, |
|
"learning_rate": 7.107731509617709e-06, |
|
"loss": 0.2349, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.29033402586761864, |
|
"grad_norm": 1.3654760122299194, |
|
"learning_rate": 7.103155135424066e-06, |
|
"loss": 0.2331, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.2907912448059929, |
|
"grad_norm": 1.285056471824646, |
|
"learning_rate": 7.098578761230422e-06, |
|
"loss": 0.2322, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.2912484637443671, |
|
"grad_norm": 1.6767268180847168, |
|
"learning_rate": 7.09400238703678e-06, |
|
"loss": 0.235, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.2917056826827413, |
|
"grad_norm": 1.429954171180725, |
|
"learning_rate": 7.089426012843137e-06, |
|
"loss": 0.2329, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.29216290162111547, |
|
"grad_norm": 1.201323390007019, |
|
"learning_rate": 7.084849638649495e-06, |
|
"loss": 0.2315, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.29262012055948966, |
|
"grad_norm": 0.9910763502120972, |
|
"learning_rate": 7.080273264455851e-06, |
|
"loss": 0.2324, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.29307733949786385, |
|
"grad_norm": 1.744512915611267, |
|
"learning_rate": 7.075696890262208e-06, |
|
"loss": 0.2322, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 0.2935345584362381, |
|
"grad_norm": 1.4103306531906128, |
|
"learning_rate": 7.071120516068566e-06, |
|
"loss": 0.2305, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.2939917773746123, |
|
"grad_norm": 1.1745530366897583, |
|
"learning_rate": 7.066544141874922e-06, |
|
"loss": 0.2322, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 0.2944489963129865, |
|
"grad_norm": 1.4597954750061035, |
|
"learning_rate": 7.0619677676812804e-06, |
|
"loss": 0.2317, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.2949062152513607, |
|
"grad_norm": 1.2206711769104004, |
|
"learning_rate": 7.057391393487637e-06, |
|
"loss": 0.233, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 0.2953634341897349, |
|
"grad_norm": 1.5247403383255005, |
|
"learning_rate": 7.052815019293993e-06, |
|
"loss": 0.2304, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.29582065312810907, |
|
"grad_norm": 1.1999021768569946, |
|
"learning_rate": 7.048238645100351e-06, |
|
"loss": 0.2338, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 0.2962778720664833, |
|
"grad_norm": 1.4971832036972046, |
|
"learning_rate": 7.043662270906708e-06, |
|
"loss": 0.228, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.2967350910048575, |
|
"grad_norm": 1.3966095447540283, |
|
"learning_rate": 7.039085896713066e-06, |
|
"loss": 0.2272, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 0.2971923099432317, |
|
"grad_norm": 2.6198599338531494, |
|
"learning_rate": 7.034509522519422e-06, |
|
"loss": 0.231, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.2976495288816059, |
|
"grad_norm": 1.6076655387878418, |
|
"learning_rate": 7.02993314832578e-06, |
|
"loss": 0.2294, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 0.2981067478199801, |
|
"grad_norm": 1.1746286153793335, |
|
"learning_rate": 7.025356774132137e-06, |
|
"loss": 0.2307, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.2985639667583543, |
|
"grad_norm": 1.1930854320526123, |
|
"learning_rate": 7.020780399938494e-06, |
|
"loss": 0.2275, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 0.29902118569672853, |
|
"grad_norm": 1.406855821609497, |
|
"learning_rate": 7.0162040257448515e-06, |
|
"loss": 0.2294, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.2994784046351027, |
|
"grad_norm": 1.669003963470459, |
|
"learning_rate": 7.011627651551208e-06, |
|
"loss": 0.2285, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 0.2999356235734769, |
|
"grad_norm": 1.32759690284729, |
|
"learning_rate": 7.007051277357565e-06, |
|
"loss": 0.2349, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.3003928425118511, |
|
"grad_norm": 1.2544605731964111, |
|
"learning_rate": 7.0024749031639225e-06, |
|
"loss": 0.2302, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 0.3008500614502253, |
|
"grad_norm": 1.6793965101242065, |
|
"learning_rate": 6.99789852897028e-06, |
|
"loss": 0.2323, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.3013072803885995, |
|
"grad_norm": 2.0287883281707764, |
|
"learning_rate": 6.993322154776637e-06, |
|
"loss": 0.2269, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 0.30176449932697375, |
|
"grad_norm": 1.4484792947769165, |
|
"learning_rate": 6.988745780582994e-06, |
|
"loss": 0.2333, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.30222171826534794, |
|
"grad_norm": 57.33815383911133, |
|
"learning_rate": 6.9841694063893516e-06, |
|
"loss": 0.2303, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 0.30267893720372213, |
|
"grad_norm": 1.2864947319030762, |
|
"learning_rate": 6.979593032195708e-06, |
|
"loss": 0.2288, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.3031361561420963, |
|
"grad_norm": 1.2493833303451538, |
|
"learning_rate": 6.975016658002065e-06, |
|
"loss": 0.2296, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 0.3035933750804705, |
|
"grad_norm": 1.4573181867599487, |
|
"learning_rate": 6.9704402838084226e-06, |
|
"loss": 0.2241, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.3040505940188447, |
|
"grad_norm": 1.5703788995742798, |
|
"learning_rate": 6.96586390961478e-06, |
|
"loss": 0.2349, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 0.30450781295721896, |
|
"grad_norm": 1.7643167972564697, |
|
"learning_rate": 6.961287535421137e-06, |
|
"loss": 0.2313, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.30496503189559315, |
|
"grad_norm": 1.4087350368499756, |
|
"learning_rate": 6.956711161227494e-06, |
|
"loss": 0.2277, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 0.30542225083396735, |
|
"grad_norm": 1.5593231916427612, |
|
"learning_rate": 6.952134787033851e-06, |
|
"loss": 0.2315, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.30587946977234154, |
|
"grad_norm": 1.3212522268295288, |
|
"learning_rate": 6.947558412840208e-06, |
|
"loss": 0.2299, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 0.30633668871071573, |
|
"grad_norm": 1.4686697721481323, |
|
"learning_rate": 6.942982038646565e-06, |
|
"loss": 0.2308, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.3067939076490899, |
|
"grad_norm": 1.436549186706543, |
|
"learning_rate": 6.938405664452923e-06, |
|
"loss": 0.2311, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 0.3072511265874642, |
|
"grad_norm": 1.471840262413025, |
|
"learning_rate": 6.93382929025928e-06, |
|
"loss": 0.228, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.30770834552583837, |
|
"grad_norm": 1.3207478523254395, |
|
"learning_rate": 6.929252916065636e-06, |
|
"loss": 0.2274, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 0.30816556446421256, |
|
"grad_norm": 1.2210125923156738, |
|
"learning_rate": 6.9246765418719945e-06, |
|
"loss": 0.2286, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.30862278340258675, |
|
"grad_norm": 1.3577375411987305, |
|
"learning_rate": 6.920100167678351e-06, |
|
"loss": 0.2292, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 0.30908000234096095, |
|
"grad_norm": 1.1750257015228271, |
|
"learning_rate": 6.915523793484709e-06, |
|
"loss": 0.2324, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.30953722127933514, |
|
"grad_norm": 2.1666083335876465, |
|
"learning_rate": 6.9109474192910654e-06, |
|
"loss": 0.2287, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 0.3099944402177094, |
|
"grad_norm": 1.4774967432022095, |
|
"learning_rate": 6.906371045097422e-06, |
|
"loss": 0.2285, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.3104516591560836, |
|
"grad_norm": 1.6809563636779785, |
|
"learning_rate": 6.90179467090378e-06, |
|
"loss": 0.2302, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 0.3109088780944578, |
|
"grad_norm": 1.0657724142074585, |
|
"learning_rate": 6.8972182967101364e-06, |
|
"loss": 0.2317, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.31136609703283197, |
|
"grad_norm": 1.375364899635315, |
|
"learning_rate": 6.8926419225164945e-06, |
|
"loss": 0.2286, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 0.31182331597120616, |
|
"grad_norm": 2.182593584060669, |
|
"learning_rate": 6.888065548322851e-06, |
|
"loss": 0.2347, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.3122805349095804, |
|
"grad_norm": 1.7022758722305298, |
|
"learning_rate": 6.883489174129207e-06, |
|
"loss": 0.2286, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 0.3127377538479546, |
|
"grad_norm": 1.5238574743270874, |
|
"learning_rate": 6.8789127999355655e-06, |
|
"loss": 0.226, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.3131949727863288, |
|
"grad_norm": 1.5397921800613403, |
|
"learning_rate": 6.874336425741922e-06, |
|
"loss": 0.2314, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 0.313652191724703, |
|
"grad_norm": 1.4972351789474487, |
|
"learning_rate": 6.86976005154828e-06, |
|
"loss": 0.2263, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.3141094106630772, |
|
"grad_norm": 1.541048288345337, |
|
"learning_rate": 6.8651836773546365e-06, |
|
"loss": 0.226, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 0.3145666296014514, |
|
"grad_norm": 1.155745029449463, |
|
"learning_rate": 6.860607303160993e-06, |
|
"loss": 0.2299, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.3150238485398256, |
|
"grad_norm": 1.356096863746643, |
|
"learning_rate": 6.856030928967351e-06, |
|
"loss": 0.2283, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 0.3154810674781998, |
|
"grad_norm": 1.1130493879318237, |
|
"learning_rate": 6.8514545547737075e-06, |
|
"loss": 0.2255, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.315938286416574, |
|
"grad_norm": 1.4475120306015015, |
|
"learning_rate": 6.846878180580066e-06, |
|
"loss": 0.2311, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 0.3163955053549482, |
|
"grad_norm": 1.6176073551177979, |
|
"learning_rate": 6.842301806386422e-06, |
|
"loss": 0.225, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.3168527242933224, |
|
"grad_norm": 1.8481721878051758, |
|
"learning_rate": 6.83772543219278e-06, |
|
"loss": 0.2284, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 0.3173099432316966, |
|
"grad_norm": 1.6207536458969116, |
|
"learning_rate": 6.833149057999137e-06, |
|
"loss": 0.228, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.31776716217007084, |
|
"grad_norm": 1.3753981590270996, |
|
"learning_rate": 6.828572683805493e-06, |
|
"loss": 0.2297, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 0.31822438110844503, |
|
"grad_norm": 1.168278455734253, |
|
"learning_rate": 6.823996309611851e-06, |
|
"loss": 0.2301, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.3186816000468192, |
|
"grad_norm": 1.7938873767852783, |
|
"learning_rate": 6.8194199354182076e-06, |
|
"loss": 0.2313, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 0.3191388189851934, |
|
"grad_norm": 1.2588731050491333, |
|
"learning_rate": 6.814843561224566e-06, |
|
"loss": 0.2287, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.3195960379235676, |
|
"grad_norm": 1.5052902698516846, |
|
"learning_rate": 6.810267187030922e-06, |
|
"loss": 0.2221, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 0.3200532568619418, |
|
"grad_norm": 1.4498345851898193, |
|
"learning_rate": 6.8056908128372786e-06, |
|
"loss": 0.2286, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.32051047580031605, |
|
"grad_norm": 1.5637640953063965, |
|
"learning_rate": 6.801114438643637e-06, |
|
"loss": 0.2325, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 0.32096769473869025, |
|
"grad_norm": 0.9277071952819824, |
|
"learning_rate": 6.796538064449993e-06, |
|
"loss": 0.2241, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.32142491367706444, |
|
"grad_norm": 1.4922164678573608, |
|
"learning_rate": 6.791961690256351e-06, |
|
"loss": 0.2271, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 0.32188213261543863, |
|
"grad_norm": 1.3462028503417969, |
|
"learning_rate": 6.787385316062708e-06, |
|
"loss": 0.2251, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.3223393515538128, |
|
"grad_norm": 1.138120412826538, |
|
"learning_rate": 6.782808941869065e-06, |
|
"loss": 0.2244, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 0.322796570492187, |
|
"grad_norm": 1.3926693201065063, |
|
"learning_rate": 6.778232567675422e-06, |
|
"loss": 0.2261, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.32325378943056127, |
|
"grad_norm": 1.3903855085372925, |
|
"learning_rate": 6.773656193481779e-06, |
|
"loss": 0.2337, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 0.32371100836893546, |
|
"grad_norm": 1.4542618989944458, |
|
"learning_rate": 6.769079819288137e-06, |
|
"loss": 0.2337, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.32416822730730965, |
|
"grad_norm": 1.0457683801651, |
|
"learning_rate": 6.764503445094493e-06, |
|
"loss": 0.2305, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 0.32462544624568385, |
|
"grad_norm": 1.3533685207366943, |
|
"learning_rate": 6.7599270709008505e-06, |
|
"loss": 0.2239, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.32508266518405804, |
|
"grad_norm": 1.5367493629455566, |
|
"learning_rate": 6.755350696707208e-06, |
|
"loss": 0.229, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 0.32553988412243223, |
|
"grad_norm": 0.9888611435890198, |
|
"learning_rate": 6.750774322513565e-06, |
|
"loss": 0.2274, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.3259971030608065, |
|
"grad_norm": 2.0140717029571533, |
|
"learning_rate": 6.746197948319922e-06, |
|
"loss": 0.225, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 0.3264543219991807, |
|
"grad_norm": 1.2105058431625366, |
|
"learning_rate": 6.7416215741262796e-06, |
|
"loss": 0.225, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.32691154093755487, |
|
"grad_norm": 1.452605962753296, |
|
"learning_rate": 6.737045199932636e-06, |
|
"loss": 0.2264, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 0.32736875987592906, |
|
"grad_norm": 1.4895436763763428, |
|
"learning_rate": 6.732468825738993e-06, |
|
"loss": 0.2342, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.32782597881430325, |
|
"grad_norm": 1.7278785705566406, |
|
"learning_rate": 6.7278924515453505e-06, |
|
"loss": 0.2266, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 0.32828319775267745, |
|
"grad_norm": 1.0101240873336792, |
|
"learning_rate": 6.723316077351708e-06, |
|
"loss": 0.2265, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.3287404166910517, |
|
"grad_norm": 1.5752644538879395, |
|
"learning_rate": 6.718739703158065e-06, |
|
"loss": 0.2246, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 0.3291976356294259, |
|
"grad_norm": 1.188202977180481, |
|
"learning_rate": 6.7141633289644215e-06, |
|
"loss": 0.2229, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.3296548545678001, |
|
"grad_norm": 1.657990574836731, |
|
"learning_rate": 6.70958695477078e-06, |
|
"loss": 0.2291, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 0.3301120735061743, |
|
"grad_norm": 1.1453895568847656, |
|
"learning_rate": 6.705010580577136e-06, |
|
"loss": 0.2227, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.33056929244454847, |
|
"grad_norm": 1.4932241439819336, |
|
"learning_rate": 6.700434206383493e-06, |
|
"loss": 0.2297, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 0.3310265113829227, |
|
"grad_norm": 1.0189024209976196, |
|
"learning_rate": 6.695857832189851e-06, |
|
"loss": 0.2226, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.3314837303212969, |
|
"grad_norm": 1.1504535675048828, |
|
"learning_rate": 6.691281457996207e-06, |
|
"loss": 0.2221, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 0.3319409492596711, |
|
"grad_norm": 1.1485751867294312, |
|
"learning_rate": 6.686705083802565e-06, |
|
"loss": 0.2321, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.3323981681980453, |
|
"grad_norm": 1.4840065240859985, |
|
"learning_rate": 6.682128709608922e-06, |
|
"loss": 0.2242, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 0.3328553871364195, |
|
"grad_norm": 1.8786394596099854, |
|
"learning_rate": 6.67755233541528e-06, |
|
"loss": 0.2266, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.3333126060747937, |
|
"grad_norm": 1.5424981117248535, |
|
"learning_rate": 6.672975961221636e-06, |
|
"loss": 0.2244, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 0.33376982501316793, |
|
"grad_norm": 1.5438640117645264, |
|
"learning_rate": 6.6683995870279934e-06, |
|
"loss": 0.2277, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.3342270439515421, |
|
"grad_norm": 1.3231064081192017, |
|
"learning_rate": 6.663823212834351e-06, |
|
"loss": 0.2252, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 0.3346842628899163, |
|
"grad_norm": 1.649964451789856, |
|
"learning_rate": 6.659246838640707e-06, |
|
"loss": 0.2253, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.3351414818282905, |
|
"grad_norm": 1.6227779388427734, |
|
"learning_rate": 6.654670464447065e-06, |
|
"loss": 0.2256, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 0.3355987007666647, |
|
"grad_norm": 1.197120189666748, |
|
"learning_rate": 6.650094090253422e-06, |
|
"loss": 0.2249, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.3360559197050389, |
|
"grad_norm": 1.308526873588562, |
|
"learning_rate": 6.64551771605978e-06, |
|
"loss": 0.2214, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 0.33651313864341315, |
|
"grad_norm": 1.3108173608779907, |
|
"learning_rate": 6.640941341866136e-06, |
|
"loss": 0.2271, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.33697035758178734, |
|
"grad_norm": 1.6136078834533691, |
|
"learning_rate": 6.636364967672493e-06, |
|
"loss": 0.2288, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 0.33742757652016153, |
|
"grad_norm": 1.7667053937911987, |
|
"learning_rate": 6.631788593478851e-06, |
|
"loss": 0.2225, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.3378847954585357, |
|
"grad_norm": 1.384122610092163, |
|
"learning_rate": 6.627212219285207e-06, |
|
"loss": 0.2237, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 0.3383420143969099, |
|
"grad_norm": 1.4266330003738403, |
|
"learning_rate": 6.622635845091565e-06, |
|
"loss": 0.2259, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.3387992333352841, |
|
"grad_norm": 1.2225444316864014, |
|
"learning_rate": 6.618059470897922e-06, |
|
"loss": 0.2277, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 0.33925645227365836, |
|
"grad_norm": 1.3453285694122314, |
|
"learning_rate": 6.613483096704278e-06, |
|
"loss": 0.2252, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.33971367121203255, |
|
"grad_norm": 1.1494442224502563, |
|
"learning_rate": 6.608906722510636e-06, |
|
"loss": 0.2235, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 0.34017089015040675, |
|
"grad_norm": 2.2398324012756348, |
|
"learning_rate": 6.604330348316993e-06, |
|
"loss": 0.2235, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.34062810908878094, |
|
"grad_norm": 1.5684269666671753, |
|
"learning_rate": 6.599753974123351e-06, |
|
"loss": 0.2243, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 0.34108532802715513, |
|
"grad_norm": 1.7672525644302368, |
|
"learning_rate": 6.595177599929707e-06, |
|
"loss": 0.2266, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.3415425469655293, |
|
"grad_norm": 1.8046706914901733, |
|
"learning_rate": 6.590601225736064e-06, |
|
"loss": 0.2243, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 0.3419997659039036, |
|
"grad_norm": 1.8992114067077637, |
|
"learning_rate": 6.586024851542422e-06, |
|
"loss": 0.226, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.34245698484227777, |
|
"grad_norm": 1.6175910234451294, |
|
"learning_rate": 6.581448477348778e-06, |
|
"loss": 0.2233, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 0.34291420378065196, |
|
"grad_norm": 1.470871090888977, |
|
"learning_rate": 6.576872103155136e-06, |
|
"loss": 0.2289, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.34337142271902615, |
|
"grad_norm": 1.562513828277588, |
|
"learning_rate": 6.572295728961493e-06, |
|
"loss": 0.2293, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 0.34382864165740035, |
|
"grad_norm": 1.1901838779449463, |
|
"learning_rate": 6.56771935476785e-06, |
|
"loss": 0.2266, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.34428586059577454, |
|
"grad_norm": 1.8072021007537842, |
|
"learning_rate": 6.563142980574207e-06, |
|
"loss": 0.2235, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 0.3447430795341488, |
|
"grad_norm": 1.430627703666687, |
|
"learning_rate": 6.558566606380564e-06, |
|
"loss": 0.23, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.345200298472523, |
|
"grad_norm": 1.7450295686721802, |
|
"learning_rate": 6.553990232186922e-06, |
|
"loss": 0.2238, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 0.3456575174108972, |
|
"grad_norm": 1.278794288635254, |
|
"learning_rate": 6.549413857993278e-06, |
|
"loss": 0.2247, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.34611473634927137, |
|
"grad_norm": 1.6127958297729492, |
|
"learning_rate": 6.544837483799636e-06, |
|
"loss": 0.224, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 0.34657195528764556, |
|
"grad_norm": 1.3669660091400146, |
|
"learning_rate": 6.540261109605993e-06, |
|
"loss": 0.2228, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.34702917422601975, |
|
"grad_norm": 1.3551899194717407, |
|
"learning_rate": 6.53568473541235e-06, |
|
"loss": 0.2205, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 0.347486393164394, |
|
"grad_norm": 1.0663011074066162, |
|
"learning_rate": 6.5311083612187075e-06, |
|
"loss": 0.2256, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.3479436121027682, |
|
"grad_norm": 1.0649718046188354, |
|
"learning_rate": 6.526531987025064e-06, |
|
"loss": 0.2248, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 0.3484008310411424, |
|
"grad_norm": 1.4018280506134033, |
|
"learning_rate": 6.521955612831422e-06, |
|
"loss": 0.2266, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.3488580499795166, |
|
"grad_norm": 1.750497579574585, |
|
"learning_rate": 6.5173792386377785e-06, |
|
"loss": 0.2208, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 0.3493152689178908, |
|
"grad_norm": 1.3639546632766724, |
|
"learning_rate": 6.512802864444136e-06, |
|
"loss": 0.2271, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.349772487856265, |
|
"grad_norm": 0.8961055278778076, |
|
"learning_rate": 6.508226490250493e-06, |
|
"loss": 0.2258, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 0.3502297067946392, |
|
"grad_norm": 1.4955040216445923, |
|
"learning_rate": 6.50365011605685e-06, |
|
"loss": 0.2168, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.3506869257330134, |
|
"grad_norm": 1.7707324028015137, |
|
"learning_rate": 6.4990737418632076e-06, |
|
"loss": 0.223, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 0.3511441446713876, |
|
"grad_norm": 1.7343741655349731, |
|
"learning_rate": 6.494497367669564e-06, |
|
"loss": 0.2243, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.3516013636097618, |
|
"grad_norm": 1.3605395555496216, |
|
"learning_rate": 6.489920993475921e-06, |
|
"loss": 0.2208, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 0.352058582548136, |
|
"grad_norm": 1.7057133913040161, |
|
"learning_rate": 6.4853446192822785e-06, |
|
"loss": 0.2242, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.35251580148651024, |
|
"grad_norm": 1.440219521522522, |
|
"learning_rate": 6.480768245088636e-06, |
|
"loss": 0.2198, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 0.35297302042488443, |
|
"grad_norm": 1.1752151250839233, |
|
"learning_rate": 6.476191870894993e-06, |
|
"loss": 0.2299, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.3534302393632586, |
|
"grad_norm": 1.0869592428207397, |
|
"learning_rate": 6.47161549670135e-06, |
|
"loss": 0.2196, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 0.3538874583016328, |
|
"grad_norm": 1.5555566549301147, |
|
"learning_rate": 6.467039122507707e-06, |
|
"loss": 0.2204, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.354344677240007, |
|
"grad_norm": 1.148882508277893, |
|
"learning_rate": 6.462462748314065e-06, |
|
"loss": 0.222, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 0.3548018961783812, |
|
"grad_norm": 1.7266398668289185, |
|
"learning_rate": 6.457886374120421e-06, |
|
"loss": 0.2239, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.35525911511675545, |
|
"grad_norm": 1.8156708478927612, |
|
"learning_rate": 6.453309999926779e-06, |
|
"loss": 0.2254, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 0.35571633405512965, |
|
"grad_norm": 1.3709605932235718, |
|
"learning_rate": 6.448733625733136e-06, |
|
"loss": 0.2236, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.35617355299350384, |
|
"grad_norm": 1.373267412185669, |
|
"learning_rate": 6.444157251539492e-06, |
|
"loss": 0.2182, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 0.35663077193187803, |
|
"grad_norm": 1.8290654420852661, |
|
"learning_rate": 6.4395808773458505e-06, |
|
"loss": 0.2199, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.3570879908702522, |
|
"grad_norm": 1.2703052759170532, |
|
"learning_rate": 6.435004503152207e-06, |
|
"loss": 0.2209, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 0.3575452098086264, |
|
"grad_norm": 1.3054262399673462, |
|
"learning_rate": 6.430428128958565e-06, |
|
"loss": 0.2275, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.35800242874700067, |
|
"grad_norm": 1.6827231645584106, |
|
"learning_rate": 6.4258517547649214e-06, |
|
"loss": 0.225, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 0.35845964768537486, |
|
"grad_norm": 1.0806723833084106, |
|
"learning_rate": 6.421275380571278e-06, |
|
"loss": 0.2203, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.35891686662374905, |
|
"grad_norm": 1.3846522569656372, |
|
"learning_rate": 6.416699006377636e-06, |
|
"loss": 0.2196, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 0.35937408556212325, |
|
"grad_norm": 1.3533942699432373, |
|
"learning_rate": 6.412122632183992e-06, |
|
"loss": 0.2245, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.35983130450049744, |
|
"grad_norm": 1.6266448497772217, |
|
"learning_rate": 6.4075462579903505e-06, |
|
"loss": 0.2237, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 0.36028852343887163, |
|
"grad_norm": 1.3846949338912964, |
|
"learning_rate": 6.402969883796707e-06, |
|
"loss": 0.2153, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.3607457423772459, |
|
"grad_norm": 1.707695484161377, |
|
"learning_rate": 6.398393509603063e-06, |
|
"loss": 0.221, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 0.3612029613156201, |
|
"grad_norm": 1.2363784313201904, |
|
"learning_rate": 6.3938171354094215e-06, |
|
"loss": 0.2239, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.36166018025399427, |
|
"grad_norm": 1.168939232826233, |
|
"learning_rate": 6.389240761215778e-06, |
|
"loss": 0.2219, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 0.36211739919236846, |
|
"grad_norm": 0.9597660899162292, |
|
"learning_rate": 6.384664387022136e-06, |
|
"loss": 0.2199, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.36257461813074265, |
|
"grad_norm": 1.228608250617981, |
|
"learning_rate": 6.3800880128284925e-06, |
|
"loss": 0.2209, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 0.36303183706911685, |
|
"grad_norm": 0.8621894121170044, |
|
"learning_rate": 6.375511638634851e-06, |
|
"loss": 0.2209, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.3634890560074911, |
|
"grad_norm": 1.686716079711914, |
|
"learning_rate": 6.370935264441207e-06, |
|
"loss": 0.2255, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 0.3639462749458653, |
|
"grad_norm": 1.3068556785583496, |
|
"learning_rate": 6.3663588902475635e-06, |
|
"loss": 0.2287, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.3644034938842395, |
|
"grad_norm": 1.07680344581604, |
|
"learning_rate": 6.361782516053922e-06, |
|
"loss": 0.2236, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 0.3648607128226137, |
|
"grad_norm": 2.0120232105255127, |
|
"learning_rate": 6.357206141860278e-06, |
|
"loss": 0.2236, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.36531793176098787, |
|
"grad_norm": 1.7183582782745361, |
|
"learning_rate": 6.352629767666636e-06, |
|
"loss": 0.2191, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 0.36577515069936206, |
|
"grad_norm": 1.2929768562316895, |
|
"learning_rate": 6.348053393472993e-06, |
|
"loss": 0.2247, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.3662323696377363, |
|
"grad_norm": 1.0887680053710938, |
|
"learning_rate": 6.343477019279349e-06, |
|
"loss": 0.2245, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 0.3666895885761105, |
|
"grad_norm": 1.428952932357788, |
|
"learning_rate": 6.338900645085707e-06, |
|
"loss": 0.2198, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.3671468075144847, |
|
"grad_norm": 1.679784893989563, |
|
"learning_rate": 6.3343242708920636e-06, |
|
"loss": 0.219, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 0.3676040264528589, |
|
"grad_norm": 1.9559983015060425, |
|
"learning_rate": 6.329747896698422e-06, |
|
"loss": 0.2208, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.3680612453912331, |
|
"grad_norm": 1.2725555896759033, |
|
"learning_rate": 6.325171522504778e-06, |
|
"loss": 0.2157, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 0.36851846432960733, |
|
"grad_norm": 1.522418737411499, |
|
"learning_rate": 6.3205951483111345e-06, |
|
"loss": 0.2224, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.3689756832679815, |
|
"grad_norm": 1.372866153717041, |
|
"learning_rate": 6.316018774117493e-06, |
|
"loss": 0.2226, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 0.3694329022063557, |
|
"grad_norm": 1.6181350946426392, |
|
"learning_rate": 6.311442399923849e-06, |
|
"loss": 0.2229, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.3698901211447299, |
|
"grad_norm": 1.3286571502685547, |
|
"learning_rate": 6.306866025730207e-06, |
|
"loss": 0.2242, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 0.3703473400831041, |
|
"grad_norm": 1.808738350868225, |
|
"learning_rate": 6.302289651536564e-06, |
|
"loss": 0.2187, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.3708045590214783, |
|
"grad_norm": 1.438143253326416, |
|
"learning_rate": 6.297713277342921e-06, |
|
"loss": 0.2217, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 0.37126177795985255, |
|
"grad_norm": 1.8223944902420044, |
|
"learning_rate": 6.293136903149278e-06, |
|
"loss": 0.2269, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.37171899689822674, |
|
"grad_norm": 1.175521969795227, |
|
"learning_rate": 6.2885605289556355e-06, |
|
"loss": 0.2235, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 0.37217621583660093, |
|
"grad_norm": 2.208753824234009, |
|
"learning_rate": 6.283984154761993e-06, |
|
"loss": 0.2267, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.3726334347749751, |
|
"grad_norm": 1.0367563962936401, |
|
"learning_rate": 6.279407780568349e-06, |
|
"loss": 0.2227, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 0.3730906537133493, |
|
"grad_norm": 1.719056487083435, |
|
"learning_rate": 6.2748314063747065e-06, |
|
"loss": 0.2217, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.3735478726517235, |
|
"grad_norm": 1.4684566259384155, |
|
"learning_rate": 6.270255032181064e-06, |
|
"loss": 0.2247, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 0.37400509159009776, |
|
"grad_norm": 1.1170936822891235, |
|
"learning_rate": 6.265678657987421e-06, |
|
"loss": 0.2194, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.37446231052847195, |
|
"grad_norm": 1.2900408506393433, |
|
"learning_rate": 6.261102283793778e-06, |
|
"loss": 0.2181, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 0.37491952946684615, |
|
"grad_norm": 1.4425394535064697, |
|
"learning_rate": 6.2565259096001356e-06, |
|
"loss": 0.2204, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.37537674840522034, |
|
"grad_norm": 1.2810927629470825, |
|
"learning_rate": 6.251949535406492e-06, |
|
"loss": 0.2189, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 0.37583396734359453, |
|
"grad_norm": 1.4359560012817383, |
|
"learning_rate": 6.247373161212849e-06, |
|
"loss": 0.2206, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.3762911862819687, |
|
"grad_norm": 1.2622240781784058, |
|
"learning_rate": 6.2427967870192065e-06, |
|
"loss": 0.219, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 0.376748405220343, |
|
"grad_norm": 1.365540862083435, |
|
"learning_rate": 6.238220412825564e-06, |
|
"loss": 0.22, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.37720562415871717, |
|
"grad_norm": 1.4828884601593018, |
|
"learning_rate": 6.233644038631921e-06, |
|
"loss": 0.2263, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 0.37766284309709136, |
|
"grad_norm": 1.3468049764633179, |
|
"learning_rate": 6.2290676644382775e-06, |
|
"loss": 0.2236, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.37812006203546555, |
|
"grad_norm": 1.2755866050720215, |
|
"learning_rate": 6.224491290244636e-06, |
|
"loss": 0.2258, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 0.37857728097383975, |
|
"grad_norm": 1.38784658908844, |
|
"learning_rate": 6.219914916050992e-06, |
|
"loss": 0.2236, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.37903449991221394, |
|
"grad_norm": 1.3065296411514282, |
|
"learning_rate": 6.215338541857349e-06, |
|
"loss": 0.2206, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 0.3794917188505882, |
|
"grad_norm": 1.0447067022323608, |
|
"learning_rate": 6.210762167663707e-06, |
|
"loss": 0.2177, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.3799489377889624, |
|
"grad_norm": 1.609236478805542, |
|
"learning_rate": 6.206185793470064e-06, |
|
"loss": 0.2243, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 0.3804061567273366, |
|
"grad_norm": 1.5531622171401978, |
|
"learning_rate": 6.201609419276421e-06, |
|
"loss": 0.221, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.38086337566571077, |
|
"grad_norm": 1.4250783920288086, |
|
"learning_rate": 6.197033045082778e-06, |
|
"loss": 0.2164, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 0.38132059460408496, |
|
"grad_norm": 1.1696795225143433, |
|
"learning_rate": 6.192456670889136e-06, |
|
"loss": 0.2225, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.38177781354245915, |
|
"grad_norm": 1.422655701637268, |
|
"learning_rate": 6.187880296695492e-06, |
|
"loss": 0.2223, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 0.3822350324808334, |
|
"grad_norm": 1.3113077878952026, |
|
"learning_rate": 6.1833039225018494e-06, |
|
"loss": 0.2195, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.3826922514192076, |
|
"grad_norm": 1.46403968334198, |
|
"learning_rate": 6.178727548308207e-06, |
|
"loss": 0.213, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 0.3831494703575818, |
|
"grad_norm": 1.528786540031433, |
|
"learning_rate": 6.174151174114563e-06, |
|
"loss": 0.2158, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.383606689295956, |
|
"grad_norm": 1.4497718811035156, |
|
"learning_rate": 6.169574799920921e-06, |
|
"loss": 0.2196, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 0.3840639082343302, |
|
"grad_norm": 1.6955440044403076, |
|
"learning_rate": 6.164998425727278e-06, |
|
"loss": 0.2227, |
|
"step": 420000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1093568, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.992271060696826e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|