|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.2703818369453046, |
|
"eval_steps": 500, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010319917440660475, |
|
"grad_norm": 2.1213459968566895, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"loss": 0.8274, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02063983488132095, |
|
"grad_norm": 1.1507428884506226, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.7784, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.030959752321981424, |
|
"grad_norm": 0.6039409637451172, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 0.7275, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0412796697626419, |
|
"grad_norm": 0.5366007089614868, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.7066, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05159958720330237, |
|
"grad_norm": 0.5229462385177612, |
|
"learning_rate": 1.1363636363636366e-05, |
|
"loss": 0.6901, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06191950464396285, |
|
"grad_norm": 0.5291970372200012, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.6741, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07223942208462332, |
|
"grad_norm": 0.5639849901199341, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 0.6625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0825593395252838, |
|
"grad_norm": 0.5302533507347107, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.6513, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09287925696594428, |
|
"grad_norm": 0.7249609231948853, |
|
"learning_rate": 1.9999975160696756e-05, |
|
"loss": 0.653, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10319917440660474, |
|
"grad_norm": 0.5580225586891174, |
|
"learning_rate": 1.999910579803988e-05, |
|
"loss": 0.6456, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11351909184726522, |
|
"grad_norm": 0.5612972974777222, |
|
"learning_rate": 1.9996994593616145e-05, |
|
"loss": 0.6364, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1238390092879257, |
|
"grad_norm": 0.5823021531105042, |
|
"learning_rate": 1.9993641809627166e-05, |
|
"loss": 0.6302, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13415892672858618, |
|
"grad_norm": 0.5421112775802612, |
|
"learning_rate": 1.9989047862472904e-05, |
|
"loss": 0.6277, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14447884416924664, |
|
"grad_norm": 0.622350811958313, |
|
"learning_rate": 1.9983213322699926e-05, |
|
"loss": 0.625, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15479876160990713, |
|
"grad_norm": 0.6643008589744568, |
|
"learning_rate": 1.997613891493054e-05, |
|
"loss": 0.6229, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1651186790505676, |
|
"grad_norm": 0.6329976320266724, |
|
"learning_rate": 1.996782551777282e-05, |
|
"loss": 0.6099, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 0.5071990489959717, |
|
"learning_rate": 1.995827416371147e-05, |
|
"loss": 0.6035, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18575851393188855, |
|
"grad_norm": 0.5435068011283875, |
|
"learning_rate": 1.9947486038979606e-05, |
|
"loss": 0.601, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 0.5333253145217896, |
|
"learning_rate": 1.993546248341142e-05, |
|
"loss": 0.5995, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20639834881320948, |
|
"grad_norm": 0.5422487258911133, |
|
"learning_rate": 1.9922204990275788e-05, |
|
"loss": 0.5924, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21671826625386997, |
|
"grad_norm": 0.5293141007423401, |
|
"learning_rate": 1.9907715206090817e-05, |
|
"loss": 0.5962, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22703818369453044, |
|
"grad_norm": 0.6123012900352478, |
|
"learning_rate": 1.989199493041935e-05, |
|
"loss": 0.585, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23735810113519093, |
|
"grad_norm": 0.5224360823631287, |
|
"learning_rate": 1.9875046115645443e-05, |
|
"loss": 0.5859, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2476780185758514, |
|
"grad_norm": 0.6205320358276367, |
|
"learning_rate": 1.9856870866731946e-05, |
|
"loss": 0.5856, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2579979360165119, |
|
"grad_norm": 0.6249591708183289, |
|
"learning_rate": 1.983747144095902e-05, |
|
"loss": 0.581, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.26831785345717235, |
|
"grad_norm": 0.5411680340766907, |
|
"learning_rate": 1.9816850247643834e-05, |
|
"loss": 0.5738, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2786377708978328, |
|
"grad_norm": 0.5577812790870667, |
|
"learning_rate": 1.97950098478413e-05, |
|
"loss": 0.5717, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2889576883384933, |
|
"grad_norm": 0.5821300148963928, |
|
"learning_rate": 1.9771952954026038e-05, |
|
"loss": 0.5714, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29927760577915374, |
|
"grad_norm": 0.5330939292907715, |
|
"learning_rate": 1.9747682429755493e-05, |
|
"loss": 0.5685, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.30959752321981426, |
|
"grad_norm": 0.5918645262718201, |
|
"learning_rate": 1.972220128931427e-05, |
|
"loss": 0.571, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31991744066047473, |
|
"grad_norm": 0.5980034470558167, |
|
"learning_rate": 1.9695512697339797e-05, |
|
"loss": 0.5657, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3302373581011352, |
|
"grad_norm": 0.5418304800987244, |
|
"learning_rate": 1.966761996842929e-05, |
|
"loss": 0.566, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34055727554179566, |
|
"grad_norm": 0.5538172125816345, |
|
"learning_rate": 1.9638526566728088e-05, |
|
"loss": 0.5551, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.504206120967865, |
|
"learning_rate": 1.960823610549943e-05, |
|
"loss": 0.5557, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36119711042311664, |
|
"grad_norm": 0.51911461353302, |
|
"learning_rate": 1.9576752346675692e-05, |
|
"loss": 0.5561, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3715170278637771, |
|
"grad_norm": 0.5813859701156616, |
|
"learning_rate": 1.954407920039119e-05, |
|
"loss": 0.5606, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38183694530443757, |
|
"grad_norm": 0.609829843044281, |
|
"learning_rate": 1.951022072449655e-05, |
|
"loss": 0.5515, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 0.5120096802711487, |
|
"learning_rate": 1.9475181124054742e-05, |
|
"loss": 0.5529, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4024767801857585, |
|
"grad_norm": 0.5144566297531128, |
|
"learning_rate": 1.9438964750818833e-05, |
|
"loss": 0.5457, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41279669762641896, |
|
"grad_norm": 0.5255650877952576, |
|
"learning_rate": 1.940157610269152e-05, |
|
"loss": 0.5447, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4231166150670795, |
|
"grad_norm": 0.5155138969421387, |
|
"learning_rate": 1.9363019823166506e-05, |
|
"loss": 0.548, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43343653250773995, |
|
"grad_norm": 0.5140640735626221, |
|
"learning_rate": 1.9323300700751816e-05, |
|
"loss": 0.5456, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4437564499484004, |
|
"grad_norm": 0.4923473596572876, |
|
"learning_rate": 1.9282423668375064e-05, |
|
"loss": 0.5429, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4540763673890609, |
|
"grad_norm": 0.5069249272346497, |
|
"learning_rate": 1.9240393802770824e-05, |
|
"loss": 0.5441, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46439628482972134, |
|
"grad_norm": 0.47837620973587036, |
|
"learning_rate": 1.9197216323850122e-05, |
|
"loss": 0.5396, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47471620227038186, |
|
"grad_norm": 0.5408268570899963, |
|
"learning_rate": 1.9152896594052134e-05, |
|
"loss": 0.5332, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4850361197110423, |
|
"grad_norm": 0.5018060803413391, |
|
"learning_rate": 1.910744011767821e-05, |
|
"loss": 0.5314, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4953560371517028, |
|
"grad_norm": 0.47133609652519226, |
|
"learning_rate": 1.9060852540208277e-05, |
|
"loss": 0.5352, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5056759545923633, |
|
"grad_norm": 0.496358186006546, |
|
"learning_rate": 1.9013139647599656e-05, |
|
"loss": 0.5344, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5159958720330238, |
|
"grad_norm": 0.527833104133606, |
|
"learning_rate": 1.8964307365568513e-05, |
|
"loss": 0.5246, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.4785318374633789, |
|
"learning_rate": 1.89143617588539e-05, |
|
"loss": 0.5257, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5366357069143447, |
|
"grad_norm": 0.4716449975967407, |
|
"learning_rate": 1.886330903046454e-05, |
|
"loss": 0.5294, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5469556243550051, |
|
"grad_norm": 0.489279180765152, |
|
"learning_rate": 1.8811155520908445e-05, |
|
"loss": 0.5278, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5572755417956656, |
|
"grad_norm": 0.5271186232566833, |
|
"learning_rate": 1.8757907707405456e-05, |
|
"loss": 0.5196, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5675954592363261, |
|
"grad_norm": 0.47395509481430054, |
|
"learning_rate": 1.8703572203082795e-05, |
|
"loss": 0.5233, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5779153766769866, |
|
"grad_norm": 0.4861067235469818, |
|
"learning_rate": 1.8648155756153768e-05, |
|
"loss": 0.5191, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.5004777908325195, |
|
"learning_rate": 1.859166524907963e-05, |
|
"loss": 0.5214, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5985552115583075, |
|
"grad_norm": 0.5303364992141724, |
|
"learning_rate": 1.8534107697714864e-05, |
|
"loss": 0.5269, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.608875128998968, |
|
"grad_norm": 0.4722409248352051, |
|
"learning_rate": 1.84754902504358e-05, |
|
"loss": 0.5315, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6191950464396285, |
|
"grad_norm": 0.4983135163784027, |
|
"learning_rate": 1.8415820187252847e-05, |
|
"loss": 0.5245, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6295149638802889, |
|
"grad_norm": 0.4688129723072052, |
|
"learning_rate": 1.8355104918906353e-05, |
|
"loss": 0.5223, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6398348813209495, |
|
"grad_norm": 0.512848973274231, |
|
"learning_rate": 1.8293351985946194e-05, |
|
"loss": 0.5142, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6501547987616099, |
|
"grad_norm": 0.47755149006843567, |
|
"learning_rate": 1.823056905779532e-05, |
|
"loss": 0.5215, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6604747162022704, |
|
"grad_norm": 0.4616507887840271, |
|
"learning_rate": 1.816676393179721e-05, |
|
"loss": 0.5157, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6707946336429309, |
|
"grad_norm": 0.48507964611053467, |
|
"learning_rate": 1.8101944532247495e-05, |
|
"loss": 0.5202, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6811145510835913, |
|
"grad_norm": 0.4519255459308624, |
|
"learning_rate": 1.80361189094098e-05, |
|
"loss": 0.513, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6914344685242518, |
|
"grad_norm": 0.48692259192466736, |
|
"learning_rate": 1.796929523851593e-05, |
|
"loss": 0.5157, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.4593399465084076, |
|
"learning_rate": 1.790148181875055e-05, |
|
"loss": 0.5164, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7120743034055728, |
|
"grad_norm": 0.4479089379310608, |
|
"learning_rate": 1.783268707222048e-05, |
|
"loss": 0.5142, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7223942208462333, |
|
"grad_norm": 0.4871484041213989, |
|
"learning_rate": 1.776291954290867e-05, |
|
"loss": 0.5118, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7327141382868937, |
|
"grad_norm": 0.48732689023017883, |
|
"learning_rate": 1.769218789561312e-05, |
|
"loss": 0.5078, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7430340557275542, |
|
"grad_norm": 0.4574413597583771, |
|
"learning_rate": 1.7620500914870734e-05, |
|
"loss": 0.5179, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7533539731682146, |
|
"grad_norm": 0.4452187120914459, |
|
"learning_rate": 1.7547867503866315e-05, |
|
"loss": 0.5086, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7636738906088751, |
|
"grad_norm": 0.47763118147850037, |
|
"learning_rate": 1.7474296683326844e-05, |
|
"loss": 0.5158, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7739938080495357, |
|
"grad_norm": 0.4462694227695465, |
|
"learning_rate": 1.739979759040114e-05, |
|
"loss": 0.5072, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 0.44416990876197815, |
|
"learning_rate": 1.7324379477525086e-05, |
|
"loss": 0.5104, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7946336429308566, |
|
"grad_norm": 0.4583296775817871, |
|
"learning_rate": 1.724805171127249e-05, |
|
"loss": 0.509, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.804953560371517, |
|
"grad_norm": 0.46201425790786743, |
|
"learning_rate": 1.7170823771191824e-05, |
|
"loss": 0.5049, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8152734778121775, |
|
"grad_norm": 0.46992990374565125, |
|
"learning_rate": 1.709270524862891e-05, |
|
"loss": 0.5035, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8255933952528379, |
|
"grad_norm": 0.4597872793674469, |
|
"learning_rate": 1.7013705845535704e-05, |
|
"loss": 0.509, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8359133126934984, |
|
"grad_norm": 0.44823798537254333, |
|
"learning_rate": 1.6933835373265373e-05, |
|
"loss": 0.5096, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.846233230134159, |
|
"grad_norm": 0.46216702461242676, |
|
"learning_rate": 1.685310375135376e-05, |
|
"loss": 0.5094, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8565531475748194, |
|
"grad_norm": 0.4581802189350128, |
|
"learning_rate": 1.6771521006287442e-05, |
|
"loss": 0.4999, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8668730650154799, |
|
"grad_norm": 0.45786017179489136, |
|
"learning_rate": 1.6689097270258463e-05, |
|
"loss": 0.5087, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.45421096682548523, |
|
"learning_rate": 1.6605842779905984e-05, |
|
"loss": 0.5011, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8875128998968008, |
|
"grad_norm": 0.44464901089668274, |
|
"learning_rate": 1.6521767875044935e-05, |
|
"loss": 0.4953, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8978328173374613, |
|
"grad_norm": 0.44755035638809204, |
|
"learning_rate": 1.643688299738186e-05, |
|
"loss": 0.4971, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9081527347781218, |
|
"grad_norm": 0.445285826921463, |
|
"learning_rate": 1.635119868921809e-05, |
|
"loss": 0.5051, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9184726522187823, |
|
"grad_norm": 0.429434210062027, |
|
"learning_rate": 1.6264725592140468e-05, |
|
"loss": 0.5007, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9287925696594427, |
|
"grad_norm": 0.46974435448646545, |
|
"learning_rate": 1.6177474445699695e-05, |
|
"loss": 0.4933, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9391124871001032, |
|
"grad_norm": 0.44739529490470886, |
|
"learning_rate": 1.6089456086076527e-05, |
|
"loss": 0.4962, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9494324045407637, |
|
"grad_norm": 0.42498907446861267, |
|
"learning_rate": 1.6000681444735976e-05, |
|
"loss": 0.499, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9597523219814241, |
|
"grad_norm": 0.4373469650745392, |
|
"learning_rate": 1.5911161547069688e-05, |
|
"loss": 0.4963, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9700722394220846, |
|
"grad_norm": 0.4303041100502014, |
|
"learning_rate": 1.582090751102662e-05, |
|
"loss": 0.5054, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 0.4305797815322876, |
|
"learning_rate": 1.5729930545732247e-05, |
|
"loss": 0.4903, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9907120743034056, |
|
"grad_norm": 0.4737144112586975, |
|
"learning_rate": 1.5638241950096458e-05, |
|
"loss": 0.4951, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.001031991744066, |
|
"grad_norm": 0.8125914931297302, |
|
"learning_rate": 1.554585311141027e-05, |
|
"loss": 0.4869, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.0113519091847265, |
|
"grad_norm": 0.5099620223045349, |
|
"learning_rate": 1.5452775503931566e-05, |
|
"loss": 0.4289, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.021671826625387, |
|
"grad_norm": 0.478298544883728, |
|
"learning_rate": 1.5359020687460096e-05, |
|
"loss": 0.4274, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.0319917440660475, |
|
"grad_norm": 0.46251142024993896, |
|
"learning_rate": 1.5264600305901744e-05, |
|
"loss": 0.4315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0423116615067078, |
|
"grad_norm": 0.45655471086502075, |
|
"learning_rate": 1.5169526085822451e-05, |
|
"loss": 0.4245, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.43170642852783203, |
|
"learning_rate": 1.5073809834991816e-05, |
|
"loss": 0.4233, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.0629514963880289, |
|
"grad_norm": 0.4710790812969208, |
|
"learning_rate": 1.4977463440916621e-05, |
|
"loss": 0.4218, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.0732714138286894, |
|
"grad_norm": 0.5029376149177551, |
|
"learning_rate": 1.4880498869364482e-05, |
|
"loss": 0.4276, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08359133126935, |
|
"grad_norm": 0.42352095246315, |
|
"learning_rate": 1.4782928162877722e-05, |
|
"loss": 0.425, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.0939112487100102, |
|
"grad_norm": 0.4630359411239624, |
|
"learning_rate": 1.468476343927778e-05, |
|
"loss": 0.4243, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1042311661506707, |
|
"grad_norm": 0.46747565269470215, |
|
"learning_rate": 1.4586016890160208e-05, |
|
"loss": 0.4289, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.1145510835913313, |
|
"grad_norm": 0.4462341070175171, |
|
"learning_rate": 1.4486700779380547e-05, |
|
"loss": 0.4265, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.1248710010319918, |
|
"grad_norm": 0.468124121427536, |
|
"learning_rate": 1.4386827441531202e-05, |
|
"loss": 0.4251, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.1351909184726523, |
|
"grad_norm": 0.46226370334625244, |
|
"learning_rate": 1.4286409280409558e-05, |
|
"loss": 0.424, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.1455108359133126, |
|
"grad_norm": 0.45096278190612793, |
|
"learning_rate": 1.4185458767477487e-05, |
|
"loss": 0.4249, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.1558307533539731, |
|
"grad_norm": 0.4443894624710083, |
|
"learning_rate": 1.4083988440312429e-05, |
|
"loss": 0.4254, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.1661506707946336, |
|
"grad_norm": 0.47349488735198975, |
|
"learning_rate": 1.3982010901050305e-05, |
|
"loss": 0.4308, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.4347674250602722, |
|
"learning_rate": 1.3879538814820395e-05, |
|
"loss": 0.4205, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.1867905056759547, |
|
"grad_norm": 0.46304601430892944, |
|
"learning_rate": 1.3776584908172364e-05, |
|
"loss": 0.4268, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.197110423116615, |
|
"grad_norm": 0.46236124634742737, |
|
"learning_rate": 1.3673161967495708e-05, |
|
"loss": 0.4244, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.2074303405572755, |
|
"grad_norm": 0.4554111957550049, |
|
"learning_rate": 1.3569282837431737e-05, |
|
"loss": 0.4267, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.217750257997936, |
|
"grad_norm": 0.44835782051086426, |
|
"learning_rate": 1.3464960419278332e-05, |
|
"loss": 0.4213, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 0.43975839018821716, |
|
"learning_rate": 1.336020766938766e-05, |
|
"loss": 0.4172, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.238390092879257, |
|
"grad_norm": 0.4433438181877136, |
|
"learning_rate": 1.3255037597557057e-05, |
|
"loss": 0.4242, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2487100103199174, |
|
"grad_norm": 0.44105082750320435, |
|
"learning_rate": 1.3149463265413282e-05, |
|
"loss": 0.4238, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.2590299277605779, |
|
"grad_norm": 0.4388076961040497, |
|
"learning_rate": 1.3043497784790315e-05, |
|
"loss": 0.4232, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.2693498452012384, |
|
"grad_norm": 0.46081188321113586, |
|
"learning_rate": 1.2937154316100927e-05, |
|
"loss": 0.4231, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.279669762641899, |
|
"grad_norm": 0.4441220164299011, |
|
"learning_rate": 1.283044606670223e-05, |
|
"loss": 0.4156, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.2899896800825594, |
|
"grad_norm": 0.4531669020652771, |
|
"learning_rate": 1.2723386289255374e-05, |
|
"loss": 0.4232, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3003095975232197, |
|
"grad_norm": 0.4175763428211212, |
|
"learning_rate": 1.2615988280079645e-05, |
|
"loss": 0.4187, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.3106295149638802, |
|
"grad_norm": 0.4199049770832062, |
|
"learning_rate": 1.2508265377501102e-05, |
|
"loss": 0.4203, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.3209494324045408, |
|
"grad_norm": 0.4577757716178894, |
|
"learning_rate": 1.240023096019603e-05, |
|
"loss": 0.4221, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.3312693498452013, |
|
"grad_norm": 0.4792121946811676, |
|
"learning_rate": 1.2291898445529384e-05, |
|
"loss": 0.4194, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.3415892672858618, |
|
"grad_norm": 0.4585110545158386, |
|
"learning_rate": 1.2183281287888398e-05, |
|
"loss": 0.419, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.351909184726522, |
|
"grad_norm": 0.4325573146343231, |
|
"learning_rate": 1.2074392977011629e-05, |
|
"loss": 0.4195, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.3622291021671826, |
|
"grad_norm": 0.4285847544670105, |
|
"learning_rate": 1.1965247036313573e-05, |
|
"loss": 0.4243, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.3725490196078431, |
|
"grad_norm": 0.4623337686061859, |
|
"learning_rate": 1.185585702120515e-05, |
|
"loss": 0.4145, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.3828689370485037, |
|
"grad_norm": 0.4030391275882721, |
|
"learning_rate": 1.1746236517410155e-05, |
|
"loss": 0.416, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.3931888544891642, |
|
"grad_norm": 0.4173198938369751, |
|
"learning_rate": 1.1636399139277998e-05, |
|
"loss": 0.4156, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.43669000267982483, |
|
"learning_rate": 1.1526358528092861e-05, |
|
"loss": 0.4159, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.413828689370485, |
|
"grad_norm": 0.4414576590061188, |
|
"learning_rate": 1.1416128350379503e-05, |
|
"loss": 0.4202, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.4241486068111455, |
|
"grad_norm": 0.43647974729537964, |
|
"learning_rate": 1.1305722296205968e-05, |
|
"loss": 0.4166, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.434468524251806, |
|
"grad_norm": 0.4320119619369507, |
|
"learning_rate": 1.1195154077483313e-05, |
|
"loss": 0.4178, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.4447884416924666, |
|
"grad_norm": 0.43462061882019043, |
|
"learning_rate": 1.1084437426262666e-05, |
|
"loss": 0.4179, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.4551083591331269, |
|
"grad_norm": 0.4457741677761078, |
|
"learning_rate": 1.097358609302978e-05, |
|
"loss": 0.4213, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.4654282765737874, |
|
"grad_norm": 0.42949527502059937, |
|
"learning_rate": 1.0862613844997272e-05, |
|
"loss": 0.4157, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.475748194014448, |
|
"grad_norm": 0.4295817017555237, |
|
"learning_rate": 1.0751534464394809e-05, |
|
"loss": 0.4118, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.4860681114551084, |
|
"grad_norm": 0.46553367376327515, |
|
"learning_rate": 1.0640361746757413e-05, |
|
"loss": 0.4133, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.496388028895769, |
|
"grad_norm": 0.4500296115875244, |
|
"learning_rate": 1.0529109499212137e-05, |
|
"loss": 0.4189, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.5067079463364292, |
|
"grad_norm": 0.44270554184913635, |
|
"learning_rate": 1.0417791538763269e-05, |
|
"loss": 0.4157, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.5170278637770898, |
|
"grad_norm": 0.42945072054862976, |
|
"learning_rate": 1.0306421690576318e-05, |
|
"loss": 0.4171, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.5273477812177503, |
|
"grad_norm": 0.4453894793987274, |
|
"learning_rate": 1.0195013786261017e-05, |
|
"loss": 0.4154, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.5376676986584106, |
|
"grad_norm": 0.4381779730319977, |
|
"learning_rate": 1.0083581662153488e-05, |
|
"loss": 0.4127, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.5479876160990713, |
|
"grad_norm": 0.42887642979621887, |
|
"learning_rate": 9.972139157597836e-06, |
|
"loss": 0.4205, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5583075335397316, |
|
"grad_norm": 0.43492448329925537, |
|
"learning_rate": 9.86070011322737e-06, |
|
"loss": 0.4165, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 0.4179580509662628, |
|
"learning_rate": 9.749278369245658e-06, |
|
"loss": 0.4148, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.4366095960140228, |
|
"learning_rate": 9.637887763707649e-06, |
|
"loss": 0.4122, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.589267285861713, |
|
"grad_norm": 0.4154805541038513, |
|
"learning_rate": 9.52654213080103e-06, |
|
"loss": 0.4158, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.5995872033023737, |
|
"grad_norm": 0.4285624325275421, |
|
"learning_rate": 9.415255299128115e-06, |
|
"loss": 0.4092, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.609907120743034, |
|
"grad_norm": 1.8667707443237305, |
|
"learning_rate": 9.304041089988367e-06, |
|
"loss": 0.4185, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.6202270381836945, |
|
"grad_norm": 0.4168296456336975, |
|
"learning_rate": 9.192913315661887e-06, |
|
"loss": 0.4192, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.630546955624355, |
|
"grad_norm": 0.4320428967475891, |
|
"learning_rate": 9.081885777693969e-06, |
|
"loss": 0.4107, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.6408668730650153, |
|
"grad_norm": 0.4265373647212982, |
|
"learning_rate": 8.97097226518103e-06, |
|
"loss": 0.4118, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.651186790505676, |
|
"grad_norm": 0.4222305417060852, |
|
"learning_rate": 8.860186553058066e-06, |
|
"loss": 0.409, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.6615067079463364, |
|
"grad_norm": 0.4396103024482727, |
|
"learning_rate": 8.749542400387861e-06, |
|
"loss": 0.409, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.671826625386997, |
|
"grad_norm": 0.4199695885181427, |
|
"learning_rate": 8.639053548652183e-06, |
|
"loss": 0.4048, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.6821465428276574, |
|
"grad_norm": 0.5912848114967346, |
|
"learning_rate": 8.528733720045162e-06, |
|
"loss": 0.4144, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.6924664602683177, |
|
"grad_norm": 0.41043537855148315, |
|
"learning_rate": 8.418596615769048e-06, |
|
"loss": 0.408, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7027863777089784, |
|
"grad_norm": 0.41550594568252563, |
|
"learning_rate": 8.308655914332599e-06, |
|
"loss": 0.4129, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.7131062951496387, |
|
"grad_norm": 0.4488890469074249, |
|
"learning_rate": 8.198925269852251e-06, |
|
"loss": 0.4077, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.7234262125902993, |
|
"grad_norm": 0.3980523347854614, |
|
"learning_rate": 8.089418310356379e-06, |
|
"loss": 0.4086, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.7337461300309598, |
|
"grad_norm": 0.43064674735069275, |
|
"learning_rate": 7.980148636092719e-06, |
|
"loss": 0.4097, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74406604747162, |
|
"grad_norm": 0.4358462393283844, |
|
"learning_rate": 7.871129817839304e-06, |
|
"loss": 0.4032, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.42179906368255615, |
|
"learning_rate": 7.762375395219045e-06, |
|
"loss": 0.4142, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.44796890020370483, |
|
"learning_rate": 7.653898875018151e-06, |
|
"loss": 0.4081, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.7750257997936016, |
|
"grad_norm": 0.40410351753234863, |
|
"learning_rate": 7.545713729508673e-06, |
|
"loss": 0.4066, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.7853457172342622, |
|
"grad_norm": 0.4304727613925934, |
|
"learning_rate": 7.437833394775283e-06, |
|
"loss": 0.408, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.7956656346749225, |
|
"grad_norm": 0.4234323799610138, |
|
"learning_rate": 7.330271269046614e-06, |
|
"loss": 0.4111, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.8059855521155832, |
|
"grad_norm": 0.42498621344566345, |
|
"learning_rate": 7.223040711031225e-06, |
|
"loss": 0.4084, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.8163054695562435, |
|
"grad_norm": 0.41671907901763916, |
|
"learning_rate": 7.116155038258531e-06, |
|
"loss": 0.4069, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.826625386996904, |
|
"grad_norm": 0.42239654064178467, |
|
"learning_rate": 7.009627525424836e-06, |
|
"loss": 0.4072, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.8369453044375645, |
|
"grad_norm": 0.41593438386917114, |
|
"learning_rate": 6.903471402744662e-06, |
|
"loss": 0.4038, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.8472652218782248, |
|
"grad_norm": 0.4209601879119873, |
|
"learning_rate": 6.797699854307631e-06, |
|
"loss": 0.401, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.8575851393188856, |
|
"grad_norm": 0.4174908995628357, |
|
"learning_rate": 6.692326016441054e-06, |
|
"loss": 0.4019, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8679050567595459, |
|
"grad_norm": 0.42820972204208374, |
|
"learning_rate": 6.587362976078463e-06, |
|
"loss": 0.4013, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.8782249742002064, |
|
"grad_norm": 0.41011422872543335, |
|
"learning_rate": 6.48282376913429e-06, |
|
"loss": 0.4074, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.888544891640867, |
|
"grad_norm": 0.41199740767478943, |
|
"learning_rate": 6.3787213788848376e-06, |
|
"loss": 0.4073, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.8988648090815272, |
|
"grad_norm": 0.4312268793582916, |
|
"learning_rate": 6.2750687343558535e-06, |
|
"loss": 0.4039, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.909184726522188, |
|
"grad_norm": 0.42642372846603394, |
|
"learning_rate": 6.171878708716778e-06, |
|
"loss": 0.399, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9195046439628483, |
|
"grad_norm": 0.42894232273101807, |
|
"learning_rate": 6.069164117681978e-06, |
|
"loss": 0.4013, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.40852513909339905, |
|
"learning_rate": 5.966937717919072e-06, |
|
"loss": 0.3988, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.9401444788441693, |
|
"grad_norm": 0.41867920756340027, |
|
"learning_rate": 5.86521220546463e-06, |
|
"loss": 0.4033, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.9504643962848296, |
|
"grad_norm": 0.41800442337989807, |
|
"learning_rate": 5.764000214147389e-06, |
|
"loss": 0.4033, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"grad_norm": 0.39704328775405884, |
|
"learning_rate": 5.663314314019172e-06, |
|
"loss": 0.4031, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.9711042311661506, |
|
"grad_norm": 0.42239245772361755, |
|
"learning_rate": 5.563167009793775e-06, |
|
"loss": 0.4045, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.9814241486068112, |
|
"grad_norm": 0.4174403250217438, |
|
"learning_rate": 5.463570739293906e-06, |
|
"loss": 0.404, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.9917440660474717, |
|
"grad_norm": 0.42525631189346313, |
|
"learning_rate": 5.364537871906488e-06, |
|
"loss": 0.4016, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.002063983488132, |
|
"grad_norm": 0.5702281594276428, |
|
"learning_rate": 5.2660807070464435e-06, |
|
"loss": 0.3937, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.0123839009287927, |
|
"grad_norm": 0.46447283029556274, |
|
"learning_rate": 5.16821147262915e-06, |
|
"loss": 0.3525, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.022703818369453, |
|
"grad_norm": 0.42777228355407715, |
|
"learning_rate": 5.070942323551802e-06, |
|
"loss": 0.348, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.0330237358101133, |
|
"grad_norm": 0.4387170374393463, |
|
"learning_rate": 4.974285340183819e-06, |
|
"loss": 0.3402, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.043343653250774, |
|
"grad_norm": 0.43583065271377563, |
|
"learning_rate": 4.878252526866541e-06, |
|
"loss": 0.3448, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.0536635706914343, |
|
"grad_norm": 0.4168053865432739, |
|
"learning_rate": 4.782855810422314e-06, |
|
"loss": 0.3459, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.063983488132095, |
|
"grad_norm": 0.4188889265060425, |
|
"learning_rate": 4.688107038673269e-06, |
|
"loss": 0.3413, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0743034055727554, |
|
"grad_norm": 0.4150793254375458, |
|
"learning_rate": 4.594017978969851e-06, |
|
"loss": 0.3448, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.0846233230134157, |
|
"grad_norm": 0.40199384093284607, |
|
"learning_rate": 4.50060031672939e-06, |
|
"loss": 0.3452, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.0949432404540764, |
|
"grad_norm": 0.4228343069553375, |
|
"learning_rate": 4.407865653984819e-06, |
|
"loss": 0.3482, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.41736671328544617, |
|
"learning_rate": 4.315825507943746e-06, |
|
"loss": 0.3419, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.1155830753353975, |
|
"grad_norm": 0.41220712661743164, |
|
"learning_rate": 4.224491309558092e-06, |
|
"loss": 0.3406, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.1259029927760578, |
|
"grad_norm": 0.4050326347351074, |
|
"learning_rate": 4.133874402104404e-06, |
|
"loss": 0.3437, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.136222910216718, |
|
"grad_norm": 0.43302711844444275, |
|
"learning_rate": 4.043986039775074e-06, |
|
"loss": 0.3414, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.146542827657379, |
|
"grad_norm": 0.411955863237381, |
|
"learning_rate": 3.954837386280642e-06, |
|
"loss": 0.3438, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.156862745098039, |
|
"grad_norm": 0.41292956471443176, |
|
"learning_rate": 3.8664395134632834e-06, |
|
"loss": 0.3449, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.1671826625387, |
|
"grad_norm": 0.42248064279556274, |
|
"learning_rate": 3.77880339992177e-06, |
|
"loss": 0.345, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17750257997936, |
|
"grad_norm": 0.4186584949493408, |
|
"learning_rate": 3.6919399296479553e-06, |
|
"loss": 0.34, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.1878224974200204, |
|
"grad_norm": 0.411190003156662, |
|
"learning_rate": 3.605859890675043e-06, |
|
"loss": 0.3455, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.198142414860681, |
|
"grad_norm": 0.4268835186958313, |
|
"learning_rate": 3.520573973737775e-06, |
|
"loss": 0.3422, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.2084623323013415, |
|
"grad_norm": 0.42581525444984436, |
|
"learning_rate": 3.4360927709446813e-06, |
|
"loss": 0.3461, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.218782249742002, |
|
"grad_norm": 0.41429799795150757, |
|
"learning_rate": 3.3524267744625793e-06, |
|
"loss": 0.3444, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.2291021671826625, |
|
"grad_norm": 0.40858539938926697, |
|
"learning_rate": 3.2695863752135203e-06, |
|
"loss": 0.3437, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.239422084623323, |
|
"grad_norm": 0.4122130870819092, |
|
"learning_rate": 3.1875818615842756e-06, |
|
"loss": 0.3424, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.2497420020639836, |
|
"grad_norm": 0.4092227518558502, |
|
"learning_rate": 3.1064234181485574e-06, |
|
"loss": 0.3436, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.260061919504644, |
|
"grad_norm": 0.4187150299549103, |
|
"learning_rate": 3.0261211244021527e-06, |
|
"loss": 0.3457, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.2703818369453046, |
|
"grad_norm": 0.411109060049057, |
|
"learning_rate": 2.9466849535111052e-06, |
|
"loss": 0.3407, |
|
"step": 2200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0079834794808784e+20, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|