|
{ |
|
"best_metric": 0.9376445510679586, |
|
"best_model_checkpoint": "./phobert_results_v2/checkpoint-12145", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 12145, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 4.723288059234619, |
|
"learning_rate": 1.994236311239193e-05, |
|
"loss": 1.9047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 32.87104034423828, |
|
"learning_rate": 1.9884726224783863e-05, |
|
"loss": 1.7269, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 5.206538677215576, |
|
"learning_rate": 1.9827089337175795e-05, |
|
"loss": 1.6865, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 52.5133056640625, |
|
"learning_rate": 1.9769452449567724e-05, |
|
"loss": 1.633, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 12.493169784545898, |
|
"learning_rate": 1.9711815561959656e-05, |
|
"loss": 1.553, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 7.675703048706055, |
|
"learning_rate": 1.9654178674351588e-05, |
|
"loss": 1.4131, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 5.984454154968262, |
|
"learning_rate": 1.9596541786743517e-05, |
|
"loss": 1.3606, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 9.403727531433105, |
|
"learning_rate": 1.953890489913545e-05, |
|
"loss": 1.2202, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 7.890481472015381, |
|
"learning_rate": 1.9481268011527378e-05, |
|
"loss": 1.186, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 12.102771759033203, |
|
"learning_rate": 1.942363112391931e-05, |
|
"loss": 1.0865, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 17.669885635375977, |
|
"learning_rate": 1.936599423631124e-05, |
|
"loss": 1.124, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 10.14146614074707, |
|
"learning_rate": 1.930835734870317e-05, |
|
"loss": 1.1307, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 9.363780975341797, |
|
"learning_rate": 1.9250720461095104e-05, |
|
"loss": 1.0582, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 22.122907638549805, |
|
"learning_rate": 1.9193083573487033e-05, |
|
"loss": 1.007, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 22.921249389648438, |
|
"learning_rate": 1.9135446685878965e-05, |
|
"loss": 1.0046, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 13.880660057067871, |
|
"learning_rate": 1.9077809798270894e-05, |
|
"loss": 0.9786, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 13.75207805633545, |
|
"learning_rate": 1.9020172910662826e-05, |
|
"loss": 0.9166, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 15.701948165893555, |
|
"learning_rate": 1.8962536023054755e-05, |
|
"loss": 0.8942, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 13.106974601745605, |
|
"learning_rate": 1.8904899135446687e-05, |
|
"loss": 0.8905, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 13.854475021362305, |
|
"learning_rate": 1.884726224783862e-05, |
|
"loss": 0.9089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 14.22427749633789, |
|
"learning_rate": 1.878962536023055e-05, |
|
"loss": 0.8688, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 14.088136672973633, |
|
"learning_rate": 1.873198847262248e-05, |
|
"loss": 0.859, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 15.746428489685059, |
|
"learning_rate": 1.867435158501441e-05, |
|
"loss": 0.8356, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 10.937832832336426, |
|
"learning_rate": 1.861671469740634e-05, |
|
"loss": 0.8699, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 14.396600723266602, |
|
"learning_rate": 1.855907780979827e-05, |
|
"loss": 0.7789, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 11.801300048828125, |
|
"learning_rate": 1.8501440922190203e-05, |
|
"loss": 0.8713, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 10.435481071472168, |
|
"learning_rate": 1.8443804034582135e-05, |
|
"loss": 0.7961, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 11.2293062210083, |
|
"learning_rate": 1.8386167146974067e-05, |
|
"loss": 0.8397, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 12.809613227844238, |
|
"learning_rate": 1.8328530259365996e-05, |
|
"loss": 0.8297, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 14.791847229003906, |
|
"learning_rate": 1.8270893371757928e-05, |
|
"loss": 0.7709, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 14.587478637695312, |
|
"learning_rate": 1.8213256484149857e-05, |
|
"loss": 0.7552, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 16.79636001586914, |
|
"learning_rate": 1.815561959654179e-05, |
|
"loss": 0.7937, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 8.510680198669434, |
|
"learning_rate": 1.8097982708933718e-05, |
|
"loss": 0.7181, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 20.466522216796875, |
|
"learning_rate": 1.804034582132565e-05, |
|
"loss": 0.712, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7955036748811067, |
|
"eval_f1_macro": 0.795530535937033, |
|
"eval_f1_weighted": 0.795536244762841, |
|
"eval_loss": 0.6264312267303467, |
|
"eval_precision_macro": 0.7978076489526655, |
|
"eval_precision_weighted": 0.797819457255313, |
|
"eval_recall_macro": 0.7955041514180622, |
|
"eval_recall_weighted": 0.7955036748811067, |
|
"eval_runtime": 29.7702, |
|
"eval_samples_per_second": 233.086, |
|
"eval_steps_per_second": 14.578, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 17.28911018371582, |
|
"learning_rate": 1.7982708933717582e-05, |
|
"loss": 0.6512, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 28.79306411743164, |
|
"learning_rate": 1.792507204610951e-05, |
|
"loss": 0.6036, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 13.444112777709961, |
|
"learning_rate": 1.7867435158501444e-05, |
|
"loss": 0.552, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 22.068620681762695, |
|
"learning_rate": 1.7809798270893372e-05, |
|
"loss": 0.577, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 13.942420959472656, |
|
"learning_rate": 1.7752161383285305e-05, |
|
"loss": 0.5573, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 13.652227401733398, |
|
"learning_rate": 1.7694524495677234e-05, |
|
"loss": 0.5401, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 22.252243041992188, |
|
"learning_rate": 1.7636887608069166e-05, |
|
"loss": 0.558, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 17.13372802734375, |
|
"learning_rate": 1.7579250720461095e-05, |
|
"loss": 0.514, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 17.164276123046875, |
|
"learning_rate": 1.7521613832853027e-05, |
|
"loss": 0.5137, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 14.449616432189941, |
|
"learning_rate": 1.746397694524496e-05, |
|
"loss": 0.5, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 11.61601734161377, |
|
"learning_rate": 1.7406340057636888e-05, |
|
"loss": 0.5071, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 20.6407527923584, |
|
"learning_rate": 1.734870317002882e-05, |
|
"loss": 0.4803, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 18.60149383544922, |
|
"learning_rate": 1.729106628242075e-05, |
|
"loss": 0.4967, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 22.12944793701172, |
|
"learning_rate": 1.723342939481268e-05, |
|
"loss": 0.4652, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 18.460689544677734, |
|
"learning_rate": 1.717579250720461e-05, |
|
"loss": 0.4417, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 19.452363967895508, |
|
"learning_rate": 1.7118155619596542e-05, |
|
"loss": 0.5082, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 41.46109390258789, |
|
"learning_rate": 1.7060518731988475e-05, |
|
"loss": 0.463, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 25.584379196166992, |
|
"learning_rate": 1.7002881844380407e-05, |
|
"loss": 0.4788, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 16.92909049987793, |
|
"learning_rate": 1.6945244956772336e-05, |
|
"loss": 0.4752, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 10.342531204223633, |
|
"learning_rate": 1.6887608069164268e-05, |
|
"loss": 0.4848, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 15.596243858337402, |
|
"learning_rate": 1.6829971181556197e-05, |
|
"loss": 0.5158, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 17.031354904174805, |
|
"learning_rate": 1.6772334293948126e-05, |
|
"loss": 0.4577, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 13.550045013427734, |
|
"learning_rate": 1.6714697406340058e-05, |
|
"loss": 0.4947, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 10.102880477905273, |
|
"learning_rate": 1.665706051873199e-05, |
|
"loss": 0.4681, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 10.968811988830566, |
|
"learning_rate": 1.6599423631123922e-05, |
|
"loss": 0.452, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 4.670314311981201, |
|
"learning_rate": 1.654178674351585e-05, |
|
"loss": 0.4346, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 28.008899688720703, |
|
"learning_rate": 1.6484149855907783e-05, |
|
"loss": 0.4404, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 27.77347183227539, |
|
"learning_rate": 1.6426512968299712e-05, |
|
"loss": 0.4547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 21.949289321899414, |
|
"learning_rate": 1.6368876080691644e-05, |
|
"loss": 0.4924, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 2.8766281604766846, |
|
"learning_rate": 1.6311239193083573e-05, |
|
"loss": 0.3776, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 38.812625885009766, |
|
"learning_rate": 1.6253602305475506e-05, |
|
"loss": 0.4176, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 24.132482528686523, |
|
"learning_rate": 1.6195965417867438e-05, |
|
"loss": 0.4407, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 19.72426414489746, |
|
"learning_rate": 1.613832853025937e-05, |
|
"loss": 0.4503, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 16.077899932861328, |
|
"learning_rate": 1.60806916426513e-05, |
|
"loss": 0.4244, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 21.258529663085938, |
|
"learning_rate": 1.6023054755043228e-05, |
|
"loss": 0.4161, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.87101887880098, |
|
"eval_f1_macro": 0.8705600204306078, |
|
"eval_f1_weighted": 0.8705702821418305, |
|
"eval_loss": 0.43536442518234253, |
|
"eval_precision_macro": 0.8749786652431168, |
|
"eval_precision_weighted": 0.8749977247467932, |
|
"eval_recall_macro": 0.8710178763247105, |
|
"eval_recall_weighted": 0.87101887880098, |
|
"eval_runtime": 29.9836, |
|
"eval_samples_per_second": 231.426, |
|
"eval_steps_per_second": 14.475, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 13.94642448425293, |
|
"learning_rate": 1.596541786743516e-05, |
|
"loss": 0.321, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0461095100864553, |
|
"grad_norm": 17.44521713256836, |
|
"learning_rate": 1.590778097982709e-05, |
|
"loss": 0.2707, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 12.439199447631836, |
|
"learning_rate": 1.585014409221902e-05, |
|
"loss": 0.2606, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.1037463976945245, |
|
"grad_norm": 10.950318336486816, |
|
"learning_rate": 1.5792507204610953e-05, |
|
"loss": 0.2601, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 20.581911087036133, |
|
"learning_rate": 1.5734870317002882e-05, |
|
"loss": 0.265, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.161383285302594, |
|
"grad_norm": 9.320990562438965, |
|
"learning_rate": 1.5677233429394814e-05, |
|
"loss": 0.297, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 20.260753631591797, |
|
"learning_rate": 1.5619596541786747e-05, |
|
"loss": 0.3121, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.2190201729106627, |
|
"grad_norm": 55.92860412597656, |
|
"learning_rate": 1.5561959654178675e-05, |
|
"loss": 0.2295, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 9.637039184570312, |
|
"learning_rate": 1.5504322766570608e-05, |
|
"loss": 0.2558, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.276657060518732, |
|
"grad_norm": 13.105185508728027, |
|
"learning_rate": 1.5446685878962537e-05, |
|
"loss": 0.2432, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 17.233076095581055, |
|
"learning_rate": 1.538904899135447e-05, |
|
"loss": 0.2817, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3342939481268012, |
|
"grad_norm": 14.211281776428223, |
|
"learning_rate": 1.5331412103746398e-05, |
|
"loss": 0.2663, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 30.84634780883789, |
|
"learning_rate": 1.527377521613833e-05, |
|
"loss": 0.3084, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.39193083573487, |
|
"grad_norm": 29.224945068359375, |
|
"learning_rate": 1.521613832853026e-05, |
|
"loss": 0.3109, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 3.674872398376465, |
|
"learning_rate": 1.5158501440922191e-05, |
|
"loss": 0.2346, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.4495677233429394, |
|
"grad_norm": 5.084238052368164, |
|
"learning_rate": 1.5100864553314123e-05, |
|
"loss": 0.2227, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 21.61268424987793, |
|
"learning_rate": 1.5043227665706052e-05, |
|
"loss": 0.2426, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.5072046109510087, |
|
"grad_norm": 12.604729652404785, |
|
"learning_rate": 1.4985590778097984e-05, |
|
"loss": 0.325, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 25.42458152770996, |
|
"learning_rate": 1.4927953890489915e-05, |
|
"loss": 0.3394, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.564841498559078, |
|
"grad_norm": 9.900081634521484, |
|
"learning_rate": 1.4870317002881847e-05, |
|
"loss": 0.2565, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 14.56777572631836, |
|
"learning_rate": 1.4812680115273776e-05, |
|
"loss": 0.2542, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.6224783861671472, |
|
"grad_norm": 27.645551681518555, |
|
"learning_rate": 1.4755043227665706e-05, |
|
"loss": 0.2753, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 24.675256729125977, |
|
"learning_rate": 1.4697406340057639e-05, |
|
"loss": 0.2648, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.680115273775216, |
|
"grad_norm": 19.5800838470459, |
|
"learning_rate": 1.4639769452449568e-05, |
|
"loss": 0.2441, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 32.76830291748047, |
|
"learning_rate": 1.45821325648415e-05, |
|
"loss": 0.2748, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.7377521613832854, |
|
"grad_norm": 9.661020278930664, |
|
"learning_rate": 1.452449567723343e-05, |
|
"loss": 0.2721, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 25.79545021057129, |
|
"learning_rate": 1.4466858789625363e-05, |
|
"loss": 0.2743, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.795389048991354, |
|
"grad_norm": 30.077226638793945, |
|
"learning_rate": 1.4409221902017291e-05, |
|
"loss": 0.2314, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 11.516585350036621, |
|
"learning_rate": 1.4351585014409224e-05, |
|
"loss": 0.2098, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.8530259365994235, |
|
"grad_norm": 12.071518898010254, |
|
"learning_rate": 1.4293948126801154e-05, |
|
"loss": 0.2447, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 1.786498785018921, |
|
"learning_rate": 1.4236311239193086e-05, |
|
"loss": 0.2374, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.910662824207493, |
|
"grad_norm": 31.069623947143555, |
|
"learning_rate": 1.4178674351585015e-05, |
|
"loss": 0.2667, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.9394812680115274, |
|
"grad_norm": 2.980510950088501, |
|
"learning_rate": 1.4121037463976946e-05, |
|
"loss": 0.28, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.968299711815562, |
|
"grad_norm": 18.496265411376953, |
|
"learning_rate": 1.4063400576368878e-05, |
|
"loss": 0.1831, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.9971181556195967, |
|
"grad_norm": 33.23817825317383, |
|
"learning_rate": 1.4005763688760807e-05, |
|
"loss": 0.3026, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9050295431618389, |
|
"eval_f1_macro": 0.9049667283676557, |
|
"eval_f1_weighted": 0.9049779714917217, |
|
"eval_loss": 0.3607948124408722, |
|
"eval_precision_macro": 0.9065337153272169, |
|
"eval_precision_weighted": 0.9065450713204785, |
|
"eval_recall_macro": 0.9050188737810804, |
|
"eval_recall_weighted": 0.9050295431618389, |
|
"eval_runtime": 29.9421, |
|
"eval_samples_per_second": 231.747, |
|
"eval_steps_per_second": 14.495, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 3.025936599423631, |
|
"grad_norm": 14.13509750366211, |
|
"learning_rate": 1.3948126801152739e-05, |
|
"loss": 0.1617, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.0547550432276656, |
|
"grad_norm": 0.5927883982658386, |
|
"learning_rate": 1.389048991354467e-05, |
|
"loss": 0.1693, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.0835734870317, |
|
"grad_norm": 14.065008163452148, |
|
"learning_rate": 1.3832853025936602e-05, |
|
"loss": 0.128, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.112391930835735, |
|
"grad_norm": 40.97966003417969, |
|
"learning_rate": 1.377521613832853e-05, |
|
"loss": 0.1558, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.1412103746397695, |
|
"grad_norm": 10.36765193939209, |
|
"learning_rate": 1.3717579250720463e-05, |
|
"loss": 0.207, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.170028818443804, |
|
"grad_norm": 0.5478718876838684, |
|
"learning_rate": 1.3659942363112394e-05, |
|
"loss": 0.1802, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.1988472622478388, |
|
"grad_norm": 0.6302068829536438, |
|
"learning_rate": 1.3602305475504324e-05, |
|
"loss": 0.14, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.227665706051873, |
|
"grad_norm": 2.4789652824401855, |
|
"learning_rate": 1.3544668587896255e-05, |
|
"loss": 0.1137, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.2564841498559076, |
|
"grad_norm": 22.161422729492188, |
|
"learning_rate": 1.3487031700288185e-05, |
|
"loss": 0.2182, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.2853025936599423, |
|
"grad_norm": 28.67848014831543, |
|
"learning_rate": 1.3429394812680117e-05, |
|
"loss": 0.1741, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.314121037463977, |
|
"grad_norm": 13.24758243560791, |
|
"learning_rate": 1.3371757925072046e-05, |
|
"loss": 0.2205, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.3429394812680115, |
|
"grad_norm": 0.6006250977516174, |
|
"learning_rate": 1.3314121037463979e-05, |
|
"loss": 0.1419, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.371757925072046, |
|
"grad_norm": 4.931090831756592, |
|
"learning_rate": 1.3256484149855909e-05, |
|
"loss": 0.1466, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 3.400576368876081, |
|
"grad_norm": 17.918506622314453, |
|
"learning_rate": 1.319884726224784e-05, |
|
"loss": 0.1437, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.4293948126801155, |
|
"grad_norm": 18.44112205505371, |
|
"learning_rate": 1.314121037463977e-05, |
|
"loss": 0.1923, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.4582132564841497, |
|
"grad_norm": 36.937828063964844, |
|
"learning_rate": 1.3083573487031702e-05, |
|
"loss": 0.2021, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.4870317002881843, |
|
"grad_norm": 0.9251816868782043, |
|
"learning_rate": 1.3025936599423631e-05, |
|
"loss": 0.2166, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.515850144092219, |
|
"grad_norm": 29.732206344604492, |
|
"learning_rate": 1.2968299711815563e-05, |
|
"loss": 0.1772, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.5446685878962536, |
|
"grad_norm": 8.258246421813965, |
|
"learning_rate": 1.2910662824207494e-05, |
|
"loss": 0.2043, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.5734870317002883, |
|
"grad_norm": 0.8275717496871948, |
|
"learning_rate": 1.2853025936599423e-05, |
|
"loss": 0.1502, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.602305475504323, |
|
"grad_norm": 27.115209579467773, |
|
"learning_rate": 1.2795389048991355e-05, |
|
"loss": 0.1733, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.631123919308357, |
|
"grad_norm": 0.7643899321556091, |
|
"learning_rate": 1.2737752161383286e-05, |
|
"loss": 0.1349, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.6599423631123917, |
|
"grad_norm": 33.05510330200195, |
|
"learning_rate": 1.2680115273775218e-05, |
|
"loss": 0.1526, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.6887608069164264, |
|
"grad_norm": 13.939464569091797, |
|
"learning_rate": 1.2622478386167147e-05, |
|
"loss": 0.1616, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.717579250720461, |
|
"grad_norm": 2.4451797008514404, |
|
"learning_rate": 1.2564841498559079e-05, |
|
"loss": 0.1751, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.7463976945244957, |
|
"grad_norm": 17.99618148803711, |
|
"learning_rate": 1.250720461095101e-05, |
|
"loss": 0.2061, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.7752161383285303, |
|
"grad_norm": 19.82054328918457, |
|
"learning_rate": 1.2449567723342942e-05, |
|
"loss": 0.2168, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.804034582132565, |
|
"grad_norm": 17.200483322143555, |
|
"learning_rate": 1.239193083573487e-05, |
|
"loss": 0.1849, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.8328530259365996, |
|
"grad_norm": 9.288723945617676, |
|
"learning_rate": 1.2334293948126803e-05, |
|
"loss": 0.1902, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.8616714697406342, |
|
"grad_norm": 52.95684051513672, |
|
"learning_rate": 1.2276657060518733e-05, |
|
"loss": 0.1566, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.8904899135446684, |
|
"grad_norm": 22.813720703125, |
|
"learning_rate": 1.2219020172910662e-05, |
|
"loss": 0.1212, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.919308357348703, |
|
"grad_norm": 3.377829074859619, |
|
"learning_rate": 1.2161383285302594e-05, |
|
"loss": 0.1147, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.9481268011527377, |
|
"grad_norm": 0.6791939735412598, |
|
"learning_rate": 1.2103746397694525e-05, |
|
"loss": 0.1678, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.9769452449567724, |
|
"grad_norm": 12.582691192626953, |
|
"learning_rate": 1.2046109510086457e-05, |
|
"loss": 0.2679, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9279435077100446, |
|
"eval_f1_macro": 0.927584474052213, |
|
"eval_f1_weighted": 0.927592833052565, |
|
"eval_loss": 0.35773107409477234, |
|
"eval_precision_macro": 0.9276877579070416, |
|
"eval_precision_weighted": 0.9276960791160526, |
|
"eval_recall_macro": 0.9279352304379973, |
|
"eval_recall_weighted": 0.9279435077100446, |
|
"eval_runtime": 30.2758, |
|
"eval_samples_per_second": 229.193, |
|
"eval_steps_per_second": 14.335, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 4.005763688760807, |
|
"grad_norm": 0.19622278213500977, |
|
"learning_rate": 1.1988472622478386e-05, |
|
"loss": 0.1063, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.034582132564841, |
|
"grad_norm": 2.3306796550750732, |
|
"learning_rate": 1.1930835734870318e-05, |
|
"loss": 0.1007, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.063400576368876, |
|
"grad_norm": 36.256927490234375, |
|
"learning_rate": 1.1873198847262249e-05, |
|
"loss": 0.0781, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 4.0922190201729105, |
|
"grad_norm": 13.904011726379395, |
|
"learning_rate": 1.1815561959654181e-05, |
|
"loss": 0.1152, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.121037463976945, |
|
"grad_norm": 2.325575828552246, |
|
"learning_rate": 1.175792507204611e-05, |
|
"loss": 0.1033, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 4.14985590778098, |
|
"grad_norm": 0.9815200567245483, |
|
"learning_rate": 1.1700288184438042e-05, |
|
"loss": 0.0964, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.178674351585014, |
|
"grad_norm": 32.72802734375, |
|
"learning_rate": 1.1642651296829973e-05, |
|
"loss": 0.1008, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 4.207492795389049, |
|
"grad_norm": 19.83048439025879, |
|
"learning_rate": 1.1585014409221902e-05, |
|
"loss": 0.1044, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.236311239193084, |
|
"grad_norm": 22.580406188964844, |
|
"learning_rate": 1.1527377521613834e-05, |
|
"loss": 0.1528, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 4.265129682997118, |
|
"grad_norm": 2.180345296859741, |
|
"learning_rate": 1.1469740634005764e-05, |
|
"loss": 0.1219, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.293948126801153, |
|
"grad_norm": 8.74466609954834, |
|
"learning_rate": 1.1412103746397697e-05, |
|
"loss": 0.0763, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 4.322766570605188, |
|
"grad_norm": 0.5323246717453003, |
|
"learning_rate": 1.1354466858789625e-05, |
|
"loss": 0.0875, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.351585014409222, |
|
"grad_norm": 0.2620614171028137, |
|
"learning_rate": 1.1296829971181558e-05, |
|
"loss": 0.1046, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 4.380403458213257, |
|
"grad_norm": 7.299178123474121, |
|
"learning_rate": 1.1239193083573488e-05, |
|
"loss": 0.1218, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.409221902017291, |
|
"grad_norm": 12.042703628540039, |
|
"learning_rate": 1.1181556195965419e-05, |
|
"loss": 0.1083, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 4.438040345821325, |
|
"grad_norm": 0.6515465378761292, |
|
"learning_rate": 1.112391930835735e-05, |
|
"loss": 0.0987, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.46685878962536, |
|
"grad_norm": 4.6855316162109375, |
|
"learning_rate": 1.1066282420749282e-05, |
|
"loss": 0.0983, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 4.495677233429395, |
|
"grad_norm": 23.7063045501709, |
|
"learning_rate": 1.100864553314121e-05, |
|
"loss": 0.0989, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.524495677233429, |
|
"grad_norm": 1.5553531646728516, |
|
"learning_rate": 1.0951008645533141e-05, |
|
"loss": 0.1309, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 4.553314121037464, |
|
"grad_norm": 32.84361267089844, |
|
"learning_rate": 1.0893371757925073e-05, |
|
"loss": 0.1097, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.582132564841499, |
|
"grad_norm": 70.86231231689453, |
|
"learning_rate": 1.0835734870317004e-05, |
|
"loss": 0.1408, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 4.610951008645533, |
|
"grad_norm": 0.31812503933906555, |
|
"learning_rate": 1.0778097982708934e-05, |
|
"loss": 0.1226, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.639769452449568, |
|
"grad_norm": 39.376468658447266, |
|
"learning_rate": 1.0720461095100865e-05, |
|
"loss": 0.1727, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 4.6685878962536025, |
|
"grad_norm": 28.209671020507812, |
|
"learning_rate": 1.0662824207492797e-05, |
|
"loss": 0.1426, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.697406340057637, |
|
"grad_norm": 30.422863006591797, |
|
"learning_rate": 1.0605187319884726e-05, |
|
"loss": 0.1224, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.726224783861672, |
|
"grad_norm": 0.9933204054832458, |
|
"learning_rate": 1.0547550432276658e-05, |
|
"loss": 0.0922, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.755043227665706, |
|
"grad_norm": 0.3882824182510376, |
|
"learning_rate": 1.0489913544668589e-05, |
|
"loss": 0.1163, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.78386167146974, |
|
"grad_norm": 0.04565083980560303, |
|
"learning_rate": 1.0432276657060521e-05, |
|
"loss": 0.1135, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.812680115273775, |
|
"grad_norm": 34.51498794555664, |
|
"learning_rate": 1.037463976945245e-05, |
|
"loss": 0.0817, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.8414985590778095, |
|
"grad_norm": 23.297779083251953, |
|
"learning_rate": 1.031700288184438e-05, |
|
"loss": 0.1033, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.870317002881844, |
|
"grad_norm": 0.41480186581611633, |
|
"learning_rate": 1.0259365994236313e-05, |
|
"loss": 0.107, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.899135446685879, |
|
"grad_norm": 49.01393508911133, |
|
"learning_rate": 1.0201729106628241e-05, |
|
"loss": 0.1095, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.927953890489913, |
|
"grad_norm": 0.21958515048027039, |
|
"learning_rate": 1.0144092219020174e-05, |
|
"loss": 0.0695, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.956772334293948, |
|
"grad_norm": 0.6442630290985107, |
|
"learning_rate": 1.0086455331412104e-05, |
|
"loss": 0.0979, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.985590778097983, |
|
"grad_norm": 0.04263289272785187, |
|
"learning_rate": 1.0028818443804036e-05, |
|
"loss": 0.1149, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.933852140077821, |
|
"eval_f1_macro": 0.9335300711231832, |
|
"eval_f1_weighted": 0.9335363129664682, |
|
"eval_loss": 0.3687053918838501, |
|
"eval_precision_macro": 0.9336924382488527, |
|
"eval_precision_weighted": 0.9336961352032257, |
|
"eval_recall_macro": 0.9338433865620074, |
|
"eval_recall_weighted": 0.933852140077821, |
|
"eval_runtime": 29.8558, |
|
"eval_samples_per_second": 232.417, |
|
"eval_steps_per_second": 14.537, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 5.014409221902017, |
|
"grad_norm": 4.232170581817627, |
|
"learning_rate": 9.971181556195965e-06, |
|
"loss": 0.1073, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.043227665706052, |
|
"grad_norm": 83.36505126953125, |
|
"learning_rate": 9.913544668587897e-06, |
|
"loss": 0.0736, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 5.072046109510087, |
|
"grad_norm": 0.28978821635246277, |
|
"learning_rate": 9.855907780979828e-06, |
|
"loss": 0.0804, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.100864553314121, |
|
"grad_norm": 11.236791610717773, |
|
"learning_rate": 9.798270893371759e-06, |
|
"loss": 0.0989, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 5.129682997118156, |
|
"grad_norm": 0.07707870006561279, |
|
"learning_rate": 9.740634005763689e-06, |
|
"loss": 0.0519, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.1585014409221905, |
|
"grad_norm": 0.45296791195869446, |
|
"learning_rate": 9.68299711815562e-06, |
|
"loss": 0.0403, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 5.187319884726225, |
|
"grad_norm": 46.82713317871094, |
|
"learning_rate": 9.625360230547552e-06, |
|
"loss": 0.0895, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.216138328530259, |
|
"grad_norm": 6.646805286407471, |
|
"learning_rate": 9.567723342939482e-06, |
|
"loss": 0.0419, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 5.244956772334294, |
|
"grad_norm": 21.99289321899414, |
|
"learning_rate": 9.510086455331413e-06, |
|
"loss": 0.0573, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.273775216138328, |
|
"grad_norm": 39.594390869140625, |
|
"learning_rate": 9.452449567723344e-06, |
|
"loss": 0.0585, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 5.302593659942363, |
|
"grad_norm": 18.201231002807617, |
|
"learning_rate": 9.394812680115276e-06, |
|
"loss": 0.1113, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.3314121037463975, |
|
"grad_norm": 30.270816802978516, |
|
"learning_rate": 9.337175792507205e-06, |
|
"loss": 0.0531, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 5.360230547550432, |
|
"grad_norm": 42.12540817260742, |
|
"learning_rate": 9.279538904899135e-06, |
|
"loss": 0.0975, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.389048991354467, |
|
"grad_norm": 0.4828750193119049, |
|
"learning_rate": 9.221902017291067e-06, |
|
"loss": 0.0636, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 5.417867435158501, |
|
"grad_norm": 21.690710067749023, |
|
"learning_rate": 9.164265129682998e-06, |
|
"loss": 0.042, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.446685878962536, |
|
"grad_norm": 43.906761169433594, |
|
"learning_rate": 9.106628242074928e-06, |
|
"loss": 0.0887, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 5.475504322766571, |
|
"grad_norm": 37.99407958984375, |
|
"learning_rate": 9.048991354466859e-06, |
|
"loss": 0.0444, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.504322766570605, |
|
"grad_norm": 0.9924225807189941, |
|
"learning_rate": 8.991354466858791e-06, |
|
"loss": 0.0893, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 5.53314121037464, |
|
"grad_norm": 0.024848056957125664, |
|
"learning_rate": 8.933717579250722e-06, |
|
"loss": 0.0701, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.561959654178675, |
|
"grad_norm": 0.013018026947975159, |
|
"learning_rate": 8.876080691642652e-06, |
|
"loss": 0.1031, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 5.590778097982709, |
|
"grad_norm": 0.36339619755744934, |
|
"learning_rate": 8.818443804034583e-06, |
|
"loss": 0.0422, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.619596541786743, |
|
"grad_norm": 0.5731251835823059, |
|
"learning_rate": 8.760806916426513e-06, |
|
"loss": 0.0666, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 5.648414985590778, |
|
"grad_norm": 3.643000841140747, |
|
"learning_rate": 8.703170028818444e-06, |
|
"loss": 0.0817, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.677233429394812, |
|
"grad_norm": 0.039307739585638046, |
|
"learning_rate": 8.645533141210375e-06, |
|
"loss": 0.1151, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 5.706051873198847, |
|
"grad_norm": 0.2764396667480469, |
|
"learning_rate": 8.587896253602305e-06, |
|
"loss": 0.1021, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.734870317002882, |
|
"grad_norm": 46.3472785949707, |
|
"learning_rate": 8.530259365994237e-06, |
|
"loss": 0.0848, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 5.763688760806916, |
|
"grad_norm": 0.2657397389411926, |
|
"learning_rate": 8.472622478386168e-06, |
|
"loss": 0.0904, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.792507204610951, |
|
"grad_norm": 30.730506896972656, |
|
"learning_rate": 8.414985590778098e-06, |
|
"loss": 0.0715, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 5.821325648414986, |
|
"grad_norm": 2.388108253479004, |
|
"learning_rate": 8.357348703170029e-06, |
|
"loss": 0.0474, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 5.85014409221902, |
|
"grad_norm": 0.06589208543300629, |
|
"learning_rate": 8.299711815561961e-06, |
|
"loss": 0.0718, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 5.878962536023055, |
|
"grad_norm": 0.017373552545905113, |
|
"learning_rate": 8.242074927953892e-06, |
|
"loss": 0.0851, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 5.9077809798270895, |
|
"grad_norm": 1.8382971286773682, |
|
"learning_rate": 8.184438040345822e-06, |
|
"loss": 0.0771, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 5.936599423631124, |
|
"grad_norm": 0.5171680450439453, |
|
"learning_rate": 8.126801152737753e-06, |
|
"loss": 0.0352, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 5.965417867435159, |
|
"grad_norm": 0.052714597433805466, |
|
"learning_rate": 8.069164265129685e-06, |
|
"loss": 0.0541, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 5.994236311239193, |
|
"grad_norm": 55.4918327331543, |
|
"learning_rate": 8.011527377521614e-06, |
|
"loss": 0.0491, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9348609309698804, |
|
"eval_f1_macro": 0.9339829958075209, |
|
"eval_f1_weighted": 0.9339902153482186, |
|
"eval_loss": 0.4194980561733246, |
|
"eval_precision_macro": 0.9352763632278608, |
|
"eval_precision_weighted": 0.9352802017792717, |
|
"eval_recall_macro": 0.9348508698098554, |
|
"eval_recall_weighted": 0.9348609309698804, |
|
"eval_runtime": 29.9694, |
|
"eval_samples_per_second": 231.536, |
|
"eval_steps_per_second": 14.481, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 6.023054755043228, |
|
"grad_norm": 0.13180404901504517, |
|
"learning_rate": 7.953890489913544e-06, |
|
"loss": 0.0896, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 6.051873198847262, |
|
"grad_norm": 14.752634048461914, |
|
"learning_rate": 7.896253602305477e-06, |
|
"loss": 0.0528, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.0806916426512965, |
|
"grad_norm": 0.12015581876039505, |
|
"learning_rate": 7.838616714697407e-06, |
|
"loss": 0.0762, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 6.109510086455331, |
|
"grad_norm": 0.8792430758476257, |
|
"learning_rate": 7.780979827089338e-06, |
|
"loss": 0.0199, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.138328530259366, |
|
"grad_norm": 0.15485620498657227, |
|
"learning_rate": 7.723342939481268e-06, |
|
"loss": 0.0448, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 6.1671469740634, |
|
"grad_norm": 24.48517417907715, |
|
"learning_rate": 7.665706051873199e-06, |
|
"loss": 0.0477, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 6.195965417867435, |
|
"grad_norm": 0.01565726287662983, |
|
"learning_rate": 7.60806916426513e-06, |
|
"loss": 0.0477, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 6.22478386167147, |
|
"grad_norm": 0.06371276825666428, |
|
"learning_rate": 7.550432276657062e-06, |
|
"loss": 0.072, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.253602305475504, |
|
"grad_norm": 0.036789000034332275, |
|
"learning_rate": 7.492795389048992e-06, |
|
"loss": 0.024, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 6.282420749279539, |
|
"grad_norm": 10.600634574890137, |
|
"learning_rate": 7.4351585014409235e-06, |
|
"loss": 0.0583, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.311239193083574, |
|
"grad_norm": 0.00421316921710968, |
|
"learning_rate": 7.377521613832853e-06, |
|
"loss": 0.0416, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 6.340057636887608, |
|
"grad_norm": 43.42197799682617, |
|
"learning_rate": 7.319884726224784e-06, |
|
"loss": 0.0642, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.368876080691643, |
|
"grad_norm": 0.05901940539479256, |
|
"learning_rate": 7.262247838616715e-06, |
|
"loss": 0.0511, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 6.3976945244956775, |
|
"grad_norm": 0.022665705531835556, |
|
"learning_rate": 7.204610951008646e-06, |
|
"loss": 0.0076, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 6.426512968299712, |
|
"grad_norm": 1.6211119890213013, |
|
"learning_rate": 7.146974063400577e-06, |
|
"loss": 0.0401, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 6.455331412103746, |
|
"grad_norm": 0.006085489876568317, |
|
"learning_rate": 7.089337175792508e-06, |
|
"loss": 0.0732, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.484149855907781, |
|
"grad_norm": 44.77919006347656, |
|
"learning_rate": 7.031700288184439e-06, |
|
"loss": 0.0634, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 6.512968299711815, |
|
"grad_norm": 13.231744766235352, |
|
"learning_rate": 6.9740634005763696e-06, |
|
"loss": 0.0429, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 6.54178674351585, |
|
"grad_norm": 0.025533217936754227, |
|
"learning_rate": 6.916426512968301e-06, |
|
"loss": 0.0301, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 6.5706051873198845, |
|
"grad_norm": 0.014192778617143631, |
|
"learning_rate": 6.8587896253602315e-06, |
|
"loss": 0.0588, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 6.599423631123919, |
|
"grad_norm": 0.12580570578575134, |
|
"learning_rate": 6.801152737752162e-06, |
|
"loss": 0.0463, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 6.628242074927954, |
|
"grad_norm": 27.74668312072754, |
|
"learning_rate": 6.743515850144093e-06, |
|
"loss": 0.0801, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.6570605187319885, |
|
"grad_norm": 0.016943486407399178, |
|
"learning_rate": 6.685878962536023e-06, |
|
"loss": 0.0563, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 6.685878962536023, |
|
"grad_norm": 0.41584333777427673, |
|
"learning_rate": 6.6282420749279545e-06, |
|
"loss": 0.043, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 6.714697406340058, |
|
"grad_norm": 0.15631648898124695, |
|
"learning_rate": 6.570605187319885e-06, |
|
"loss": 0.0479, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 6.743515850144092, |
|
"grad_norm": 0.560581386089325, |
|
"learning_rate": 6.512968299711816e-06, |
|
"loss": 0.0374, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 6.772334293948127, |
|
"grad_norm": 0.1623823046684265, |
|
"learning_rate": 6.455331412103747e-06, |
|
"loss": 0.0402, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 6.801152737752162, |
|
"grad_norm": 0.0034744683653116226, |
|
"learning_rate": 6.3976945244956775e-06, |
|
"loss": 0.0653, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 6.829971181556196, |
|
"grad_norm": 11.196998596191406, |
|
"learning_rate": 6.340057636887609e-06, |
|
"loss": 0.0453, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 6.858789625360231, |
|
"grad_norm": 11.29255199432373, |
|
"learning_rate": 6.2824207492795395e-06, |
|
"loss": 0.0774, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 6.887608069164266, |
|
"grad_norm": 0.0062851207330822945, |
|
"learning_rate": 6.224783861671471e-06, |
|
"loss": 0.0354, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 6.916426512968299, |
|
"grad_norm": 0.030905550345778465, |
|
"learning_rate": 6.167146974063401e-06, |
|
"loss": 0.0353, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.945244956772334, |
|
"grad_norm": 0.2567192018032074, |
|
"learning_rate": 6.109510086455331e-06, |
|
"loss": 0.0248, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 6.974063400576369, |
|
"grad_norm": 0.03146115690469742, |
|
"learning_rate": 6.0518731988472625e-06, |
|
"loss": 0.052, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9383196425997983, |
|
"eval_f1_macro": 0.9376382344287736, |
|
"eval_f1_weighted": 0.9376445510679586, |
|
"eval_loss": 0.4325847625732422, |
|
"eval_precision_macro": 0.938292936865987, |
|
"eval_precision_weighted": 0.9382956472792613, |
|
"eval_recall_macro": 0.9383099973494167, |
|
"eval_recall_weighted": 0.9383196425997983, |
|
"eval_runtime": 30.0017, |
|
"eval_samples_per_second": 231.287, |
|
"eval_steps_per_second": 14.466, |
|
"step": 12145 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 17350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.55585712669056e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|