|
{ |
|
"best_metric": 0.8705702821418305, |
|
"best_model_checkpoint": "./phobert_results_v2/checkpoint-3470", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 4.723288059234619, |
|
"learning_rate": 1.994236311239193e-05, |
|
"loss": 1.9047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 32.87104034423828, |
|
"learning_rate": 1.9884726224783863e-05, |
|
"loss": 1.7269, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 5.206538677215576, |
|
"learning_rate": 1.9827089337175795e-05, |
|
"loss": 1.6865, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 52.5133056640625, |
|
"learning_rate": 1.9769452449567724e-05, |
|
"loss": 1.633, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 12.493169784545898, |
|
"learning_rate": 1.9711815561959656e-05, |
|
"loss": 1.553, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 7.675703048706055, |
|
"learning_rate": 1.9654178674351588e-05, |
|
"loss": 1.4131, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 5.984454154968262, |
|
"learning_rate": 1.9596541786743517e-05, |
|
"loss": 1.3606, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 9.403727531433105, |
|
"learning_rate": 1.953890489913545e-05, |
|
"loss": 1.2202, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 7.890481472015381, |
|
"learning_rate": 1.9481268011527378e-05, |
|
"loss": 1.186, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 12.102771759033203, |
|
"learning_rate": 1.942363112391931e-05, |
|
"loss": 1.0865, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 17.669885635375977, |
|
"learning_rate": 1.936599423631124e-05, |
|
"loss": 1.124, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 10.14146614074707, |
|
"learning_rate": 1.930835734870317e-05, |
|
"loss": 1.1307, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 9.363780975341797, |
|
"learning_rate": 1.9250720461095104e-05, |
|
"loss": 1.0582, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 22.122907638549805, |
|
"learning_rate": 1.9193083573487033e-05, |
|
"loss": 1.007, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 22.921249389648438, |
|
"learning_rate": 1.9135446685878965e-05, |
|
"loss": 1.0046, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 13.880660057067871, |
|
"learning_rate": 1.9077809798270894e-05, |
|
"loss": 0.9786, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 13.75207805633545, |
|
"learning_rate": 1.9020172910662826e-05, |
|
"loss": 0.9166, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 15.701948165893555, |
|
"learning_rate": 1.8962536023054755e-05, |
|
"loss": 0.8942, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 13.106974601745605, |
|
"learning_rate": 1.8904899135446687e-05, |
|
"loss": 0.8905, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 13.854475021362305, |
|
"learning_rate": 1.884726224783862e-05, |
|
"loss": 0.9089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 14.22427749633789, |
|
"learning_rate": 1.878962536023055e-05, |
|
"loss": 0.8688, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 14.088136672973633, |
|
"learning_rate": 1.873198847262248e-05, |
|
"loss": 0.859, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 15.746428489685059, |
|
"learning_rate": 1.867435158501441e-05, |
|
"loss": 0.8356, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 10.937832832336426, |
|
"learning_rate": 1.861671469740634e-05, |
|
"loss": 0.8699, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 14.396600723266602, |
|
"learning_rate": 1.855907780979827e-05, |
|
"loss": 0.7789, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 11.801300048828125, |
|
"learning_rate": 1.8501440922190203e-05, |
|
"loss": 0.8713, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 10.435481071472168, |
|
"learning_rate": 1.8443804034582135e-05, |
|
"loss": 0.7961, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 11.2293062210083, |
|
"learning_rate": 1.8386167146974067e-05, |
|
"loss": 0.8397, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 12.809613227844238, |
|
"learning_rate": 1.8328530259365996e-05, |
|
"loss": 0.8297, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 14.791847229003906, |
|
"learning_rate": 1.8270893371757928e-05, |
|
"loss": 0.7709, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 14.587478637695312, |
|
"learning_rate": 1.8213256484149857e-05, |
|
"loss": 0.7552, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 16.79636001586914, |
|
"learning_rate": 1.815561959654179e-05, |
|
"loss": 0.7937, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 8.510680198669434, |
|
"learning_rate": 1.8097982708933718e-05, |
|
"loss": 0.7181, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 20.466522216796875, |
|
"learning_rate": 1.804034582132565e-05, |
|
"loss": 0.712, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7955036748811067, |
|
"eval_f1_macro": 0.795530535937033, |
|
"eval_f1_weighted": 0.795536244762841, |
|
"eval_loss": 0.6264312267303467, |
|
"eval_precision_macro": 0.7978076489526655, |
|
"eval_precision_weighted": 0.797819457255313, |
|
"eval_recall_macro": 0.7955041514180622, |
|
"eval_recall_weighted": 0.7955036748811067, |
|
"eval_runtime": 29.7702, |
|
"eval_samples_per_second": 233.086, |
|
"eval_steps_per_second": 14.578, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 17.28911018371582, |
|
"learning_rate": 1.7982708933717582e-05, |
|
"loss": 0.6512, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 28.79306411743164, |
|
"learning_rate": 1.792507204610951e-05, |
|
"loss": 0.6036, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 13.444112777709961, |
|
"learning_rate": 1.7867435158501444e-05, |
|
"loss": 0.552, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 22.068620681762695, |
|
"learning_rate": 1.7809798270893372e-05, |
|
"loss": 0.577, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 13.942420959472656, |
|
"learning_rate": 1.7752161383285305e-05, |
|
"loss": 0.5573, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 13.652227401733398, |
|
"learning_rate": 1.7694524495677234e-05, |
|
"loss": 0.5401, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 22.252243041992188, |
|
"learning_rate": 1.7636887608069166e-05, |
|
"loss": 0.558, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 17.13372802734375, |
|
"learning_rate": 1.7579250720461095e-05, |
|
"loss": 0.514, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 17.164276123046875, |
|
"learning_rate": 1.7521613832853027e-05, |
|
"loss": 0.5137, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 14.449616432189941, |
|
"learning_rate": 1.746397694524496e-05, |
|
"loss": 0.5, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 11.61601734161377, |
|
"learning_rate": 1.7406340057636888e-05, |
|
"loss": 0.5071, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 20.6407527923584, |
|
"learning_rate": 1.734870317002882e-05, |
|
"loss": 0.4803, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 18.60149383544922, |
|
"learning_rate": 1.729106628242075e-05, |
|
"loss": 0.4967, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 22.12944793701172, |
|
"learning_rate": 1.723342939481268e-05, |
|
"loss": 0.4652, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 18.460689544677734, |
|
"learning_rate": 1.717579250720461e-05, |
|
"loss": 0.4417, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 19.452363967895508, |
|
"learning_rate": 1.7118155619596542e-05, |
|
"loss": 0.5082, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 41.46109390258789, |
|
"learning_rate": 1.7060518731988475e-05, |
|
"loss": 0.463, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 25.584379196166992, |
|
"learning_rate": 1.7002881844380407e-05, |
|
"loss": 0.4788, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 16.92909049987793, |
|
"learning_rate": 1.6945244956772336e-05, |
|
"loss": 0.4752, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 10.342531204223633, |
|
"learning_rate": 1.6887608069164268e-05, |
|
"loss": 0.4848, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 15.596243858337402, |
|
"learning_rate": 1.6829971181556197e-05, |
|
"loss": 0.5158, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 17.031354904174805, |
|
"learning_rate": 1.6772334293948126e-05, |
|
"loss": 0.4577, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 13.550045013427734, |
|
"learning_rate": 1.6714697406340058e-05, |
|
"loss": 0.4947, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 10.102880477905273, |
|
"learning_rate": 1.665706051873199e-05, |
|
"loss": 0.4681, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 10.968811988830566, |
|
"learning_rate": 1.6599423631123922e-05, |
|
"loss": 0.452, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 4.670314311981201, |
|
"learning_rate": 1.654178674351585e-05, |
|
"loss": 0.4346, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 28.008899688720703, |
|
"learning_rate": 1.6484149855907783e-05, |
|
"loss": 0.4404, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 27.77347183227539, |
|
"learning_rate": 1.6426512968299712e-05, |
|
"loss": 0.4547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 21.949289321899414, |
|
"learning_rate": 1.6368876080691644e-05, |
|
"loss": 0.4924, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 2.8766281604766846, |
|
"learning_rate": 1.6311239193083573e-05, |
|
"loss": 0.3776, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 38.812625885009766, |
|
"learning_rate": 1.6253602305475506e-05, |
|
"loss": 0.4176, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 24.132482528686523, |
|
"learning_rate": 1.6195965417867438e-05, |
|
"loss": 0.4407, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 19.72426414489746, |
|
"learning_rate": 1.613832853025937e-05, |
|
"loss": 0.4503, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 16.077899932861328, |
|
"learning_rate": 1.60806916426513e-05, |
|
"loss": 0.4244, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 21.258529663085938, |
|
"learning_rate": 1.6023054755043228e-05, |
|
"loss": 0.4161, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.87101887880098, |
|
"eval_f1_macro": 0.8705600204306078, |
|
"eval_f1_weighted": 0.8705702821418305, |
|
"eval_loss": 0.43536442518234253, |
|
"eval_precision_macro": 0.8749786652431168, |
|
"eval_precision_weighted": 0.8749977247467932, |
|
"eval_recall_macro": 0.8710178763247105, |
|
"eval_recall_weighted": 0.87101887880098, |
|
"eval_runtime": 29.9836, |
|
"eval_samples_per_second": 231.426, |
|
"eval_steps_per_second": 14.475, |
|
"step": 3470 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 17350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 7302448933401600.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|