|
{ |
|
"best_metric": 3.142681837081909, |
|
"best_model_checkpoint": "supermarketNRVSMELOC/checkpoint-792", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 792, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04924242424242424, |
|
"grad_norm": 6.067267894744873, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 3.7636, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09848484848484848, |
|
"grad_norm": 4.615681171417236, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 3.7539, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.14772727272727273, |
|
"grad_norm": 4.366635799407959, |
|
"learning_rate": 2.4375e-05, |
|
"loss": 3.7559, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.19696969696969696, |
|
"grad_norm": 6.396846294403076, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 3.7443, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.24621212121212122, |
|
"grad_norm": 5.848783493041992, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 3.7299, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.29545454545454547, |
|
"grad_norm": 3.787656784057617, |
|
"learning_rate": 4.875e-05, |
|
"loss": 3.718, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3446969696969697, |
|
"grad_norm": 6.08660364151001, |
|
"learning_rate": 4.9227528089887644e-05, |
|
"loss": 3.6869, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.3939393939393939, |
|
"grad_norm": 4.681759357452393, |
|
"learning_rate": 4.831460674157304e-05, |
|
"loss": 3.6771, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4431818181818182, |
|
"grad_norm": 5.893486499786377, |
|
"learning_rate": 4.740168539325843e-05, |
|
"loss": 3.6445, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.49242424242424243, |
|
"grad_norm": 6.639739513397217, |
|
"learning_rate": 4.648876404494382e-05, |
|
"loss": 3.6046, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 5.436135292053223, |
|
"learning_rate": 4.5575842696629217e-05, |
|
"loss": 3.6118, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 4.141537189483643, |
|
"learning_rate": 4.4662921348314605e-05, |
|
"loss": 3.6243, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6401515151515151, |
|
"grad_norm": 7.079050540924072, |
|
"learning_rate": 4.375e-05, |
|
"loss": 3.5685, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.6893939393939394, |
|
"grad_norm": 5.841846942901611, |
|
"learning_rate": 4.2837078651685394e-05, |
|
"loss": 3.5209, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7386363636363636, |
|
"grad_norm": 4.406051158905029, |
|
"learning_rate": 4.192415730337079e-05, |
|
"loss": 3.5112, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.7878787878787878, |
|
"grad_norm": 7.7401862144470215, |
|
"learning_rate": 4.1011235955056184e-05, |
|
"loss": 3.5028, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.8371212121212122, |
|
"grad_norm": 7.258950710296631, |
|
"learning_rate": 4.009831460674158e-05, |
|
"loss": 3.4542, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.8863636363636364, |
|
"grad_norm": 9.357794761657715, |
|
"learning_rate": 3.918539325842697e-05, |
|
"loss": 3.4528, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.9356060606060606, |
|
"grad_norm": 6.444118022918701, |
|
"learning_rate": 3.827247191011236e-05, |
|
"loss": 3.4161, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.9848484848484849, |
|
"grad_norm": 5.109555721282959, |
|
"learning_rate": 3.735955056179776e-05, |
|
"loss": 3.4052, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.20265151515151514, |
|
"eval_f1_macro": 0.025505336473483867, |
|
"eval_f1_micro": 0.20265151515151514, |
|
"eval_f1_weighted": 0.08755512614736226, |
|
"eval_loss": 3.434072256088257, |
|
"eval_precision_macro": 0.037007570606101825, |
|
"eval_precision_micro": 0.20265151515151514, |
|
"eval_precision_weighted": 0.09952466920668675, |
|
"eval_recall_macro": 0.05048996382408151, |
|
"eval_recall_micro": 0.20265151515151514, |
|
"eval_recall_weighted": 0.20265151515151514, |
|
"eval_runtime": 56.3872, |
|
"eval_samples_per_second": 9.364, |
|
"eval_steps_per_second": 0.585, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.0340909090909092, |
|
"grad_norm": 27.62860679626465, |
|
"learning_rate": 3.6446629213483145e-05, |
|
"loss": 3.4113, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 12.518967628479004, |
|
"learning_rate": 3.553370786516854e-05, |
|
"loss": 3.3479, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.1325757575757576, |
|
"grad_norm": 7.984549045562744, |
|
"learning_rate": 3.4620786516853935e-05, |
|
"loss": 3.3677, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 8.635626792907715, |
|
"learning_rate": 3.370786516853933e-05, |
|
"loss": 3.3681, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.231060606060606, |
|
"grad_norm": 9.190679550170898, |
|
"learning_rate": 3.2794943820224725e-05, |
|
"loss": 3.4162, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.2803030303030303, |
|
"grad_norm": 6.9372663497924805, |
|
"learning_rate": 3.188202247191011e-05, |
|
"loss": 3.2836, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.3295454545454546, |
|
"grad_norm": 4.514989376068115, |
|
"learning_rate": 3.096910112359551e-05, |
|
"loss": 3.3598, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.378787878787879, |
|
"grad_norm": 6.12080717086792, |
|
"learning_rate": 3.0056179775280903e-05, |
|
"loss": 3.3846, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.428030303030303, |
|
"grad_norm": 5.432583332061768, |
|
"learning_rate": 2.914325842696629e-05, |
|
"loss": 3.18, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.4772727272727273, |
|
"grad_norm": 5.338773250579834, |
|
"learning_rate": 2.8230337078651686e-05, |
|
"loss": 3.2837, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.5265151515151514, |
|
"grad_norm": 15.422748565673828, |
|
"learning_rate": 2.731741573033708e-05, |
|
"loss": 3.1643, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.5757575757575757, |
|
"grad_norm": 5.844562530517578, |
|
"learning_rate": 2.6404494382022472e-05, |
|
"loss": 3.2549, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 9.893993377685547, |
|
"learning_rate": 2.5491573033707867e-05, |
|
"loss": 3.3227, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.6742424242424243, |
|
"grad_norm": 6.3434295654296875, |
|
"learning_rate": 2.457865168539326e-05, |
|
"loss": 3.1929, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.7234848484848486, |
|
"grad_norm": 7.492882251739502, |
|
"learning_rate": 2.3665730337078653e-05, |
|
"loss": 3.2937, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 5.293882369995117, |
|
"learning_rate": 2.2752808988764045e-05, |
|
"loss": 3.2394, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.821969696969697, |
|
"grad_norm": 13.984901428222656, |
|
"learning_rate": 2.183988764044944e-05, |
|
"loss": 3.2516, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.871212121212121, |
|
"grad_norm": 6.005086421966553, |
|
"learning_rate": 2.0926966292134835e-05, |
|
"loss": 3.2613, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.9204545454545454, |
|
"grad_norm": 7.952373027801514, |
|
"learning_rate": 2.0014044943820226e-05, |
|
"loss": 3.2926, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.9696969696969697, |
|
"grad_norm": 7.9951043128967285, |
|
"learning_rate": 1.9101123595505618e-05, |
|
"loss": 3.1585, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.20454545454545456, |
|
"eval_f1_macro": 0.02462602836447254, |
|
"eval_f1_micro": 0.20454545454545456, |
|
"eval_f1_weighted": 0.08562023329167062, |
|
"eval_loss": 3.193603992462158, |
|
"eval_precision_macro": 0.034878596742616244, |
|
"eval_precision_micro": 0.20454545454545456, |
|
"eval_precision_weighted": 0.09384124563573254, |
|
"eval_recall_macro": 0.05068899317325554, |
|
"eval_recall_micro": 0.20454545454545456, |
|
"eval_recall_weighted": 0.20454545454545456, |
|
"eval_runtime": 55.1855, |
|
"eval_samples_per_second": 9.568, |
|
"eval_steps_per_second": 0.598, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.018939393939394, |
|
"grad_norm": 11.740503311157227, |
|
"learning_rate": 1.818820224719101e-05, |
|
"loss": 3.2347, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.0681818181818183, |
|
"grad_norm": 5.635743618011475, |
|
"learning_rate": 1.7275280898876404e-05, |
|
"loss": 3.0835, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.117424242424242, |
|
"grad_norm": 11.321329116821289, |
|
"learning_rate": 1.63623595505618e-05, |
|
"loss": 3.1773, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.1666666666666665, |
|
"grad_norm": 12.002336502075195, |
|
"learning_rate": 1.544943820224719e-05, |
|
"loss": 3.2081, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.215909090909091, |
|
"grad_norm": 13.068202018737793, |
|
"learning_rate": 1.4536516853932586e-05, |
|
"loss": 3.1807, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.265151515151515, |
|
"grad_norm": 15.093032836914062, |
|
"learning_rate": 1.3623595505617979e-05, |
|
"loss": 3.1387, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.3143939393939394, |
|
"grad_norm": 9.244571685791016, |
|
"learning_rate": 1.271067415730337e-05, |
|
"loss": 3.3179, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 13.117069244384766, |
|
"learning_rate": 1.1797752808988765e-05, |
|
"loss": 3.1844, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.412878787878788, |
|
"grad_norm": 8.456986427307129, |
|
"learning_rate": 1.0884831460674158e-05, |
|
"loss": 3.2497, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.462121212121212, |
|
"grad_norm": 6.265410423278809, |
|
"learning_rate": 9.97191011235955e-06, |
|
"loss": 3.2243, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.5113636363636362, |
|
"grad_norm": 11.483809471130371, |
|
"learning_rate": 9.058988764044945e-06, |
|
"loss": 3.1206, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.5606060606060606, |
|
"grad_norm": 16.1591796875, |
|
"learning_rate": 8.146067415730338e-06, |
|
"loss": 3.1371, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.609848484848485, |
|
"grad_norm": 9.152714729309082, |
|
"learning_rate": 7.23314606741573e-06, |
|
"loss": 3.1618, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.659090909090909, |
|
"grad_norm": 7.563583850860596, |
|
"learning_rate": 6.320224719101124e-06, |
|
"loss": 3.2177, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.7083333333333335, |
|
"grad_norm": 5.385344982147217, |
|
"learning_rate": 5.407303370786517e-06, |
|
"loss": 3.1986, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.757575757575758, |
|
"grad_norm": 6.828423500061035, |
|
"learning_rate": 4.49438202247191e-06, |
|
"loss": 3.1137, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.8068181818181817, |
|
"grad_norm": 8.450451850891113, |
|
"learning_rate": 3.581460674157303e-06, |
|
"loss": 3.2061, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.856060606060606, |
|
"grad_norm": 8.725029945373535, |
|
"learning_rate": 2.6685393258426968e-06, |
|
"loss": 3.0328, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.9053030303030303, |
|
"grad_norm": 6.6716694831848145, |
|
"learning_rate": 1.7556179775280902e-06, |
|
"loss": 3.045, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 8.45450496673584, |
|
"learning_rate": 8.426966292134832e-07, |
|
"loss": 3.0927, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.20643939393939395, |
|
"eval_f1_macro": 0.02830170335299915, |
|
"eval_f1_micro": 0.20643939393939395, |
|
"eval_f1_weighted": 0.09619191663674405, |
|
"eval_loss": 3.142681837081909, |
|
"eval_precision_macro": 0.03697093460585543, |
|
"eval_precision_micro": 0.20643939393939395, |
|
"eval_precision_weighted": 0.0998764466790715, |
|
"eval_recall_macro": 0.05223286714505793, |
|
"eval_recall_micro": 0.20643939393939395, |
|
"eval_recall_weighted": 0.20643939393939395, |
|
"eval_runtime": 55.8145, |
|
"eval_samples_per_second": 9.46, |
|
"eval_steps_per_second": 0.591, |
|
"step": 792 |
|
} |
|
], |
|
"logging_steps": 13, |
|
"max_steps": 792, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3502825530667827e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|