|
{ |
|
"best_metric": 0.19515299797058105, |
|
"best_model_checkpoint": "autotrain-rtvyh-y5ben/checkpoint-1146", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1146, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06544502617801047, |
|
"grad_norm": 27.387327194213867, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 1.9398, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13089005235602094, |
|
"grad_norm": 22.39668846130371, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7177, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19633507853403143, |
|
"grad_norm": 17.08791160583496, |
|
"learning_rate": 3.086956521739131e-05, |
|
"loss": 1.3264, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2617801047120419, |
|
"grad_norm": 14.287014961242676, |
|
"learning_rate": 4.1739130434782605e-05, |
|
"loss": 0.9999, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32722513089005234, |
|
"grad_norm": 5.287769794464111, |
|
"learning_rate": 4.97090203685742e-05, |
|
"loss": 0.7594, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.39267015706806285, |
|
"grad_norm": 37.07988739013672, |
|
"learning_rate": 4.849660523763337e-05, |
|
"loss": 0.5061, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4581151832460733, |
|
"grad_norm": 2.10866641998291, |
|
"learning_rate": 4.728419010669253e-05, |
|
"loss": 0.4617, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5235602094240838, |
|
"grad_norm": 37.698177337646484, |
|
"learning_rate": 4.6071774975751696e-05, |
|
"loss": 0.3837, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5890052356020943, |
|
"grad_norm": 20.22072410583496, |
|
"learning_rate": 4.485935984481087e-05, |
|
"loss": 0.4478, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6544502617801047, |
|
"grad_norm": 37.976768493652344, |
|
"learning_rate": 4.364694471387003e-05, |
|
"loss": 0.4318, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7198952879581152, |
|
"grad_norm": 25.226699829101562, |
|
"learning_rate": 4.2434529582929193e-05, |
|
"loss": 0.3503, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7853403141361257, |
|
"grad_norm": 38.52054977416992, |
|
"learning_rate": 4.1270611057225994e-05, |
|
"loss": 0.4691, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8507853403141361, |
|
"grad_norm": 36.57845687866211, |
|
"learning_rate": 4.005819592628517e-05, |
|
"loss": 0.3103, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9162303664921466, |
|
"grad_norm": 21.468103408813477, |
|
"learning_rate": 3.8845780795344326e-05, |
|
"loss": 0.4161, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.981675392670157, |
|
"grad_norm": 1.5287563800811768, |
|
"learning_rate": 3.763336566440349e-05, |
|
"loss": 0.2762, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.912303664921466, |
|
"eval_f1_macro": 0.8881661676824378, |
|
"eval_f1_micro": 0.912303664921466, |
|
"eval_f1_weighted": 0.9139427487256797, |
|
"eval_loss": 0.24220755696296692, |
|
"eval_precision_macro": 0.878941728706775, |
|
"eval_precision_micro": 0.912303664921466, |
|
"eval_precision_weighted": 0.9193888936099365, |
|
"eval_recall_macro": 0.903916491565807, |
|
"eval_recall_micro": 0.912303664921466, |
|
"eval_recall_weighted": 0.912303664921466, |
|
"eval_runtime": 18.3017, |
|
"eval_samples_per_second": 166.979, |
|
"eval_steps_per_second": 10.436, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0471204188481675, |
|
"grad_norm": 82.7063980102539, |
|
"learning_rate": 3.6420950533462664e-05, |
|
"loss": 0.4792, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.112565445026178, |
|
"grad_norm": 1.9939672946929932, |
|
"learning_rate": 3.520853540252182e-05, |
|
"loss": 0.3641, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1780104712041886, |
|
"grad_norm": 2.1117889881134033, |
|
"learning_rate": 3.399612027158099e-05, |
|
"loss": 0.3835, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.243455497382199, |
|
"grad_norm": 12.421843528747559, |
|
"learning_rate": 3.278370514064016e-05, |
|
"loss": 0.3868, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.3089005235602094, |
|
"grad_norm": 4.019728183746338, |
|
"learning_rate": 3.157129000969932e-05, |
|
"loss": 0.3204, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3743455497382198, |
|
"grad_norm": 23.721004486083984, |
|
"learning_rate": 3.0358874878758486e-05, |
|
"loss": 0.2277, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4397905759162304, |
|
"grad_norm": 37.32318878173828, |
|
"learning_rate": 2.9146459747817655e-05, |
|
"loss": 0.4918, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5052356020942408, |
|
"grad_norm": 0.5763813257217407, |
|
"learning_rate": 2.793404461687682e-05, |
|
"loss": 0.2702, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.5706806282722514, |
|
"grad_norm": 25.39264488220215, |
|
"learning_rate": 2.6721629485935983e-05, |
|
"loss": 0.2803, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6361256544502618, |
|
"grad_norm": 48.61098861694336, |
|
"learning_rate": 2.5509214354995155e-05, |
|
"loss": 0.3219, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.7015706806282722, |
|
"grad_norm": 19.207382202148438, |
|
"learning_rate": 2.4296799224054317e-05, |
|
"loss": 0.4093, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.7670157068062826, |
|
"grad_norm": 19.071319580078125, |
|
"learning_rate": 2.3084384093113483e-05, |
|
"loss": 0.3342, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.8324607329842932, |
|
"grad_norm": 24.700105667114258, |
|
"learning_rate": 2.187196896217265e-05, |
|
"loss": 0.3489, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.8979057591623036, |
|
"grad_norm": 25.030014038085938, |
|
"learning_rate": 2.0659553831231815e-05, |
|
"loss": 0.3604, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.9633507853403143, |
|
"grad_norm": 51.47434616088867, |
|
"learning_rate": 1.944713870029098e-05, |
|
"loss": 0.2948, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9195026178010471, |
|
"eval_f1_macro": 0.8825867245438248, |
|
"eval_f1_micro": 0.9195026178010471, |
|
"eval_f1_weighted": 0.9191139958840788, |
|
"eval_loss": 0.22944672405719757, |
|
"eval_precision_macro": 0.8833301888913111, |
|
"eval_precision_micro": 0.9195026178010471, |
|
"eval_precision_weighted": 0.9239632346357177, |
|
"eval_recall_macro": 0.8933703672169516, |
|
"eval_recall_micro": 0.9195026178010471, |
|
"eval_recall_weighted": 0.9195026178010471, |
|
"eval_runtime": 18.2799, |
|
"eval_samples_per_second": 167.178, |
|
"eval_steps_per_second": 10.449, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.0287958115183247, |
|
"grad_norm": 13.710552215576172, |
|
"learning_rate": 1.8234723569350146e-05, |
|
"loss": 0.2163, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.094240837696335, |
|
"grad_norm": 14.117116928100586, |
|
"learning_rate": 1.702230843840931e-05, |
|
"loss": 0.286, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1596858638743455, |
|
"grad_norm": 3.677631378173828, |
|
"learning_rate": 1.5809893307468477e-05, |
|
"loss": 0.421, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.225130890052356, |
|
"grad_norm": 19.017574310302734, |
|
"learning_rate": 1.4597478176527643e-05, |
|
"loss": 0.3262, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.2905759162303667, |
|
"grad_norm": 4.1396331787109375, |
|
"learning_rate": 1.338506304558681e-05, |
|
"loss": 0.3032, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.356020942408377, |
|
"grad_norm": 14.411179542541504, |
|
"learning_rate": 1.2172647914645975e-05, |
|
"loss": 0.3508, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4214659685863875, |
|
"grad_norm": 3.900756597518921, |
|
"learning_rate": 1.096023278370514e-05, |
|
"loss": 0.4431, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.486910994764398, |
|
"grad_norm": 3.200969696044922, |
|
"learning_rate": 9.747817652764308e-06, |
|
"loss": 0.2815, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.5523560209424083, |
|
"grad_norm": 17.05501365661621, |
|
"learning_rate": 8.535402521823473e-06, |
|
"loss": 0.3593, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6178010471204187, |
|
"grad_norm": 15.708697319030762, |
|
"learning_rate": 7.322987390882638e-06, |
|
"loss": 0.2373, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.683246073298429, |
|
"grad_norm": 30.304662704467773, |
|
"learning_rate": 6.159068865179437e-06, |
|
"loss": 0.3447, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.7486910994764395, |
|
"grad_norm": 18.696701049804688, |
|
"learning_rate": 4.946653734238604e-06, |
|
"loss": 0.225, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.8141361256544504, |
|
"grad_norm": 8.534065246582031, |
|
"learning_rate": 3.734238603297769e-06, |
|
"loss": 0.2218, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.8795811518324608, |
|
"grad_norm": 2.1658194065093994, |
|
"learning_rate": 2.521823472356935e-06, |
|
"loss": 0.2223, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.945026178010471, |
|
"grad_norm": 61.290462493896484, |
|
"learning_rate": 1.309408341416101e-06, |
|
"loss": 0.3373, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.931282722513089, |
|
"eval_f1_macro": 0.9028776359481251, |
|
"eval_f1_micro": 0.931282722513089, |
|
"eval_f1_weighted": 0.9312850320245419, |
|
"eval_loss": 0.19515299797058105, |
|
"eval_precision_macro": 0.8955979809260896, |
|
"eval_precision_micro": 0.931282722513089, |
|
"eval_precision_weighted": 0.9330235887739936, |
|
"eval_recall_macro": 0.9159046117460236, |
|
"eval_recall_micro": 0.931282722513089, |
|
"eval_recall_weighted": 0.931282722513089, |
|
"eval_runtime": 18.4024, |
|
"eval_samples_per_second": 166.066, |
|
"eval_steps_per_second": 10.379, |
|
"step": 1146 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1146, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3077959759396864e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|