|
{ |
|
"best_metric": 0.03780783340334892, |
|
"best_model_checkpoint": "./results3/checkpoint-2400", |
|
"epoch": 3.864734299516908, |
|
"eval_steps": 200, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.322061191626409, |
|
"grad_norm": 7.461069583892822, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 1.8012, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.322061191626409, |
|
"eval_accuracy": 0.18971061093247588, |
|
"eval_f1": 0.10189049057972092, |
|
"eval_loss": 1.7812345027923584, |
|
"eval_precision": 0.12844515990912814, |
|
"eval_recall": 0.18971061093247588, |
|
"eval_runtime": 5.7225, |
|
"eval_samples_per_second": 108.693, |
|
"eval_steps_per_second": 27.261, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.644122383252818, |
|
"grad_norm": 6.918825149536133, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 1.7688, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.644122383252818, |
|
"eval_accuracy": 0.26688102893890675, |
|
"eval_f1": 0.21143076934540506, |
|
"eval_loss": 1.7484790086746216, |
|
"eval_precision": 0.48251637799958924, |
|
"eval_recall": 0.26688102893890675, |
|
"eval_runtime": 5.841, |
|
"eval_samples_per_second": 106.489, |
|
"eval_steps_per_second": 26.708, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.966183574879227, |
|
"grad_norm": 6.153536319732666, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7475, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.966183574879227, |
|
"eval_accuracy": 0.5353697749196141, |
|
"eval_f1": 0.5292630942880905, |
|
"eval_loss": 1.6678913831710815, |
|
"eval_precision": 0.5777595640628179, |
|
"eval_recall": 0.5353697749196141, |
|
"eval_runtime": 5.7676, |
|
"eval_samples_per_second": 107.844, |
|
"eval_steps_per_second": 27.048, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.288244766505636, |
|
"grad_norm": 10.906112670898438, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 1.6323, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.288244766505636, |
|
"eval_accuracy": 0.6109324758842444, |
|
"eval_f1": 0.584022217728586, |
|
"eval_loss": 1.4518650770187378, |
|
"eval_precision": 0.6770832871589993, |
|
"eval_recall": 0.6109324758842444, |
|
"eval_runtime": 5.7252, |
|
"eval_samples_per_second": 108.643, |
|
"eval_steps_per_second": 27.248, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6103059581320451, |
|
"grad_norm": 11.212413787841797, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.3569, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6103059581320451, |
|
"eval_accuracy": 0.6672025723472669, |
|
"eval_f1": 0.6448086526662313, |
|
"eval_loss": 1.0713452100753784, |
|
"eval_precision": 0.7227579722788608, |
|
"eval_recall": 0.6672025723472669, |
|
"eval_runtime": 5.8441, |
|
"eval_samples_per_second": 106.433, |
|
"eval_steps_per_second": 26.694, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9323671497584543, |
|
"grad_norm": 10.077208518981934, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9744, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9323671497584543, |
|
"eval_accuracy": 0.8360128617363344, |
|
"eval_f1": 0.8313516450563994, |
|
"eval_loss": 0.6114147901535034, |
|
"eval_precision": 0.8485064229080279, |
|
"eval_recall": 0.8360128617363344, |
|
"eval_runtime": 5.7667, |
|
"eval_samples_per_second": 107.861, |
|
"eval_steps_per_second": 27.052, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2544283413848634, |
|
"grad_norm": 3.6660408973693848, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.5969, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.2544283413848634, |
|
"eval_accuracy": 0.9180064308681672, |
|
"eval_f1": 0.9171911311305204, |
|
"eval_loss": 0.29916083812713623, |
|
"eval_precision": 0.9217784712222669, |
|
"eval_recall": 0.9180064308681672, |
|
"eval_runtime": 5.8166, |
|
"eval_samples_per_second": 106.936, |
|
"eval_steps_per_second": 26.82, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.576489533011272, |
|
"grad_norm": 3.6452574729919434, |
|
"learning_rate": 4.994440868783523e-06, |
|
"loss": 0.3187, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.576489533011272, |
|
"eval_accuracy": 0.9453376205787781, |
|
"eval_f1": 0.94523522766866, |
|
"eval_loss": 0.1684405654668808, |
|
"eval_precision": 0.951570696538466, |
|
"eval_recall": 0.9453376205787781, |
|
"eval_runtime": 5.7215, |
|
"eval_samples_per_second": 108.712, |
|
"eval_steps_per_second": 27.265, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.898550724637681, |
|
"grad_norm": 0.2801424562931061, |
|
"learning_rate": 4.950116048011739e-06, |
|
"loss": 0.1856, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.898550724637681, |
|
"eval_accuracy": 0.9726688102893891, |
|
"eval_f1": 0.9726543269299354, |
|
"eval_loss": 0.0953439399600029, |
|
"eval_precision": 0.9736809257583791, |
|
"eval_recall": 0.9726688102893891, |
|
"eval_runtime": 5.7237, |
|
"eval_samples_per_second": 108.672, |
|
"eval_steps_per_second": 27.255, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.2206119162640903, |
|
"grad_norm": 0.754078209400177, |
|
"learning_rate": 4.862254033772164e-06, |
|
"loss": 0.1113, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2206119162640903, |
|
"eval_accuracy": 0.9823151125401929, |
|
"eval_f1": 0.9823107313578804, |
|
"eval_loss": 0.05166807398200035, |
|
"eval_precision": 0.9823692945184218, |
|
"eval_recall": 0.9823151125401929, |
|
"eval_runtime": 5.6315, |
|
"eval_samples_per_second": 110.451, |
|
"eval_steps_per_second": 27.702, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.542673107890499, |
|
"grad_norm": 0.2094874083995819, |
|
"learning_rate": 4.7324160849755856e-06, |
|
"loss": 0.0492, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.542673107890499, |
|
"eval_accuracy": 0.9855305466237942, |
|
"eval_f1": 0.9855078289074475, |
|
"eval_loss": 0.04900892823934555, |
|
"eval_precision": 0.9855788271208662, |
|
"eval_recall": 0.9855305466237942, |
|
"eval_runtime": 5.7859, |
|
"eval_samples_per_second": 107.503, |
|
"eval_steps_per_second": 26.962, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.864734299516908, |
|
"grad_norm": 0.26868194341659546, |
|
"learning_rate": 4.562909349440899e-06, |
|
"loss": 0.0584, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.864734299516908, |
|
"eval_accuracy": 0.9855305466237942, |
|
"eval_f1": 0.9855183714453405, |
|
"eval_loss": 0.03780783340334892, |
|
"eval_precision": 0.9856657355462896, |
|
"eval_recall": 0.9855305466237942, |
|
"eval_runtime": 5.7375, |
|
"eval_samples_per_second": 108.409, |
|
"eval_steps_per_second": 27.189, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 6210, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 383614021649664.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|