|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.223746299743652, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5424, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7418546365914787, |
|
"eval_f1": 0.6575164379109477, |
|
"eval_loss": 0.4762427806854248, |
|
"eval_precision": 0.6837301587301587, |
|
"eval_recall": 0.647344971813057, |
|
"eval_runtime": 1.7978, |
|
"eval_samples_per_second": 221.935, |
|
"eval_steps_per_second": 27.811, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.059021949768066, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4345, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7894736842105263, |
|
"eval_f1": 0.7673663168415792, |
|
"eval_loss": 0.41568055748939514, |
|
"eval_precision": 0.7581367924528302, |
|
"eval_recall": 0.7985542825968357, |
|
"eval_runtime": 1.7989, |
|
"eval_samples_per_second": 221.802, |
|
"eval_steps_per_second": 27.795, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.354827404022217, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3391, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8180088078011953, |
|
"eval_loss": 0.33879804611206055, |
|
"eval_precision": 0.8323930726843348, |
|
"eval_recall": 0.8071467539552646, |
|
"eval_runtime": 1.8074, |
|
"eval_samples_per_second": 220.763, |
|
"eval_steps_per_second": 27.665, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.747511863708496, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2837, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8333016825553572, |
|
"eval_loss": 0.32792460918426514, |
|
"eval_precision": 0.8341507249908615, |
|
"eval_recall": 0.8324695399163484, |
|
"eval_runtime": 1.804, |
|
"eval_samples_per_second": 221.178, |
|
"eval_steps_per_second": 27.717, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.42053157091140747, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2761, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8391129032258065, |
|
"eval_loss": 0.31322285532951355, |
|
"eval_precision": 0.8345705196182396, |
|
"eval_recall": 0.8442444080741953, |
|
"eval_runtime": 1.8022, |
|
"eval_samples_per_second": 221.397, |
|
"eval_steps_per_second": 27.744, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.2977254390716553, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2459, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8543795620437956, |
|
"eval_loss": 0.3032587468624115, |
|
"eval_precision": 0.843984962406015, |
|
"eval_recall": 0.868839789052555, |
|
"eval_runtime": 1.8072, |
|
"eval_samples_per_second": 220.78, |
|
"eval_steps_per_second": 27.667, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.8183882236480713, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2321, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8622085718274466, |
|
"eval_loss": 0.2870577275753021, |
|
"eval_precision": 0.8530168716042322, |
|
"eval_recall": 0.8741589379887251, |
|
"eval_runtime": 1.8055, |
|
"eval_samples_per_second": 220.996, |
|
"eval_steps_per_second": 27.694, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.4162003993988037, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2206, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8560793854229822, |
|
"eval_loss": 0.2634139955043793, |
|
"eval_precision": 0.8609538327526132, |
|
"eval_recall": 0.8516548463356974, |
|
"eval_runtime": 1.8055, |
|
"eval_samples_per_second": 220.985, |
|
"eval_steps_per_second": 27.692, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.009228229522705, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2067, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8703223612108386, |
|
"eval_loss": 0.2633897066116333, |
|
"eval_precision": 0.8694131129742446, |
|
"eval_recall": 0.8712493180578287, |
|
"eval_runtime": 1.8057, |
|
"eval_samples_per_second": 220.963, |
|
"eval_steps_per_second": 27.69, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.9459621906280518, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.192, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8631217838765008, |
|
"eval_loss": 0.2696186900138855, |
|
"eval_precision": 0.8872804935927859, |
|
"eval_recall": 0.8462447717766868, |
|
"eval_runtime": 1.8049, |
|
"eval_samples_per_second": 221.061, |
|
"eval_steps_per_second": 27.702, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.9607306122779846, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1866, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8808495451466529, |
|
"eval_loss": 0.2752375304698944, |
|
"eval_precision": 0.8691495353421572, |
|
"eval_recall": 0.8972995090016367, |
|
"eval_runtime": 1.8092, |
|
"eval_samples_per_second": 220.542, |
|
"eval_steps_per_second": 27.637, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.809903621673584, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1786, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8792560061999484, |
|
"eval_loss": 0.2651856243610382, |
|
"eval_precision": 0.8707622232472325, |
|
"eval_recall": 0.889798145117294, |
|
"eval_runtime": 1.8065, |
|
"eval_samples_per_second": 220.87, |
|
"eval_steps_per_second": 27.678, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.7058310508728027, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1695, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8867007927797945, |
|
"eval_loss": 0.25362077355384827, |
|
"eval_precision": 0.89198606271777, |
|
"eval_recall": 0.8818876159301692, |
|
"eval_runtime": 1.82, |
|
"eval_samples_per_second": 219.234, |
|
"eval_steps_per_second": 27.473, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.428104877471924, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1664, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8680720368560659, |
|
"eval_loss": 0.2736993730068207, |
|
"eval_precision": 0.8587217615098657, |
|
"eval_recall": 0.8802054919076197, |
|
"eval_runtime": 1.8089, |
|
"eval_samples_per_second": 220.576, |
|
"eval_steps_per_second": 27.641, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.6353508234024048, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1521, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8829621606985718, |
|
"eval_loss": 0.26195329427719116, |
|
"eval_precision": 0.8802419354838709, |
|
"eval_recall": 0.8858428805237315, |
|
"eval_runtime": 1.8126, |
|
"eval_samples_per_second": 220.122, |
|
"eval_steps_per_second": 27.584, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.580483317375183, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1494, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8760914310475572, |
|
"eval_loss": 0.30298247933387756, |
|
"eval_precision": 0.8629851740796268, |
|
"eval_recall": 0.8962538643389707, |
|
"eval_runtime": 1.8105, |
|
"eval_samples_per_second": 220.383, |
|
"eval_steps_per_second": 27.617, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.5637781620025635, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1487, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8733660552828726, |
|
"eval_loss": 0.2702126204967499, |
|
"eval_precision": 0.8650109547970479, |
|
"eval_recall": 0.8837515911983997, |
|
"eval_runtime": 1.8055, |
|
"eval_samples_per_second": 220.99, |
|
"eval_steps_per_second": 27.693, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.6467041969299316, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1494, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8765906680805938, |
|
"eval_loss": 0.2763027548789978, |
|
"eval_precision": 0.8675710594315245, |
|
"eval_recall": 0.888025095471904, |
|
"eval_runtime": 1.8075, |
|
"eval_samples_per_second": 220.753, |
|
"eval_steps_per_second": 27.663, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.27400541305542, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1334, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8733660552828726, |
|
"eval_loss": 0.28261518478393555, |
|
"eval_precision": 0.8650109547970479, |
|
"eval_recall": 0.8837515911983997, |
|
"eval_runtime": 1.8039, |
|
"eval_samples_per_second": 221.182, |
|
"eval_steps_per_second": 27.717, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.5643185377120972, |
|
"learning_rate": 0.0, |
|
"loss": 0.1325, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8721887408091659, |
|
"eval_loss": 0.27931535243988037, |
|
"eval_precision": 0.866466275659824, |
|
"eval_recall": 0.8787506819421713, |
|
"eval_runtime": 1.804, |
|
"eval_samples_per_second": 221.175, |
|
"eval_steps_per_second": 27.716, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2269888150887411, |
|
"train_runtime": 635.9179, |
|
"train_samples_per_second": 114.417, |
|
"train_steps_per_second": 3.837 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|