{ "best_metric": 0.980825229383001, "best_model_checkpoint": "omarmomen/sf_babylm_1/finetune/relative_position_control/checkpoint-400", "epoch": 10.0, "global_step": 1420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.41, "eval_accuracy": 0.9216228127479553, "eval_f1": 0.9270501080358061, "eval_loss": 0.4737815260887146, "eval_mcc": 0.8527382673557574, "eval_runtime": 30.4369, "eval_samples_per_second": 594.409, "eval_steps_per_second": 74.318, "step": 200 }, { "epoch": 2.82, "eval_accuracy": 0.9807097315788269, "eval_f1": 0.980825229383001, "eval_loss": 0.08114158362150192, "eval_mcc": 0.9614892142400866, "eval_runtime": 30.4073, "eval_samples_per_second": 594.988, "eval_steps_per_second": 74.39, "step": 400 }, { "epoch": 3.52, "learning_rate": 3.23943661971831e-05, "loss": 0.0425, "step": 500 }, { "epoch": 4.23, "eval_accuracy": 0.9766194820404053, "eval_f1": 0.9769545083083628, "eval_loss": 0.11881698668003082, "eval_mcc": 0.9536421310802985, "eval_runtime": 30.3659, "eval_samples_per_second": 595.799, "eval_steps_per_second": 74.491, "step": 600 }, { "epoch": 5.63, "eval_accuracy": 0.9430134892463684, "eval_f1": 0.9460689438719464, "eval_loss": 0.407075434923172, "eval_mcc": 0.8917702307190046, "eval_runtime": 30.3806, "eval_samples_per_second": 595.512, "eval_steps_per_second": 74.455, "step": 800 }, { "epoch": 7.04, "learning_rate": 1.4788732394366198e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 7.04, "eval_accuracy": 0.9644041657447815, "eval_f1": 0.9655172413793104, "eval_loss": 0.2293713390827179, "eval_mcc": 0.930749953390411, "eval_runtime": 30.5501, "eval_samples_per_second": 592.207, "eval_steps_per_second": 74.042, "step": 1000 }, { "epoch": 8.45, "eval_accuracy": 0.9323458075523376, "eval_f1": 0.9365540120257101, "eval_loss": 0.5338804721832275, "eval_mcc": 0.8724016613441335, "eval_runtime": 30.3498, "eval_samples_per_second": 596.116, "eval_steps_per_second": 74.531, "step": 1200 }, { "epoch": 9.86, "eval_accuracy": 0.951028048992157, "eval_f1": 0.9530521407376007, "eval_loss": 0.35129231214523315, "eval_mcc": 0.9054283286330445, "eval_runtime": 30.3142, "eval_samples_per_second": 596.816, "eval_steps_per_second": 74.618, "step": 1400 }, { "epoch": 10.0, "step": 1420, "total_flos": 2323809310863360.0, "train_loss": 0.015042712267907992, "train_runtime": 512.0019, "train_samples_per_second": 177.109, "train_steps_per_second": 2.773 } ], "max_steps": 1420, "num_train_epochs": 10, "total_flos": 2323809310863360.0, "trial_name": null, "trial_params": null }