{ "best_metric": 0.5092250922509225, "best_model_checkpoint": "/content/our_data/checkpoint-9500", "epoch": 10.0, "eval_steps": 500, "global_step": 12410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 1.91941982272361e-05, "loss": 1.8353, "step": 500 }, { "epoch": 0.4, "eval_accuracy": 0.5906599952346915, "eval_f1": 0.12145214521452144, "eval_loss": 1.6175334453582764, "eval_precision": 0.12121212121212122, "eval_recall": 0.12169312169312169, "eval_runtime": 2.8356, "eval_samples_per_second": 107.209, "eval_steps_per_second": 53.605, "step": 500 }, { "epoch": 0.81, "learning_rate": 1.83883964544722e-05, "loss": 1.4071, "step": 1000 }, { "epoch": 0.81, "eval_accuracy": 0.651775077436264, "eval_f1": 0.2890995260663507, "eval_loss": 1.3136931657791138, "eval_precision": 0.26180257510729615, "eval_recall": 0.32275132275132273, "eval_runtime": 3.3976, "eval_samples_per_second": 89.476, "eval_steps_per_second": 44.738, "step": 1000 }, { "epoch": 1.21, "learning_rate": 1.75825946817083e-05, "loss": 1.1532, "step": 1500 }, { "epoch": 1.21, "eval_accuracy": 0.6739337622111031, "eval_f1": 0.334369173399627, "eval_loss": 1.2950096130371094, "eval_precision": 0.31535756154747946, "eval_recall": 0.3558201058201058, "eval_runtime": 2.3163, "eval_samples_per_second": 131.246, "eval_steps_per_second": 65.623, "step": 1500 }, { "epoch": 1.61, "learning_rate": 1.67767929089444e-05, "loss": 0.9969, "step": 2000 }, { "epoch": 1.61, "eval_accuracy": 0.6783416726233024, "eval_f1": 0.36094674556213013, "eval_loss": 1.1881901025772095, "eval_precision": 0.3265524625267666, "eval_recall": 0.40343915343915343, "eval_runtime": 2.4579, "eval_samples_per_second": 123.685, "eval_steps_per_second": 61.843, "step": 2000 }, { "epoch": 2.01, "learning_rate": 1.59709911361805e-05, "loss": 0.922, "step": 2500 }, { "epoch": 2.01, "eval_accuracy": 0.687276626161544, "eval_f1": 0.3714637146371464, "eval_loss": 1.265262246131897, "eval_precision": 0.3471264367816092, "eval_recall": 0.3994708994708995, "eval_runtime": 3.4551, "eval_samples_per_second": 87.985, "eval_steps_per_second": 43.993, "step": 2500 }, { "epoch": 2.42, "learning_rate": 1.5165189363416601e-05, "loss": 0.739, "step": 3000 }, { "epoch": 2.42, "eval_accuracy": 0.7033595425303788, "eval_f1": 0.38978015448603676, "eval_loss": 1.1592353582382202, "eval_precision": 0.35382955771305286, "eval_recall": 0.43386243386243384, "eval_runtime": 2.4063, "eval_samples_per_second": 126.335, "eval_steps_per_second": 63.168, "step": 3000 }, { "epoch": 2.82, "learning_rate": 1.4359387590652701e-05, "loss": 0.6866, "step": 3500 }, { "epoch": 2.82, "eval_accuracy": 0.7016916845365737, "eval_f1": 0.38713519952352593, "eval_loss": 1.2015074491500854, "eval_precision": 0.352112676056338, "eval_recall": 0.4298941798941799, "eval_runtime": 2.3881, "eval_samples_per_second": 127.299, "eval_steps_per_second": 63.65, "step": 3500 }, { "epoch": 3.22, "learning_rate": 1.35535858178888e-05, "loss": 0.5554, "step": 4000 }, { "epoch": 3.22, "eval_accuracy": 0.7329044555634977, "eval_f1": 0.4517374517374518, "eval_loss": 1.255534052848816, "eval_precision": 0.4398496240601504, "eval_recall": 0.4642857142857143, "eval_runtime": 2.9976, "eval_samples_per_second": 101.416, "eval_steps_per_second": 50.708, "step": 4000 }, { "epoch": 3.63, "learning_rate": 1.27477840451249e-05, "loss": 0.5009, "step": 4500 }, { "epoch": 3.63, "eval_accuracy": 0.7230164403145104, "eval_f1": 0.4449818621523579, "eval_loss": 1.287095308303833, "eval_precision": 0.40979955456570155, "eval_recall": 0.48677248677248675, "eval_runtime": 3.1963, "eval_samples_per_second": 95.111, "eval_steps_per_second": 47.556, "step": 4500 }, { "epoch": 4.03, "learning_rate": 1.1941982272361e-05, "loss": 0.5117, "step": 5000 }, { "epoch": 4.03, "eval_accuracy": 0.7279008815820824, "eval_f1": 0.44523386619301364, "eval_loss": 1.2481865882873535, "eval_precision": 0.4030010718113612, "eval_recall": 0.4973544973544973, "eval_runtime": 2.7959, "eval_samples_per_second": 108.732, "eval_steps_per_second": 54.366, "step": 5000 }, { "epoch": 4.43, "learning_rate": 1.11361804995971e-05, "loss": 0.3771, "step": 5500 }, { "epoch": 4.43, "eval_accuracy": 0.7261138908744341, "eval_f1": 0.46068796068796075, "eval_loss": 1.300547480583191, "eval_precision": 0.43004587155963303, "eval_recall": 0.49603174603174605, "eval_runtime": 2.3957, "eval_samples_per_second": 126.894, "eval_steps_per_second": 63.447, "step": 5500 }, { "epoch": 4.83, "learning_rate": 1.0330378726833199e-05, "loss": 0.4357, "step": 6000 }, { "epoch": 4.83, "eval_accuracy": 0.7394567548248749, "eval_f1": 0.4856269113149847, "eval_loss": 1.241246223449707, "eval_precision": 0.45164960182025027, "eval_recall": 0.5251322751322751, "eval_runtime": 2.3503, "eval_samples_per_second": 129.346, "eval_steps_per_second": 64.673, "step": 6000 }, { "epoch": 5.24, "learning_rate": 9.5245769540693e-06, "loss": 0.3151, "step": 6500 }, { "epoch": 5.24, "eval_accuracy": 0.7332618537050274, "eval_f1": 0.4790782292298362, "eval_loss": 1.3409576416015625, "eval_precision": 0.4423292273236282, "eval_recall": 0.5224867724867724, "eval_runtime": 2.3899, "eval_samples_per_second": 127.201, "eval_steps_per_second": 63.601, "step": 6500 }, { "epoch": 5.64, "learning_rate": 8.7187751813054e-06, "loss": 0.3219, "step": 7000 }, { "epoch": 5.64, "eval_accuracy": 0.7365975696926376, "eval_f1": 0.4665871121718377, "eval_loss": 1.2903356552124023, "eval_precision": 0.425, "eval_recall": 0.5171957671957672, "eval_runtime": 2.4306, "eval_samples_per_second": 125.071, "eval_steps_per_second": 62.535, "step": 7000 }, { "epoch": 6.04, "learning_rate": 7.9129734085415e-06, "loss": 0.3405, "step": 7500 }, { "epoch": 6.04, "eval_accuracy": 0.7470812485108411, "eval_f1": 0.485178463399879, "eval_loss": 1.3365932703018188, "eval_precision": 0.44704570791527315, "eval_recall": 0.5304232804232805, "eval_runtime": 2.4253, "eval_samples_per_second": 125.346, "eval_steps_per_second": 62.673, "step": 7500 }, { "epoch": 6.45, "learning_rate": 7.107171635777599e-06, "loss": 0.2856, "step": 8000 }, { "epoch": 6.45, "eval_accuracy": 0.7474386466523707, "eval_f1": 0.48354278874925194, "eval_loss": 1.324326515197754, "eval_precision": 0.44153005464480877, "eval_recall": 0.5343915343915344, "eval_runtime": 2.4161, "eval_samples_per_second": 125.821, "eval_steps_per_second": 62.91, "step": 8000 }, { "epoch": 6.85, "learning_rate": 6.301369863013699e-06, "loss": 0.2723, "step": 8500 }, { "epoch": 6.85, "eval_accuracy": 0.7398141529664046, "eval_f1": 0.48869883934025654, "eval_loss": 1.3961701393127441, "eval_precision": 0.4540295119182747, "eval_recall": 0.5291005291005291, "eval_runtime": 3.1243, "eval_samples_per_second": 97.302, "eval_steps_per_second": 48.651, "step": 8500 }, { "epoch": 7.25, "learning_rate": 5.495568090249799e-06, "loss": 0.2307, "step": 9000 }, { "epoch": 7.25, "eval_accuracy": 0.7439837979509173, "eval_f1": 0.4990757855822551, "eval_loss": 1.4783344268798828, "eval_precision": 0.4671280276816609, "eval_recall": 0.5357142857142857, "eval_runtime": 3.4818, "eval_samples_per_second": 87.31, "eval_steps_per_second": 43.655, "step": 9000 }, { "epoch": 7.66, "learning_rate": 4.689766317485899e-06, "loss": 0.2484, "step": 9500 }, { "epoch": 7.66, "eval_accuracy": 0.7455325232308792, "eval_f1": 0.5092250922509225, "eval_loss": 1.4249992370605469, "eval_precision": 0.47586206896551725, "eval_recall": 0.5476190476190477, "eval_runtime": 2.5492, "eval_samples_per_second": 119.251, "eval_steps_per_second": 59.625, "step": 9500 }, { "epoch": 8.06, "learning_rate": 3.883964544721999e-06, "loss": 0.2361, "step": 10000 }, { "epoch": 8.06, "eval_accuracy": 0.7518465570645699, "eval_f1": 0.5018495684340321, "eval_loss": 1.4694937467575073, "eval_precision": 0.46997690531177827, "eval_recall": 0.5383597883597884, "eval_runtime": 2.4541, "eval_samples_per_second": 123.872, "eval_steps_per_second": 61.936, "step": 10000 }, { "epoch": 8.46, "learning_rate": 3.0781627719580986e-06, "loss": 0.186, "step": 10500 }, { "epoch": 8.46, "eval_accuracy": 0.7519656897784132, "eval_f1": 0.500900900900901, "eval_loss": 1.5282784700393677, "eval_precision": 0.45874587458745875, "eval_recall": 0.5515873015873016, "eval_runtime": 3.4582, "eval_samples_per_second": 87.906, "eval_steps_per_second": 43.953, "step": 10500 }, { "epoch": 8.86, "learning_rate": 2.2723609991941985e-06, "loss": 0.2188, "step": 11000 }, { "epoch": 8.86, "eval_accuracy": 0.7470812485108411, "eval_f1": 0.4916467780429595, "eval_loss": 1.4357236623764038, "eval_precision": 0.44782608695652176, "eval_recall": 0.544973544973545, "eval_runtime": 2.4794, "eval_samples_per_second": 122.612, "eval_steps_per_second": 61.306, "step": 11000 }, { "epoch": 9.27, "learning_rate": 1.4665592264302982e-06, "loss": 0.2072, "step": 11500 }, { "epoch": 9.27, "eval_accuracy": 0.7526804860614725, "eval_f1": 0.5046728971962617, "eval_loss": 1.4809573888778687, "eval_precision": 0.47703180212014135, "eval_recall": 0.5357142857142857, "eval_runtime": 3.0075, "eval_samples_per_second": 101.082, "eval_steps_per_second": 50.541, "step": 11500 }, { "epoch": 9.67, "learning_rate": 6.607574536663981e-07, "loss": 0.1817, "step": 12000 }, { "epoch": 9.67, "eval_accuracy": 0.7531570169168453, "eval_f1": 0.5058317986494781, "eval_loss": 1.5041394233703613, "eval_precision": 0.47193585337915234, "eval_recall": 0.544973544973545, "eval_runtime": 2.8739, "eval_samples_per_second": 105.781, "eval_steps_per_second": 52.89, "step": 12000 }, { "epoch": 10.0, "step": 12410, "total_flos": 519474169775046.0, "train_loss": 0.5359412353909083, "train_runtime": 1389.5331, "train_samples_per_second": 17.855, "train_steps_per_second": 8.931 } ], "logging_steps": 500, "max_steps": 12410, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 519474169775046.0, "trial_name": null, "trial_params": null }