{ "best_metric": 0.45430439710617065, "best_model_checkpoint": "/content/aptner_cybert/checkpoint-3000", "epoch": 10.0, "eval_steps": 500, "global_step": 8430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "learning_rate": 1.881376037959668e-05, "loss": 0.8182, "step": 500 }, { "epoch": 0.59, "eval_accuracy": 0.9014212120427658, "eval_f1": 0.3316993464052288, "eval_loss": 0.5568719506263733, "eval_precision": 0.5218508997429306, "eval_recall": 0.24311377245508983, "eval_runtime": 4.9712, "eval_samples_per_second": 351.823, "eval_steps_per_second": 44.053, "step": 500 }, { "epoch": 1.19, "learning_rate": 1.762752075919336e-05, "loss": 0.5357, "step": 1000 }, { "epoch": 1.19, "eval_accuracy": 0.9023790416526444, "eval_f1": 0.3561038482784018, "eval_loss": 0.49799811840057373, "eval_precision": 0.46245815399330464, "eval_recall": 0.2895209580838323, "eval_runtime": 6.2711, "eval_samples_per_second": 278.899, "eval_steps_per_second": 34.922, "step": 1000 }, { "epoch": 1.78, "learning_rate": 1.6441281138790037e-05, "loss": 0.4417, "step": 1500 }, { "epoch": 1.78, "eval_accuracy": 0.9016283103367936, "eval_f1": 0.3786700523726393, "eval_loss": 0.477287232875824, "eval_precision": 0.4029044241810199, "eval_recall": 0.35718562874251497, "eval_runtime": 4.9329, "eval_samples_per_second": 354.557, "eval_steps_per_second": 44.396, "step": 1500 }, { "epoch": 2.37, "learning_rate": 1.5255041518386714e-05, "loss": 0.394, "step": 2000 }, { "epoch": 2.37, "eval_accuracy": 0.89430220818556, "eval_f1": 0.3816285134743552, "eval_loss": 0.48396036028862, "eval_precision": 0.36973610331274565, "eval_recall": 0.39431137724550896, "eval_runtime": 6.2239, "eval_samples_per_second": 281.013, "eval_steps_per_second": 35.187, "step": 2000 }, { "epoch": 2.97, "learning_rate": 1.4068801897983393e-05, "loss": 0.3534, "step": 2500 }, { "epoch": 2.97, "eval_accuracy": 0.8914028320691708, "eval_f1": 0.3966278602970695, "eval_loss": 0.474202960729599, "eval_precision": 0.3585773046213404, "eval_recall": 0.4437125748502994, "eval_runtime": 5.0489, "eval_samples_per_second": 346.412, "eval_steps_per_second": 43.376, "step": 2500 }, { "epoch": 3.56, "learning_rate": 1.2882562277580073e-05, "loss": 0.3048, "step": 3000 }, { "epoch": 3.56, "eval_accuracy": 0.9043205881591551, "eval_f1": 0.43323262839879156, "eval_loss": 0.45430439710617065, "eval_precision": 0.4371951219512195, "eval_recall": 0.4293413173652695, "eval_runtime": 6.4763, "eval_samples_per_second": 270.064, "eval_steps_per_second": 33.816, "step": 3000 }, { "epoch": 4.15, "learning_rate": 1.169632265717675e-05, "loss": 0.2992, "step": 3500 }, { "epoch": 4.15, "eval_accuracy": 0.890703875326827, "eval_f1": 0.394885598923284, "eval_loss": 0.48460009694099426, "eval_precision": 0.358679706601467, "eval_recall": 0.43922155688622755, "eval_runtime": 6.3841, "eval_samples_per_second": 273.964, "eval_steps_per_second": 34.304, "step": 3500 }, { "epoch": 4.74, "learning_rate": 1.0510083036773429e-05, "loss": 0.2675, "step": 4000 }, { "epoch": 4.74, "eval_accuracy": 0.9000232985580782, "eval_f1": 0.4304409672830725, "eval_loss": 0.4759831428527832, "eval_precision": 0.4100271002710027, "eval_recall": 0.4529940119760479, "eval_runtime": 5.112, "eval_samples_per_second": 342.138, "eval_steps_per_second": 42.841, "step": 4000 }, { "epoch": 5.34, "learning_rate": 9.323843416370107e-06, "loss": 0.2454, "step": 4500 }, { "epoch": 5.34, "eval_accuracy": 0.9014212120427658, "eval_f1": 0.4260492040520984, "eval_loss": 0.470166951417923, "eval_precision": 0.4123249299719888, "eval_recall": 0.4407185628742515, "eval_runtime": 6.2203, "eval_samples_per_second": 281.177, "eval_steps_per_second": 35.207, "step": 4500 }, { "epoch": 5.93, "learning_rate": 8.137603795966786e-06, "loss": 0.2391, "step": 5000 }, { "epoch": 5.93, "eval_accuracy": 0.8979005410442932, "eval_f1": 0.4270158511371468, "eval_loss": 0.4743064045906067, "eval_precision": 0.39565772669220944, "eval_recall": 0.46377245508982035, "eval_runtime": 6.4494, "eval_samples_per_second": 271.187, "eval_steps_per_second": 33.956, "step": 5000 }, { "epoch": 6.52, "learning_rate": 6.951364175563464e-06, "loss": 0.2088, "step": 5500 }, { "epoch": 6.52, "eval_accuracy": 0.903828729710839, "eval_f1": 0.43508568109207085, "eval_loss": 0.4777669310569763, "eval_precision": 0.4224478285391991, "eval_recall": 0.4485029940119761, "eval_runtime": 5.0603, "eval_samples_per_second": 345.632, "eval_steps_per_second": 43.278, "step": 5500 }, { "epoch": 7.12, "learning_rate": 5.765124555160143e-06, "loss": 0.2076, "step": 6000 }, { "epoch": 7.12, "eval_accuracy": 0.8929560692743793, "eval_f1": 0.41404164442071545, "eval_loss": 0.5050208568572998, "eval_precision": 0.3735549132947977, "eval_recall": 0.46437125748502994, "eval_runtime": 5.3928, "eval_samples_per_second": 324.318, "eval_steps_per_second": 40.609, "step": 6000 }, { "epoch": 7.71, "learning_rate": 4.5788849347568215e-06, "loss": 0.1946, "step": 6500 }, { "epoch": 7.71, "eval_accuracy": 0.8977452173237723, "eval_f1": 0.4283921349881467, "eval_loss": 0.49643442034721375, "eval_precision": 0.4009397024275646, "eval_recall": 0.4598802395209581, "eval_runtime": 6.4354, "eval_samples_per_second": 271.776, "eval_steps_per_second": 34.03, "step": 6500 }, { "epoch": 8.3, "learning_rate": 3.3926453143535e-06, "loss": 0.1808, "step": 7000 }, { "epoch": 8.3, "eval_accuracy": 0.9028450128142069, "eval_f1": 0.4383916990920882, "eval_loss": 0.48777079582214355, "eval_precision": 0.4226173937204779, "eval_recall": 0.4553892215568862, "eval_runtime": 5.0515, "eval_samples_per_second": 346.235, "eval_steps_per_second": 43.354, "step": 7000 }, { "epoch": 8.9, "learning_rate": 2.2064056939501782e-06, "loss": 0.1683, "step": 7500 }, { "epoch": 8.9, "eval_accuracy": 0.8975898936032515, "eval_f1": 0.42638333103353115, "eval_loss": 0.4947454333305359, "eval_precision": 0.39544407473765036, "eval_recall": 0.4625748502994012, "eval_runtime": 5.1576, "eval_samples_per_second": 339.114, "eval_steps_per_second": 42.462, "step": 7500 }, { "epoch": 9.49, "learning_rate": 1.0201660735468566e-06, "loss": 0.1681, "step": 8000 }, { "epoch": 9.49, "eval_accuracy": 0.900126847705092, "eval_f1": 0.4352201257861635, "eval_loss": 0.4915802478790283, "eval_precision": 0.40812581913499346, "eval_recall": 0.4661676646706587, "eval_runtime": 6.3679, "eval_samples_per_second": 274.657, "eval_steps_per_second": 34.391, "step": 8000 }, { "epoch": 10.0, "step": 8430, "total_flos": 1171778510432640.0, "train_loss": 0.3065130231502076, "train_runtime": 820.0929, "train_samples_per_second": 82.235, "train_steps_per_second": 10.279 } ], "logging_steps": 500, "max_steps": 8430, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1171778510432640.0, "trial_name": null, "trial_params": null }