|
{ |
|
"best_metric": 0.45430439710617065, |
|
"best_model_checkpoint": "/content/aptner_cybert/checkpoint-3000", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 8430, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.881376037959668e-05, |
|
"loss": 0.8182, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9014212120427658, |
|
"eval_f1": 0.3316993464052288, |
|
"eval_loss": 0.5568719506263733, |
|
"eval_precision": 0.5218508997429306, |
|
"eval_recall": 0.24311377245508983, |
|
"eval_runtime": 4.9712, |
|
"eval_samples_per_second": 351.823, |
|
"eval_steps_per_second": 44.053, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.762752075919336e-05, |
|
"loss": 0.5357, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.9023790416526444, |
|
"eval_f1": 0.3561038482784018, |
|
"eval_loss": 0.49799811840057373, |
|
"eval_precision": 0.46245815399330464, |
|
"eval_recall": 0.2895209580838323, |
|
"eval_runtime": 6.2711, |
|
"eval_samples_per_second": 278.899, |
|
"eval_steps_per_second": 34.922, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.6441281138790037e-05, |
|
"loss": 0.4417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.9016283103367936, |
|
"eval_f1": 0.3786700523726393, |
|
"eval_loss": 0.477287232875824, |
|
"eval_precision": 0.4029044241810199, |
|
"eval_recall": 0.35718562874251497, |
|
"eval_runtime": 4.9329, |
|
"eval_samples_per_second": 354.557, |
|
"eval_steps_per_second": 44.396, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.5255041518386714e-05, |
|
"loss": 0.394, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.89430220818556, |
|
"eval_f1": 0.3816285134743552, |
|
"eval_loss": 0.48396036028862, |
|
"eval_precision": 0.36973610331274565, |
|
"eval_recall": 0.39431137724550896, |
|
"eval_runtime": 6.2239, |
|
"eval_samples_per_second": 281.013, |
|
"eval_steps_per_second": 35.187, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4068801897983393e-05, |
|
"loss": 0.3534, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.8914028320691708, |
|
"eval_f1": 0.3966278602970695, |
|
"eval_loss": 0.474202960729599, |
|
"eval_precision": 0.3585773046213404, |
|
"eval_recall": 0.4437125748502994, |
|
"eval_runtime": 5.0489, |
|
"eval_samples_per_second": 346.412, |
|
"eval_steps_per_second": 43.376, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.2882562277580073e-05, |
|
"loss": 0.3048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.9043205881591551, |
|
"eval_f1": 0.43323262839879156, |
|
"eval_loss": 0.45430439710617065, |
|
"eval_precision": 0.4371951219512195, |
|
"eval_recall": 0.4293413173652695, |
|
"eval_runtime": 6.4763, |
|
"eval_samples_per_second": 270.064, |
|
"eval_steps_per_second": 33.816, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.169632265717675e-05, |
|
"loss": 0.2992, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_accuracy": 0.890703875326827, |
|
"eval_f1": 0.394885598923284, |
|
"eval_loss": 0.48460009694099426, |
|
"eval_precision": 0.358679706601467, |
|
"eval_recall": 0.43922155688622755, |
|
"eval_runtime": 6.3841, |
|
"eval_samples_per_second": 273.964, |
|
"eval_steps_per_second": 34.304, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.0510083036773429e-05, |
|
"loss": 0.2675, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.9000232985580782, |
|
"eval_f1": 0.4304409672830725, |
|
"eval_loss": 0.4759831428527832, |
|
"eval_precision": 0.4100271002710027, |
|
"eval_recall": 0.4529940119760479, |
|
"eval_runtime": 5.112, |
|
"eval_samples_per_second": 342.138, |
|
"eval_steps_per_second": 42.841, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 9.323843416370107e-06, |
|
"loss": 0.2454, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_accuracy": 0.9014212120427658, |
|
"eval_f1": 0.4260492040520984, |
|
"eval_loss": 0.470166951417923, |
|
"eval_precision": 0.4123249299719888, |
|
"eval_recall": 0.4407185628742515, |
|
"eval_runtime": 6.2203, |
|
"eval_samples_per_second": 281.177, |
|
"eval_steps_per_second": 35.207, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 8.137603795966786e-06, |
|
"loss": 0.2391, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_accuracy": 0.8979005410442932, |
|
"eval_f1": 0.4270158511371468, |
|
"eval_loss": 0.4743064045906067, |
|
"eval_precision": 0.39565772669220944, |
|
"eval_recall": 0.46377245508982035, |
|
"eval_runtime": 6.4494, |
|
"eval_samples_per_second": 271.187, |
|
"eval_steps_per_second": 33.956, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.951364175563464e-06, |
|
"loss": 0.2088, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_accuracy": 0.903828729710839, |
|
"eval_f1": 0.43508568109207085, |
|
"eval_loss": 0.4777669310569763, |
|
"eval_precision": 0.4224478285391991, |
|
"eval_recall": 0.4485029940119761, |
|
"eval_runtime": 5.0603, |
|
"eval_samples_per_second": 345.632, |
|
"eval_steps_per_second": 43.278, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 5.765124555160143e-06, |
|
"loss": 0.2076, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_accuracy": 0.8929560692743793, |
|
"eval_f1": 0.41404164442071545, |
|
"eval_loss": 0.5050208568572998, |
|
"eval_precision": 0.3735549132947977, |
|
"eval_recall": 0.46437125748502994, |
|
"eval_runtime": 5.3928, |
|
"eval_samples_per_second": 324.318, |
|
"eval_steps_per_second": 40.609, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 4.5788849347568215e-06, |
|
"loss": 0.1946, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_accuracy": 0.8977452173237723, |
|
"eval_f1": 0.4283921349881467, |
|
"eval_loss": 0.49643442034721375, |
|
"eval_precision": 0.4009397024275646, |
|
"eval_recall": 0.4598802395209581, |
|
"eval_runtime": 6.4354, |
|
"eval_samples_per_second": 271.776, |
|
"eval_steps_per_second": 34.03, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 3.3926453143535e-06, |
|
"loss": 0.1808, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"eval_accuracy": 0.9028450128142069, |
|
"eval_f1": 0.4383916990920882, |
|
"eval_loss": 0.48777079582214355, |
|
"eval_precision": 0.4226173937204779, |
|
"eval_recall": 0.4553892215568862, |
|
"eval_runtime": 5.0515, |
|
"eval_samples_per_second": 346.235, |
|
"eval_steps_per_second": 43.354, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 2.2064056939501782e-06, |
|
"loss": 0.1683, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.8975898936032515, |
|
"eval_f1": 0.42638333103353115, |
|
"eval_loss": 0.4947454333305359, |
|
"eval_precision": 0.39544407473765036, |
|
"eval_recall": 0.4625748502994012, |
|
"eval_runtime": 5.1576, |
|
"eval_samples_per_second": 339.114, |
|
"eval_steps_per_second": 42.462, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 1.0201660735468566e-06, |
|
"loss": 0.1681, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_accuracy": 0.900126847705092, |
|
"eval_f1": 0.4352201257861635, |
|
"eval_loss": 0.4915802478790283, |
|
"eval_precision": 0.40812581913499346, |
|
"eval_recall": 0.4661676646706587, |
|
"eval_runtime": 6.3679, |
|
"eval_samples_per_second": 274.657, |
|
"eval_steps_per_second": 34.391, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 8430, |
|
"total_flos": 1171778510432640.0, |
|
"train_loss": 0.3065130231502076, |
|
"train_runtime": 820.0929, |
|
"train_samples_per_second": 82.235, |
|
"train_steps_per_second": 10.279 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8430, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1171778510432640.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|