CyBERT-APTNER / trainer_state.json
Anonymous
Upload folder using huggingface_hub
16b1639
{
"best_metric": 0.45430439710617065,
"best_model_checkpoint": "/content/aptner_cybert/checkpoint-3000",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 8430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.59,
"learning_rate": 1.881376037959668e-05,
"loss": 0.8182,
"step": 500
},
{
"epoch": 0.59,
"eval_accuracy": 0.9014212120427658,
"eval_f1": 0.3316993464052288,
"eval_loss": 0.5568719506263733,
"eval_precision": 0.5218508997429306,
"eval_recall": 0.24311377245508983,
"eval_runtime": 4.9712,
"eval_samples_per_second": 351.823,
"eval_steps_per_second": 44.053,
"step": 500
},
{
"epoch": 1.19,
"learning_rate": 1.762752075919336e-05,
"loss": 0.5357,
"step": 1000
},
{
"epoch": 1.19,
"eval_accuracy": 0.9023790416526444,
"eval_f1": 0.3561038482784018,
"eval_loss": 0.49799811840057373,
"eval_precision": 0.46245815399330464,
"eval_recall": 0.2895209580838323,
"eval_runtime": 6.2711,
"eval_samples_per_second": 278.899,
"eval_steps_per_second": 34.922,
"step": 1000
},
{
"epoch": 1.78,
"learning_rate": 1.6441281138790037e-05,
"loss": 0.4417,
"step": 1500
},
{
"epoch": 1.78,
"eval_accuracy": 0.9016283103367936,
"eval_f1": 0.3786700523726393,
"eval_loss": 0.477287232875824,
"eval_precision": 0.4029044241810199,
"eval_recall": 0.35718562874251497,
"eval_runtime": 4.9329,
"eval_samples_per_second": 354.557,
"eval_steps_per_second": 44.396,
"step": 1500
},
{
"epoch": 2.37,
"learning_rate": 1.5255041518386714e-05,
"loss": 0.394,
"step": 2000
},
{
"epoch": 2.37,
"eval_accuracy": 0.89430220818556,
"eval_f1": 0.3816285134743552,
"eval_loss": 0.48396036028862,
"eval_precision": 0.36973610331274565,
"eval_recall": 0.39431137724550896,
"eval_runtime": 6.2239,
"eval_samples_per_second": 281.013,
"eval_steps_per_second": 35.187,
"step": 2000
},
{
"epoch": 2.97,
"learning_rate": 1.4068801897983393e-05,
"loss": 0.3534,
"step": 2500
},
{
"epoch": 2.97,
"eval_accuracy": 0.8914028320691708,
"eval_f1": 0.3966278602970695,
"eval_loss": 0.474202960729599,
"eval_precision": 0.3585773046213404,
"eval_recall": 0.4437125748502994,
"eval_runtime": 5.0489,
"eval_samples_per_second": 346.412,
"eval_steps_per_second": 43.376,
"step": 2500
},
{
"epoch": 3.56,
"learning_rate": 1.2882562277580073e-05,
"loss": 0.3048,
"step": 3000
},
{
"epoch": 3.56,
"eval_accuracy": 0.9043205881591551,
"eval_f1": 0.43323262839879156,
"eval_loss": 0.45430439710617065,
"eval_precision": 0.4371951219512195,
"eval_recall": 0.4293413173652695,
"eval_runtime": 6.4763,
"eval_samples_per_second": 270.064,
"eval_steps_per_second": 33.816,
"step": 3000
},
{
"epoch": 4.15,
"learning_rate": 1.169632265717675e-05,
"loss": 0.2992,
"step": 3500
},
{
"epoch": 4.15,
"eval_accuracy": 0.890703875326827,
"eval_f1": 0.394885598923284,
"eval_loss": 0.48460009694099426,
"eval_precision": 0.358679706601467,
"eval_recall": 0.43922155688622755,
"eval_runtime": 6.3841,
"eval_samples_per_second": 273.964,
"eval_steps_per_second": 34.304,
"step": 3500
},
{
"epoch": 4.74,
"learning_rate": 1.0510083036773429e-05,
"loss": 0.2675,
"step": 4000
},
{
"epoch": 4.74,
"eval_accuracy": 0.9000232985580782,
"eval_f1": 0.4304409672830725,
"eval_loss": 0.4759831428527832,
"eval_precision": 0.4100271002710027,
"eval_recall": 0.4529940119760479,
"eval_runtime": 5.112,
"eval_samples_per_second": 342.138,
"eval_steps_per_second": 42.841,
"step": 4000
},
{
"epoch": 5.34,
"learning_rate": 9.323843416370107e-06,
"loss": 0.2454,
"step": 4500
},
{
"epoch": 5.34,
"eval_accuracy": 0.9014212120427658,
"eval_f1": 0.4260492040520984,
"eval_loss": 0.470166951417923,
"eval_precision": 0.4123249299719888,
"eval_recall": 0.4407185628742515,
"eval_runtime": 6.2203,
"eval_samples_per_second": 281.177,
"eval_steps_per_second": 35.207,
"step": 4500
},
{
"epoch": 5.93,
"learning_rate": 8.137603795966786e-06,
"loss": 0.2391,
"step": 5000
},
{
"epoch": 5.93,
"eval_accuracy": 0.8979005410442932,
"eval_f1": 0.4270158511371468,
"eval_loss": 0.4743064045906067,
"eval_precision": 0.39565772669220944,
"eval_recall": 0.46377245508982035,
"eval_runtime": 6.4494,
"eval_samples_per_second": 271.187,
"eval_steps_per_second": 33.956,
"step": 5000
},
{
"epoch": 6.52,
"learning_rate": 6.951364175563464e-06,
"loss": 0.2088,
"step": 5500
},
{
"epoch": 6.52,
"eval_accuracy": 0.903828729710839,
"eval_f1": 0.43508568109207085,
"eval_loss": 0.4777669310569763,
"eval_precision": 0.4224478285391991,
"eval_recall": 0.4485029940119761,
"eval_runtime": 5.0603,
"eval_samples_per_second": 345.632,
"eval_steps_per_second": 43.278,
"step": 5500
},
{
"epoch": 7.12,
"learning_rate": 5.765124555160143e-06,
"loss": 0.2076,
"step": 6000
},
{
"epoch": 7.12,
"eval_accuracy": 0.8929560692743793,
"eval_f1": 0.41404164442071545,
"eval_loss": 0.5050208568572998,
"eval_precision": 0.3735549132947977,
"eval_recall": 0.46437125748502994,
"eval_runtime": 5.3928,
"eval_samples_per_second": 324.318,
"eval_steps_per_second": 40.609,
"step": 6000
},
{
"epoch": 7.71,
"learning_rate": 4.5788849347568215e-06,
"loss": 0.1946,
"step": 6500
},
{
"epoch": 7.71,
"eval_accuracy": 0.8977452173237723,
"eval_f1": 0.4283921349881467,
"eval_loss": 0.49643442034721375,
"eval_precision": 0.4009397024275646,
"eval_recall": 0.4598802395209581,
"eval_runtime": 6.4354,
"eval_samples_per_second": 271.776,
"eval_steps_per_second": 34.03,
"step": 6500
},
{
"epoch": 8.3,
"learning_rate": 3.3926453143535e-06,
"loss": 0.1808,
"step": 7000
},
{
"epoch": 8.3,
"eval_accuracy": 0.9028450128142069,
"eval_f1": 0.4383916990920882,
"eval_loss": 0.48777079582214355,
"eval_precision": 0.4226173937204779,
"eval_recall": 0.4553892215568862,
"eval_runtime": 5.0515,
"eval_samples_per_second": 346.235,
"eval_steps_per_second": 43.354,
"step": 7000
},
{
"epoch": 8.9,
"learning_rate": 2.2064056939501782e-06,
"loss": 0.1683,
"step": 7500
},
{
"epoch": 8.9,
"eval_accuracy": 0.8975898936032515,
"eval_f1": 0.42638333103353115,
"eval_loss": 0.4947454333305359,
"eval_precision": 0.39544407473765036,
"eval_recall": 0.4625748502994012,
"eval_runtime": 5.1576,
"eval_samples_per_second": 339.114,
"eval_steps_per_second": 42.462,
"step": 7500
},
{
"epoch": 9.49,
"learning_rate": 1.0201660735468566e-06,
"loss": 0.1681,
"step": 8000
},
{
"epoch": 9.49,
"eval_accuracy": 0.900126847705092,
"eval_f1": 0.4352201257861635,
"eval_loss": 0.4915802478790283,
"eval_precision": 0.40812581913499346,
"eval_recall": 0.4661676646706587,
"eval_runtime": 6.3679,
"eval_samples_per_second": 274.657,
"eval_steps_per_second": 34.391,
"step": 8000
},
{
"epoch": 10.0,
"step": 8430,
"total_flos": 1171778510432640.0,
"train_loss": 0.3065130231502076,
"train_runtime": 820.0929,
"train_samples_per_second": 82.235,
"train_steps_per_second": 10.279
}
],
"logging_steps": 500,
"max_steps": 8430,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1171778510432640.0,
"trial_name": null,
"trial_params": null
}