bert_trainer / run-0 /checkpoint-3000 /trainer_state.json
Artanis1551's picture
End of training
351a4ac
{
"best_metric": 0.8798324742268041,
"best_model_checkpoint": "bert_trainer/run-0/checkpoint-3000",
"epoch": 3.865979381443299,
"eval_steps": 250,
"global_step": 3000,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 2.650169893369787e-06,
"loss": 1.4561,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 5.300339786739574e-06,
"loss": 1.2146,
"step": 50
},
{
"epoch": 0.1,
"learning_rate": 5.300339786739574e-06,
"loss": 1.0949,
"step": 75
},
{
"epoch": 0.13,
"learning_rate": 5.300339786739574e-06,
"loss": 0.9846,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 5.300339786739574e-06,
"loss": 0.9161,
"step": 125
},
{
"epoch": 0.19,
"learning_rate": 5.300339786739574e-06,
"loss": 0.8466,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 5.300339786739574e-06,
"loss": 0.7954,
"step": 175
},
{
"epoch": 0.26,
"learning_rate": 5.300339786739574e-06,
"loss": 0.7311,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6688,
"step": 225
},
{
"epoch": 0.32,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6816,
"step": 250
},
{
"epoch": 0.32,
"eval_accuracy": 0.7857603092783505,
"eval_loss": 0.6313713192939758,
"eval_runtime": 78.939,
"eval_samples_per_second": 39.322,
"eval_steps_per_second": 9.83,
"step": 250
},
{
"epoch": 0.35,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6339,
"step": 275
},
{
"epoch": 0.39,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6102,
"step": 300
},
{
"epoch": 0.42,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6572,
"step": 325
},
{
"epoch": 0.45,
"learning_rate": 5.300339786739574e-06,
"loss": 0.6575,
"step": 350
},
{
"epoch": 0.48,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5946,
"step": 375
},
{
"epoch": 0.52,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5779,
"step": 400
},
{
"epoch": 0.55,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5773,
"step": 425
},
{
"epoch": 0.58,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5461,
"step": 450
},
{
"epoch": 0.61,
"learning_rate": 5.300339786739574e-06,
"loss": 0.576,
"step": 475
},
{
"epoch": 0.64,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4864,
"step": 500
},
{
"epoch": 0.64,
"eval_accuracy": 0.8176546391752577,
"eval_loss": 0.5011568069458008,
"eval_runtime": 78.7987,
"eval_samples_per_second": 39.392,
"eval_steps_per_second": 9.848,
"step": 500
},
{
"epoch": 0.68,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5618,
"step": 525
},
{
"epoch": 0.71,
"learning_rate": 5.300339786739574e-06,
"loss": 0.5325,
"step": 550
},
{
"epoch": 0.74,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4656,
"step": 575
},
{
"epoch": 0.77,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4966,
"step": 600
},
{
"epoch": 0.81,
"learning_rate": 5.300339786739574e-06,
"loss": 0.457,
"step": 625
},
{
"epoch": 0.84,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4715,
"step": 650
},
{
"epoch": 0.87,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4621,
"step": 675
},
{
"epoch": 0.9,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4679,
"step": 700
},
{
"epoch": 0.93,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4236,
"step": 725
},
{
"epoch": 0.97,
"learning_rate": 5.300339786739574e-06,
"loss": 0.399,
"step": 750
},
{
"epoch": 0.97,
"eval_accuracy": 0.8443943298969072,
"eval_loss": 0.4396892488002777,
"eval_runtime": 78.9384,
"eval_samples_per_second": 39.322,
"eval_steps_per_second": 9.83,
"step": 750
},
{
"epoch": 1.0,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4321,
"step": 775
},
{
"epoch": 1.03,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3953,
"step": 800
},
{
"epoch": 1.06,
"learning_rate": 5.300339786739574e-06,
"loss": 0.379,
"step": 825
},
{
"epoch": 1.1,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3415,
"step": 850
},
{
"epoch": 1.13,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3705,
"step": 875
},
{
"epoch": 1.16,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3612,
"step": 900
},
{
"epoch": 1.19,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4108,
"step": 925
},
{
"epoch": 1.22,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4167,
"step": 950
},
{
"epoch": 1.26,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3916,
"step": 975
},
{
"epoch": 1.29,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3947,
"step": 1000
},
{
"epoch": 1.29,
"eval_accuracy": 0.8579252577319587,
"eval_loss": 0.40539947152137756,
"eval_runtime": 78.7689,
"eval_samples_per_second": 39.406,
"eval_steps_per_second": 9.852,
"step": 1000
},
{
"epoch": 1.32,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3812,
"step": 1025
},
{
"epoch": 1.35,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3804,
"step": 1050
},
{
"epoch": 1.39,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3783,
"step": 1075
},
{
"epoch": 1.42,
"learning_rate": 5.300339786739574e-06,
"loss": 0.337,
"step": 1100
},
{
"epoch": 1.45,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3191,
"step": 1125
},
{
"epoch": 1.48,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3277,
"step": 1150
},
{
"epoch": 1.51,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3601,
"step": 1175
},
{
"epoch": 1.55,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3932,
"step": 1200
},
{
"epoch": 1.58,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3964,
"step": 1225
},
{
"epoch": 1.61,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3705,
"step": 1250
},
{
"epoch": 1.61,
"eval_accuracy": 0.8524484536082474,
"eval_loss": 0.40534982085227966,
"eval_runtime": 78.953,
"eval_samples_per_second": 39.315,
"eval_steps_per_second": 9.829,
"step": 1250
},
{
"epoch": 1.64,
"learning_rate": 5.300339786739574e-06,
"loss": 0.4567,
"step": 1275
},
{
"epoch": 1.68,
"learning_rate": 5.300339786739574e-06,
"loss": 0.384,
"step": 1300
},
{
"epoch": 1.71,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3205,
"step": 1325
},
{
"epoch": 1.74,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3841,
"step": 1350
},
{
"epoch": 1.77,
"learning_rate": 5.300339786739574e-06,
"loss": 0.322,
"step": 1375
},
{
"epoch": 1.8,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3742,
"step": 1400
},
{
"epoch": 1.84,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3407,
"step": 1425
},
{
"epoch": 1.87,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3342,
"step": 1450
},
{
"epoch": 1.9,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3068,
"step": 1475
},
{
"epoch": 1.93,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3288,
"step": 1500
},
{
"epoch": 1.93,
"eval_accuracy": 0.8688788659793815,
"eval_loss": 0.36364319920539856,
"eval_runtime": 78.9463,
"eval_samples_per_second": 39.318,
"eval_steps_per_second": 9.829,
"step": 1500
},
{
"epoch": 1.97,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2918,
"step": 1525
},
{
"epoch": 2.0,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3571,
"step": 1550
},
{
"epoch": 2.03,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3341,
"step": 1575
},
{
"epoch": 2.06,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3162,
"step": 1600
},
{
"epoch": 2.09,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2384,
"step": 1625
},
{
"epoch": 2.13,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2934,
"step": 1650
},
{
"epoch": 2.16,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2686,
"step": 1675
},
{
"epoch": 2.19,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2806,
"step": 1700
},
{
"epoch": 2.22,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2897,
"step": 1725
},
{
"epoch": 2.26,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2417,
"step": 1750
},
{
"epoch": 2.26,
"eval_accuracy": 0.8743556701030928,
"eval_loss": 0.39001935720443726,
"eval_runtime": 78.9576,
"eval_samples_per_second": 39.312,
"eval_steps_per_second": 9.828,
"step": 1750
},
{
"epoch": 2.29,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2849,
"step": 1775
},
{
"epoch": 2.32,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2653,
"step": 1800
},
{
"epoch": 2.35,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2266,
"step": 1825
},
{
"epoch": 2.38,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2401,
"step": 1850
},
{
"epoch": 2.42,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2208,
"step": 1875
},
{
"epoch": 2.45,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2615,
"step": 1900
},
{
"epoch": 2.48,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2913,
"step": 1925
},
{
"epoch": 2.51,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2474,
"step": 1950
},
{
"epoch": 2.55,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2388,
"step": 1975
},
{
"epoch": 2.58,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2374,
"step": 2000
},
{
"epoch": 2.58,
"eval_accuracy": 0.8730670103092784,
"eval_loss": 0.39385583996772766,
"eval_runtime": 78.9575,
"eval_samples_per_second": 39.312,
"eval_steps_per_second": 9.828,
"step": 2000
},
{
"epoch": 2.61,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2697,
"step": 2025
},
{
"epoch": 2.64,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2592,
"step": 2050
},
{
"epoch": 2.67,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2636,
"step": 2075
},
{
"epoch": 2.71,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2349,
"step": 2100
},
{
"epoch": 2.74,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2697,
"step": 2125
},
{
"epoch": 2.77,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2507,
"step": 2150
},
{
"epoch": 2.8,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2608,
"step": 2175
},
{
"epoch": 2.84,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2463,
"step": 2200
},
{
"epoch": 2.87,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2408,
"step": 2225
},
{
"epoch": 2.9,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3525,
"step": 2250
},
{
"epoch": 2.9,
"eval_accuracy": 0.8627577319587629,
"eval_loss": 0.4113520085811615,
"eval_runtime": 78.9601,
"eval_samples_per_second": 39.311,
"eval_steps_per_second": 9.828,
"step": 2250
},
{
"epoch": 2.93,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3033,
"step": 2275
},
{
"epoch": 2.96,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2186,
"step": 2300
},
{
"epoch": 3.0,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2619,
"step": 2325
},
{
"epoch": 3.03,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1473,
"step": 2350
},
{
"epoch": 3.06,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1662,
"step": 2375
},
{
"epoch": 3.09,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1863,
"step": 2400
},
{
"epoch": 3.12,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1869,
"step": 2425
},
{
"epoch": 3.16,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1729,
"step": 2450
},
{
"epoch": 3.19,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1998,
"step": 2475
},
{
"epoch": 3.22,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1778,
"step": 2500
},
{
"epoch": 3.22,
"eval_accuracy": 0.873389175257732,
"eval_loss": 0.44405418634414673,
"eval_runtime": 78.9751,
"eval_samples_per_second": 39.304,
"eval_steps_per_second": 9.826,
"step": 2500
},
{
"epoch": 3.25,
"learning_rate": 5.300339786739574e-06,
"loss": 0.171,
"step": 2525
},
{
"epoch": 3.29,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1642,
"step": 2550
},
{
"epoch": 3.32,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2303,
"step": 2575
},
{
"epoch": 3.35,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2194,
"step": 2600
},
{
"epoch": 3.38,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1756,
"step": 2625
},
{
"epoch": 3.41,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1956,
"step": 2650
},
{
"epoch": 3.45,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2669,
"step": 2675
},
{
"epoch": 3.48,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1514,
"step": 2700
},
{
"epoch": 3.51,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1263,
"step": 2725
},
{
"epoch": 3.54,
"learning_rate": 5.300339786739574e-06,
"loss": 0.259,
"step": 2750
},
{
"epoch": 3.54,
"eval_accuracy": 0.8566365979381443,
"eval_loss": 0.4988997280597687,
"eval_runtime": 78.9372,
"eval_samples_per_second": 39.322,
"eval_steps_per_second": 9.831,
"step": 2750
},
{
"epoch": 3.58,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1378,
"step": 2775
},
{
"epoch": 3.61,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1588,
"step": 2800
},
{
"epoch": 3.64,
"learning_rate": 5.300339786739574e-06,
"loss": 0.179,
"step": 2825
},
{
"epoch": 3.67,
"learning_rate": 5.300339786739574e-06,
"loss": 0.3413,
"step": 2850
},
{
"epoch": 3.7,
"learning_rate": 5.300339786739574e-06,
"loss": 0.169,
"step": 2875
},
{
"epoch": 3.74,
"learning_rate": 5.300339786739574e-06,
"loss": 0.208,
"step": 2900
},
{
"epoch": 3.77,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1602,
"step": 2925
},
{
"epoch": 3.8,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1406,
"step": 2950
},
{
"epoch": 3.83,
"learning_rate": 5.300339786739574e-06,
"loss": 0.1519,
"step": 2975
},
{
"epoch": 3.87,
"learning_rate": 5.300339786739574e-06,
"loss": 0.2028,
"step": 3000
},
{
"epoch": 3.87,
"eval_accuracy": 0.8798324742268041,
"eval_loss": 0.46966859698295593,
"eval_runtime": 78.9816,
"eval_samples_per_second": 39.3,
"eval_steps_per_second": 9.825,
"step": 3000
}
],
"logging_steps": 25,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 250,
"total_flos": 1.2625987184277504e+16,
"trial_name": null,
"trial_params": {
"learning_rate": 5.300339786739574e-06,
"per_device_eval_batch_size": 4,
"per_device_train_batch_size": 16
}
}