org_aug_a / trainer_state.json
Stern5497's picture
mistral-lp2-org_org_a
a8f05d6 verified
{
"best_metric": 0.8807787895202637,
"best_model_checkpoint": "org_org_a/org_aug_a/checkpoint-400",
"epoch": 0.24615384615384617,
"eval_steps": 25,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015384615384615385,
"grad_norm": 99.7617416381836,
"learning_rate": 9.375e-05,
"loss": 2.2195,
"step": 25
},
{
"epoch": 0.015384615384615385,
"eval_f1_macro": 0.5521299126846324,
"eval_f1_micro": 0.5691194856224325,
"eval_f1_weighted": 0.569423288164649,
"eval_loss": 1.520703673362732,
"eval_runtime": 1286.7359,
"eval_samples_per_second": 8.703,
"eval_steps_per_second": 0.272,
"step": 25
},
{
"epoch": 0.03076923076923077,
"grad_norm": 40.433555603027344,
"learning_rate": 8.75e-05,
"loss": 1.4371,
"step": 50
},
{
"epoch": 0.03076923076923077,
"eval_f1_macro": 0.5857301862067549,
"eval_f1_micro": 0.6089480264332917,
"eval_f1_weighted": 0.6051732662908408,
"eval_loss": 1.2746953964233398,
"eval_runtime": 1302.368,
"eval_samples_per_second": 8.598,
"eval_steps_per_second": 0.269,
"step": 50
},
{
"epoch": 0.046153846153846156,
"grad_norm": 50.680335998535156,
"learning_rate": 8.125000000000001e-05,
"loss": 1.2556,
"step": 75
},
{
"epoch": 0.046153846153846156,
"eval_f1_macro": 0.6036315452406847,
"eval_f1_micro": 0.6303804250759064,
"eval_f1_weighted": 0.6240449220829647,
"eval_loss": 1.1545159816741943,
"eval_runtime": 1308.8563,
"eval_samples_per_second": 8.556,
"eval_steps_per_second": 0.267,
"step": 75
},
{
"epoch": 0.06153846153846154,
"grad_norm": 24.990671157836914,
"learning_rate": 7.500000000000001e-05,
"loss": 1.2415,
"step": 100
},
{
"epoch": 0.06153846153846154,
"eval_f1_macro": 0.6131831448419165,
"eval_f1_micro": 0.6319878549741025,
"eval_f1_weighted": 0.6300913896752308,
"eval_loss": 1.0690622329711914,
"eval_runtime": 1302.0106,
"eval_samples_per_second": 8.601,
"eval_steps_per_second": 0.269,
"step": 100
},
{
"epoch": 0.07692307692307693,
"grad_norm": 20.941816329956055,
"learning_rate": 6.875e-05,
"loss": 0.9864,
"step": 125
},
{
"epoch": 0.07692307692307693,
"eval_f1_macro": 0.627800207605976,
"eval_f1_micro": 0.6399357028040722,
"eval_f1_weighted": 0.6411240450638264,
"eval_loss": 1.0263742208480835,
"eval_runtime": 1310.3949,
"eval_samples_per_second": 8.546,
"eval_steps_per_second": 0.267,
"step": 125
},
{
"epoch": 0.09230769230769231,
"grad_norm": 17.243314743041992,
"learning_rate": 6.25e-05,
"loss": 1.0647,
"step": 150
},
{
"epoch": 0.09230769230769231,
"eval_f1_macro": 0.6265827051287185,
"eval_f1_micro": 0.6510091087694231,
"eval_f1_weighted": 0.6455165794591529,
"eval_loss": 0.9917964339256287,
"eval_runtime": 1292.6103,
"eval_samples_per_second": 8.663,
"eval_steps_per_second": 0.271,
"step": 150
},
{
"epoch": 0.1076923076923077,
"grad_norm": 20.114173889160156,
"learning_rate": 5.6250000000000005e-05,
"loss": 0.9849,
"step": 175
},
{
"epoch": 0.1076923076923077,
"eval_f1_macro": 0.6317476736951155,
"eval_f1_micro": 0.6576174316842294,
"eval_f1_weighted": 0.6510976948325254,
"eval_loss": 0.9679338932037354,
"eval_runtime": 1305.0812,
"eval_samples_per_second": 8.58,
"eval_steps_per_second": 0.268,
"step": 175
},
{
"epoch": 0.12307692307692308,
"grad_norm": 39.2221565246582,
"learning_rate": 5e-05,
"loss": 1.0067,
"step": 200
},
{
"epoch": 0.12307692307692308,
"eval_f1_macro": 0.6383959350585475,
"eval_f1_micro": 0.6501160921593142,
"eval_f1_weighted": 0.6513020604373679,
"eval_loss": 0.9382981061935425,
"eval_runtime": 1283.0843,
"eval_samples_per_second": 8.727,
"eval_steps_per_second": 0.273,
"step": 200
},
{
"epoch": 0.13846153846153847,
"grad_norm": 26.992185592651367,
"learning_rate": 4.375e-05,
"loss": 0.8928,
"step": 225
},
{
"epoch": 0.13846153846153847,
"eval_f1_macro": 0.640450740779414,
"eval_f1_micro": 0.6619932130737631,
"eval_f1_weighted": 0.6578984928748007,
"eval_loss": 0.9242791533470154,
"eval_runtime": 1276.3685,
"eval_samples_per_second": 8.773,
"eval_steps_per_second": 0.274,
"step": 225
},
{
"epoch": 0.15384615384615385,
"grad_norm": 71.61570739746094,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.9858,
"step": 250
},
{
"epoch": 0.15384615384615385,
"eval_f1_macro": 0.640469116958249,
"eval_f1_micro": 0.6627076263618503,
"eval_f1_weighted": 0.6581960436641718,
"eval_loss": 0.9131789803504944,
"eval_runtime": 1285.3671,
"eval_samples_per_second": 8.712,
"eval_steps_per_second": 0.272,
"step": 250
},
{
"epoch": 0.16923076923076924,
"grad_norm": 51.381019592285156,
"learning_rate": 3.125e-05,
"loss": 0.9085,
"step": 275
},
{
"epoch": 0.16923076923076924,
"eval_f1_macro": 0.6446344740224741,
"eval_f1_micro": 0.6575281300232184,
"eval_f1_weighted": 0.6580540316041209,
"eval_loss": 0.9010853171348572,
"eval_runtime": 1307.6049,
"eval_samples_per_second": 8.564,
"eval_steps_per_second": 0.268,
"step": 275
},
{
"epoch": 0.18461538461538463,
"grad_norm": 25.47317886352539,
"learning_rate": 2.5e-05,
"loss": 1.0059,
"step": 300
},
{
"epoch": 0.18461538461538463,
"eval_f1_macro": 0.6435798595814134,
"eval_f1_micro": 0.6686015359885694,
"eval_f1_weighted": 0.662301841928527,
"eval_loss": 0.9018191695213318,
"eval_runtime": 1330.8319,
"eval_samples_per_second": 8.414,
"eval_steps_per_second": 0.263,
"step": 300
},
{
"epoch": 0.2,
"grad_norm": 16.64508628845215,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.8939,
"step": 325
},
{
"epoch": 0.2,
"eval_f1_macro": 0.6448244249080287,
"eval_f1_micro": 0.6681550276835149,
"eval_f1_weighted": 0.6628710932570111,
"eval_loss": 0.892798125743866,
"eval_runtime": 1305.0822,
"eval_samples_per_second": 8.58,
"eval_steps_per_second": 0.268,
"step": 325
},
{
"epoch": 0.2153846153846154,
"grad_norm": 32.9316520690918,
"learning_rate": 1.25e-05,
"loss": 0.864,
"step": 350
},
{
"epoch": 0.2153846153846154,
"eval_f1_macro": 0.6477608498148076,
"eval_f1_micro": 0.6621718163957849,
"eval_f1_weighted": 0.6618855058014609,
"eval_loss": 0.8832775950431824,
"eval_runtime": 1344.9811,
"eval_samples_per_second": 8.326,
"eval_steps_per_second": 0.26,
"step": 350
},
{
"epoch": 0.23076923076923078,
"grad_norm": 63.84526824951172,
"learning_rate": 6.25e-06,
"loss": 0.9499,
"step": 375
},
{
"epoch": 0.23076923076923078,
"eval_f1_macro": 0.646307301425064,
"eval_f1_micro": 0.6585104482943382,
"eval_f1_weighted": 0.659331813949001,
"eval_loss": 0.8836826682090759,
"eval_runtime": 1287.5574,
"eval_samples_per_second": 8.697,
"eval_steps_per_second": 0.272,
"step": 375
},
{
"epoch": 0.24615384615384617,
"grad_norm": 21.614879608154297,
"learning_rate": 0.0,
"loss": 0.9721,
"step": 400
},
{
"epoch": 0.24615384615384617,
"eval_f1_macro": 0.6475943444675745,
"eval_f1_micro": 0.6614574031076978,
"eval_f1_weighted": 0.6614511612983146,
"eval_loss": 0.8807787895202637,
"eval_runtime": 1313.0891,
"eval_samples_per_second": 8.528,
"eval_steps_per_second": 0.267,
"step": 400
},
{
"epoch": 0.24615384615384617,
"step": 400,
"total_flos": 1.690284412871639e+17,
"train_loss": 1.104334650039673,
"train_runtime": 22485.3021,
"train_samples_per_second": 0.569,
"train_steps_per_second": 0.018
},
{
"epoch": 0.24615384615384617,
"eval_f1_macro": 0.6475943444675745,
"eval_f1_micro": 0.6614574031076978,
"eval_f1_weighted": 0.6614511612983146,
"eval_loss": 0.8807787895202637,
"eval_runtime": 1299.8097,
"eval_samples_per_second": 8.615,
"eval_steps_per_second": 0.269,
"step": 400
}
],
"logging_steps": 25,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"total_flos": 1.690284412871639e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}