source-type-model / trainer_state.json
alex2awesome's picture
Training in progress, step 500
771390e
raw
history blame
6.83 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 2436,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"eval_f1": 0.0956468424279583,
"eval_loss": 1.3489933013916016,
"eval_runtime": 12.0925,
"eval_samples_per_second": 38.454,
"eval_steps_per_second": 7.691,
"step": 100
},
{
"epoch": 0.25,
"eval_f1": 0.0956468424279583,
"eval_loss": 1.4751325845718384,
"eval_runtime": 2.7372,
"eval_samples_per_second": 169.883,
"eval_steps_per_second": 33.977,
"step": 200
},
{
"epoch": 0.37,
"eval_f1": 0.24267804768973908,
"eval_loss": 0.9687431454658508,
"eval_runtime": 2.7453,
"eval_samples_per_second": 169.381,
"eval_steps_per_second": 33.876,
"step": 300
},
{
"epoch": 0.49,
"eval_f1": 0.189060606060606,
"eval_loss": 1.0624897480010986,
"eval_runtime": 2.7393,
"eval_samples_per_second": 169.753,
"eval_steps_per_second": 33.951,
"step": 400
},
{
"epoch": 0.62,
"learning_rate": 3.9737274220032846e-05,
"loss": 1.2336,
"step": 500
},
{
"epoch": 0.62,
"eval_f1": 0.1949172949812336,
"eval_loss": 1.0953644514083862,
"eval_runtime": 2.7252,
"eval_samples_per_second": 170.632,
"eval_steps_per_second": 34.126,
"step": 500
},
{
"epoch": 0.74,
"eval_f1": 0.3080140543853414,
"eval_loss": 0.9969261288642883,
"eval_runtime": 2.7252,
"eval_samples_per_second": 170.632,
"eval_steps_per_second": 34.126,
"step": 600
},
{
"epoch": 0.86,
"eval_f1": 0.3174607190495667,
"eval_loss": 0.917083203792572,
"eval_runtime": 2.7253,
"eval_samples_per_second": 170.626,
"eval_steps_per_second": 34.125,
"step": 700
},
{
"epoch": 0.99,
"eval_f1": 0.3136389264934501,
"eval_loss": 0.9600306153297424,
"eval_runtime": 2.7547,
"eval_samples_per_second": 168.801,
"eval_steps_per_second": 33.76,
"step": 800
},
{
"epoch": 1.11,
"eval_f1": 0.3161058727288384,
"eval_loss": 0.9637317061424255,
"eval_runtime": 2.7379,
"eval_samples_per_second": 169.835,
"eval_steps_per_second": 33.967,
"step": 900
},
{
"epoch": 1.23,
"learning_rate": 2.947454844006568e-05,
"loss": 1.0269,
"step": 1000
},
{
"epoch": 1.23,
"eval_f1": 0.32568520729341943,
"eval_loss": 0.9592134356498718,
"eval_runtime": 2.7586,
"eval_samples_per_second": 168.567,
"eval_steps_per_second": 33.713,
"step": 1000
},
{
"epoch": 1.35,
"eval_f1": 0.33416337186541306,
"eval_loss": 0.9117490649223328,
"eval_runtime": 2.7292,
"eval_samples_per_second": 170.381,
"eval_steps_per_second": 34.076,
"step": 1100
},
{
"epoch": 1.48,
"eval_f1": 0.32049717164329167,
"eval_loss": 0.8890902996063232,
"eval_runtime": 2.7191,
"eval_samples_per_second": 171.014,
"eval_steps_per_second": 34.203,
"step": 1200
},
{
"epoch": 1.6,
"eval_f1": 0.33748089562391775,
"eval_loss": 0.8135806918144226,
"eval_runtime": 2.7319,
"eval_samples_per_second": 170.209,
"eval_steps_per_second": 34.042,
"step": 1300
},
{
"epoch": 1.72,
"eval_f1": 0.3299982086985783,
"eval_loss": 0.9675911664962769,
"eval_runtime": 2.7207,
"eval_samples_per_second": 170.911,
"eval_steps_per_second": 34.182,
"step": 1400
},
{
"epoch": 1.85,
"learning_rate": 1.921182266009852e-05,
"loss": 0.8592,
"step": 1500
},
{
"epoch": 1.85,
"eval_f1": 0.3315730877046458,
"eval_loss": 0.8777754306793213,
"eval_runtime": 2.7307,
"eval_samples_per_second": 170.287,
"eval_steps_per_second": 34.057,
"step": 1500
},
{
"epoch": 1.97,
"eval_f1": 0.33789886027350413,
"eval_loss": 0.840674102306366,
"eval_runtime": 2.7102,
"eval_samples_per_second": 171.575,
"eval_steps_per_second": 34.315,
"step": 1600
},
{
"epoch": 2.09,
"eval_f1": 0.33685244804747255,
"eval_loss": 0.8408811092376709,
"eval_runtime": 2.7395,
"eval_samples_per_second": 169.74,
"eval_steps_per_second": 33.948,
"step": 1700
},
{
"epoch": 2.22,
"eval_f1": 0.3342829560124293,
"eval_loss": 0.8817650675773621,
"eval_runtime": 2.7283,
"eval_samples_per_second": 170.435,
"eval_steps_per_second": 34.087,
"step": 1800
},
{
"epoch": 2.34,
"eval_f1": 0.3385578564861433,
"eval_loss": 0.9258896708488464,
"eval_runtime": 2.7323,
"eval_samples_per_second": 170.186,
"eval_steps_per_second": 34.037,
"step": 1900
},
{
"epoch": 2.46,
"learning_rate": 8.949096880131364e-06,
"loss": 0.7521,
"step": 2000
},
{
"epoch": 2.46,
"eval_f1": 0.33802107795500247,
"eval_loss": 0.941909670829773,
"eval_runtime": 2.7353,
"eval_samples_per_second": 169.998,
"eval_steps_per_second": 34.0,
"step": 2000
},
{
"epoch": 2.59,
"eval_f1": 0.3473817078391378,
"eval_loss": 0.8049965500831604,
"eval_runtime": 2.7367,
"eval_samples_per_second": 169.913,
"eval_steps_per_second": 33.983,
"step": 2100
},
{
"epoch": 2.71,
"eval_f1": 0.4052810578970328,
"eval_loss": 0.7772806286811829,
"eval_runtime": 2.7323,
"eval_samples_per_second": 170.185,
"eval_steps_per_second": 34.037,
"step": 2200
},
{
"epoch": 2.83,
"eval_f1": 0.43365497482920284,
"eval_loss": 0.7114064693450928,
"eval_runtime": 2.7465,
"eval_samples_per_second": 169.308,
"eval_steps_per_second": 33.862,
"step": 2300
},
{
"epoch": 2.96,
"eval_f1": 0.43148798131355104,
"eval_loss": 0.7161591649055481,
"eval_runtime": 2.7392,
"eval_samples_per_second": 169.755,
"eval_steps_per_second": 33.951,
"step": 2400
},
{
"epoch": 3.0,
"step": 2436,
"total_flos": 1184978465021280.0,
"train_loss": 0.9243036530092237,
"train_runtime": 435.0692,
"train_samples_per_second": 27.968,
"train_steps_per_second": 5.599
}
],
"max_steps": 2436,
"num_train_epochs": 3,
"total_flos": 1184978465021280.0,
"trial_name": null,
"trial_params": null
}