josecannete's picture
adding model finetuned on QA (MLQA)
f54896f
{
"best_metric": 74.34789884230815,
"best_model_checkpoint": "/data/jcanete/all_results/mlqa/albeto_xxlarge/epochs_2_bs_16_lr_5e-6/checkpoint-7800",
"epoch": 1.9999025056059276,
"global_step": 10256,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_exact_match": 38.4,
"eval_f1": 63.6410695647597,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 4.757702808112325e-06,
"loss": 2.4731,
"step": 500
},
{
"epoch": 0.12,
"eval_exact_match": 38.4,
"eval_f1": 64.544213013251,
"step": 600
},
{
"epoch": 0.18,
"eval_exact_match": 40.8,
"eval_f1": 67.22901394580703,
"step": 900
},
{
"epoch": 0.19,
"learning_rate": 4.51443057722309e-06,
"loss": 1.948,
"step": 1000
},
{
"epoch": 0.23,
"eval_exact_match": 43.8,
"eval_f1": 68.61469074139023,
"step": 1200
},
{
"epoch": 0.29,
"learning_rate": 4.270670826833074e-06,
"loss": 1.8,
"step": 1500
},
{
"epoch": 0.29,
"eval_exact_match": 41.4,
"eval_f1": 67.90524053632915,
"step": 1500
},
{
"epoch": 0.35,
"eval_exact_match": 44.0,
"eval_f1": 70.24463093067978,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 4.0273985959438385e-06,
"loss": 1.7607,
"step": 2000
},
{
"epoch": 0.41,
"eval_exact_match": 44.8,
"eval_f1": 69.83718821214595,
"step": 2100
},
{
"epoch": 0.47,
"eval_exact_match": 44.4,
"eval_f1": 70.77459313733458,
"step": 2400
},
{
"epoch": 0.49,
"learning_rate": 3.7836388455538226e-06,
"loss": 1.7102,
"step": 2500
},
{
"epoch": 0.53,
"eval_exact_match": 45.6,
"eval_f1": 71.38763960651633,
"step": 2700
},
{
"epoch": 0.58,
"learning_rate": 3.5398790951638067e-06,
"loss": 1.7394,
"step": 3000
},
{
"epoch": 0.58,
"eval_exact_match": 44.0,
"eval_f1": 69.412900377769,
"step": 3000
},
{
"epoch": 0.64,
"eval_exact_match": 47.0,
"eval_f1": 72.19159641530743,
"step": 3300
},
{
"epoch": 0.68,
"learning_rate": 3.2961193447737912e-06,
"loss": 1.6818,
"step": 3500
},
{
"epoch": 0.7,
"eval_exact_match": 44.6,
"eval_f1": 70.72571293515003,
"step": 3600
},
{
"epoch": 0.76,
"eval_exact_match": 46.0,
"eval_f1": 71.9410006201964,
"step": 3900
},
{
"epoch": 0.78,
"learning_rate": 3.0523595943837758e-06,
"loss": 1.6872,
"step": 4000
},
{
"epoch": 0.82,
"eval_exact_match": 44.6,
"eval_f1": 71.3955318328154,
"step": 4200
},
{
"epoch": 0.88,
"learning_rate": 2.80859984399376e-06,
"loss": 1.6612,
"step": 4500
},
{
"epoch": 0.88,
"eval_exact_match": 46.8,
"eval_f1": 71.87883155952227,
"step": 4500
},
{
"epoch": 0.94,
"eval_exact_match": 48.0,
"eval_f1": 72.41927545450808,
"step": 4800
},
{
"epoch": 0.97,
"learning_rate": 2.5653276131045246e-06,
"loss": 1.6439,
"step": 5000
},
{
"epoch": 0.99,
"eval_exact_match": 46.6,
"eval_f1": 73.06916789937951,
"step": 5100
},
{
"epoch": 1.05,
"eval_exact_match": 48.4,
"eval_f1": 73.98548384439948,
"step": 5400
},
{
"epoch": 1.07,
"learning_rate": 2.3215678627145087e-06,
"loss": 1.4243,
"step": 5500
},
{
"epoch": 1.11,
"eval_exact_match": 47.2,
"eval_f1": 72.57079556834765,
"step": 5700
},
{
"epoch": 1.17,
"learning_rate": 2.0778081123244933e-06,
"loss": 1.3584,
"step": 6000
},
{
"epoch": 1.17,
"eval_exact_match": 48.8,
"eval_f1": 73.08780082573215,
"step": 6000
},
{
"epoch": 1.23,
"eval_exact_match": 46.4,
"eval_f1": 71.91085486551921,
"step": 6300
},
{
"epoch": 1.27,
"learning_rate": 1.8340483619344776e-06,
"loss": 1.3542,
"step": 6500
},
{
"epoch": 1.29,
"eval_exact_match": 47.2,
"eval_f1": 72.44435482394755,
"step": 6600
},
{
"epoch": 1.35,
"eval_exact_match": 48.0,
"eval_f1": 72.41674068329031,
"step": 6900
},
{
"epoch": 1.37,
"learning_rate": 1.590288611544462e-06,
"loss": 1.327,
"step": 7000
},
{
"epoch": 1.4,
"eval_exact_match": 47.8,
"eval_f1": 72.90594893687623,
"step": 7200
},
{
"epoch": 1.46,
"learning_rate": 1.3465288611544464e-06,
"loss": 1.3348,
"step": 7500
},
{
"epoch": 1.46,
"eval_exact_match": 48.4,
"eval_f1": 72.90760899561369,
"step": 7500
},
{
"epoch": 1.52,
"eval_exact_match": 50.6,
"eval_f1": 74.34789884230815,
"step": 7800
},
{
"epoch": 1.56,
"learning_rate": 1.1027691107644305e-06,
"loss": 1.3464,
"step": 8000
},
{
"epoch": 1.58,
"eval_exact_match": 48.6,
"eval_f1": 72.45736285926266,
"step": 8100
},
{
"epoch": 1.64,
"eval_exact_match": 49.2,
"eval_f1": 72.37190764197561,
"step": 8400
},
{
"epoch": 1.66,
"learning_rate": 8.59009360374415e-07,
"loss": 1.3301,
"step": 8500
},
{
"epoch": 1.7,
"eval_exact_match": 48.8,
"eval_f1": 73.07938587006336,
"step": 8700
},
{
"epoch": 1.75,
"learning_rate": 6.152496099843995e-07,
"loss": 1.3533,
"step": 9000
},
{
"epoch": 1.75,
"eval_exact_match": 48.2,
"eval_f1": 73.30312499533368,
"step": 9000
},
{
"epoch": 1.81,
"eval_exact_match": 48.6,
"eval_f1": 73.24347351849956,
"step": 9300
},
{
"epoch": 1.85,
"learning_rate": 3.7197737909516383e-07,
"loss": 1.3472,
"step": 9500
},
{
"epoch": 1.87,
"eval_exact_match": 48.4,
"eval_f1": 73.49200531066305,
"step": 9600
},
{
"epoch": 1.93,
"eval_exact_match": 48.8,
"eval_f1": 73.5158314844892,
"step": 9900
},
{
"epoch": 1.95,
"learning_rate": 1.2870514820592826e-07,
"loss": 1.3221,
"step": 10000
},
{
"epoch": 1.99,
"eval_exact_match": 48.8,
"eval_f1": 73.76578660183563,
"step": 10200
},
{
"epoch": 2.0,
"step": 10256,
"total_flos": 6.233158034379648e+16,
"train_loss": 1.573529590124645,
"train_runtime": 24730.4391,
"train_samples_per_second": 6.636,
"train_steps_per_second": 0.415
}
],
"max_steps": 10256,
"num_train_epochs": 2,
"total_flos": 6.233158034379648e+16,
"trial_name": null,
"trial_params": null
}