|
{ |
|
"best_metric": 0.7438596491228071, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s1_final//finetune/qnli/checkpoint-2000", |
|
"epoch": 10.0, |
|
"global_step": 3660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.6854768395423889, |
|
"eval_f1": 0.698279479647503, |
|
"eval_loss": 0.5918503403663635, |
|
"eval_mcc": 0.37039988909838023, |
|
"eval_runtime": 4.521, |
|
"eval_samples_per_second": 505.638, |
|
"eval_steps_per_second": 63.26, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.316939890710383e-05, |
|
"loss": 0.6348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.6942257285118103, |
|
"eval_f1": 0.7279096924873493, |
|
"eval_loss": 0.6217535138130188, |
|
"eval_mcc": 0.3841755069564845, |
|
"eval_runtime": 4.5007, |
|
"eval_samples_per_second": 507.921, |
|
"eval_steps_per_second": 63.546, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.633879781420765e-05, |
|
"loss": 0.5113, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.6999124884605408, |
|
"eval_f1": 0.7262569832402235, |
|
"eval_loss": 0.6595348715782166, |
|
"eval_mcc": 0.39577981158604925, |
|
"eval_runtime": 4.4963, |
|
"eval_samples_per_second": 508.422, |
|
"eval_steps_per_second": 63.608, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.9508196721311478e-05, |
|
"loss": 0.3571, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.7104111909866333, |
|
"eval_f1": 0.7324171382376718, |
|
"eval_loss": 0.8105891942977905, |
|
"eval_mcc": 0.4174093343048448, |
|
"eval_runtime": 4.5019, |
|
"eval_samples_per_second": 507.789, |
|
"eval_steps_per_second": 63.529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 2.2677595628415303e-05, |
|
"loss": 0.2085, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.712598443031311, |
|
"eval_f1": 0.7438596491228071, |
|
"eval_loss": 0.8948610424995422, |
|
"eval_mcc": 0.42158343510176083, |
|
"eval_runtime": 4.5038, |
|
"eval_samples_per_second": 507.569, |
|
"eval_steps_per_second": 63.502, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_accuracy": 0.7213473320007324, |
|
"eval_f1": 0.7357942762339277, |
|
"eval_loss": 1.1155877113342285, |
|
"eval_mcc": 0.44112337210044195, |
|
"eval_runtime": 4.4995, |
|
"eval_samples_per_second": 508.053, |
|
"eval_steps_per_second": 63.562, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.5846994535519128e-05, |
|
"loss": 0.1211, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_accuracy": 0.7152230739593506, |
|
"eval_f1": 0.740534077321642, |
|
"eval_loss": 1.3353219032287598, |
|
"eval_mcc": 0.42670434808360297, |
|
"eval_runtime": 4.5055, |
|
"eval_samples_per_second": 507.381, |
|
"eval_steps_per_second": 63.478, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.016393442622952e-06, |
|
"loss": 0.0599, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_accuracy": 0.7121610045433044, |
|
"eval_f1": 0.7269709543568464, |
|
"eval_loss": 1.4796608686447144, |
|
"eval_mcc": 0.42273254192250986, |
|
"eval_runtime": 4.5005, |
|
"eval_samples_per_second": 507.939, |
|
"eval_steps_per_second": 63.548, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.185792349726776e-06, |
|
"loss": 0.0317, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_accuracy": 0.7108486294746399, |
|
"eval_f1": 0.7300939158840344, |
|
"eval_loss": 1.662826418876648, |
|
"eval_mcc": 0.4188408674974473, |
|
"eval_runtime": 4.4953, |
|
"eval_samples_per_second": 508.53, |
|
"eval_steps_per_second": 63.622, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3660, |
|
"total_flos": 3.664834789427712e+16, |
|
"train_loss": 0.2638393348032008, |
|
"train_runtime": 2094.1582, |
|
"train_samples_per_second": 209.712, |
|
"train_steps_per_second": 1.748 |
|
} |
|
], |
|
"max_steps": 3660, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.664834789427712e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|