DeBERTaV3-small-GeneralSentenceTransformer-keepTraining-checkpoints-tmp/last-checkpoint/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 5170,
  "global_step": 68928,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15001160631383473,
      "grad_norm": 6.272289752960205,
      "learning_rate": 7.568874560375147e-07,
      "loss": 0.2058,
      "step": 5170
    },
    {
      "epoch": 0.15001160631383473,
      "eval_nli-pairs_loss": 0.14333350956439972,
      "eval_nli-pairs_runtime": 15.7419,
      "eval_nli-pairs_samples_per_second": 432.477,
      "eval_nli-pairs_steps_per_second": 27.062,
      "step": 5170
    },
    {
      "epoch": 0.15001160631383473,
      "eval_qnli-contrastive_loss": 0.0066042630933225155,
      "eval_qnli-contrastive_runtime": 16.0666,
      "eval_qnli-contrastive_samples_per_second": 340.022,
      "eval_qnli-contrastive_steps_per_second": 21.286,
      "step": 5170
    },
    {
      "epoch": 0.30002321262766946,
      "grad_norm": 0.1510516107082367,
      "learning_rate": 1.5143610785463074e-06,
      "loss": 0.1978,
      "step": 10340
    },
    {
      "epoch": 0.30002321262766946,
      "eval_nli-pairs_loss": 0.14484703540802002,
      "eval_nli-pairs_runtime": 15.5078,
      "eval_nli-pairs_samples_per_second": 439.004,
      "eval_nli-pairs_steps_per_second": 27.47,
      "step": 10340
    },
    {
      "epoch": 0.30002321262766946,
      "eval_qnli-contrastive_loss": 0.005310256965458393,
      "eval_qnli-contrastive_runtime": 15.9103,
      "eval_qnli-contrastive_samples_per_second": 343.363,
      "eval_qnli-contrastive_steps_per_second": 21.496,
      "step": 10340
    },
    {
      "epoch": 0.45003481894150416,
      "grad_norm": 0.12565171718597412,
      "learning_rate": 2.271541617819461e-06,
      "loss": 0.2122,
      "step": 15510
    },
    {
      "epoch": 0.45003481894150416,
      "eval_nli-pairs_loss": 0.14426207542419434,
      "eval_nli-pairs_runtime": 15.502,
      "eval_nli-pairs_samples_per_second": 439.169,
      "eval_nli-pairs_steps_per_second": 27.48,
      "step": 15510
    },
    {
      "epoch": 0.45003481894150416,
      "eval_qnli-contrastive_loss": 0.006289388053119183,
      "eval_qnli-contrastive_runtime": 15.9331,
      "eval_qnli-contrastive_samples_per_second": 342.872,
      "eval_qnli-contrastive_steps_per_second": 21.465,
      "step": 15510
    },
    {
      "epoch": 0.6000464252553389,
      "grad_norm": 2.5047528743743896,
      "learning_rate": 3.0288686987104337e-06,
      "loss": 0.1918,
      "step": 20680
    },
    {
      "epoch": 0.6000464252553389,
      "eval_nli-pairs_loss": 0.14941762387752533,
      "eval_nli-pairs_runtime": 15.559,
      "eval_nli-pairs_samples_per_second": 437.561,
      "eval_nli-pairs_steps_per_second": 27.38,
      "step": 20680
    },
    {
      "epoch": 0.6000464252553389,
      "eval_qnli-contrastive_loss": 0.005304055288434029,
      "eval_qnli-contrastive_runtime": 15.9267,
      "eval_qnli-contrastive_samples_per_second": 343.01,
      "eval_qnli-contrastive_steps_per_second": 21.473,
      "step": 20680
    },
    {
      "epoch": 0.7500580315691736,
      "grad_norm": 0.0,
      "learning_rate": 3.7861957796014073e-06,
      "loss": 0.2103,
      "step": 25850
    },
    {
      "epoch": 0.7500580315691736,
      "eval_nli-pairs_loss": 0.14879679679870605,
      "eval_nli-pairs_runtime": 15.7056,
      "eval_nli-pairs_samples_per_second": 433.477,
      "eval_nli-pairs_steps_per_second": 27.124,
      "step": 25850
    },
    {
      "epoch": 0.7500580315691736,
      "eval_qnli-contrastive_loss": 0.008172737434506416,
      "eval_qnli-contrastive_runtime": 16.1249,
      "eval_qnli-contrastive_samples_per_second": 338.792,
      "eval_qnli-contrastive_steps_per_second": 21.209,
      "step": 25850
    },
    {
      "epoch": 0.9000696378830083,
      "grad_norm": 0.4431862533092499,
      "learning_rate": 4.543376318874561e-06,
      "loss": 0.2056,
      "step": 31020
    },
    {
      "epoch": 0.9000696378830083,
      "eval_nli-pairs_loss": 0.15133754909038544,
      "eval_nli-pairs_runtime": 15.6187,
      "eval_nli-pairs_samples_per_second": 435.887,
      "eval_nli-pairs_steps_per_second": 27.275,
      "step": 31020
    },
    {
      "epoch": 0.9000696378830083,
      "eval_qnli-contrastive_loss": 0.003923382144421339,
      "eval_qnli-contrastive_runtime": 15.9272,
      "eval_qnli-contrastive_samples_per_second": 342.998,
      "eval_qnli-contrastive_steps_per_second": 21.473,
      "step": 31020
    },
    {
      "epoch": 1.050081244196843,
      "grad_norm": 3.5489258766174316,
      "learning_rate": 4.989171745750203e-06,
      "loss": 0.2067,
      "step": 36190
    },
    {
      "epoch": 1.050081244196843,
      "eval_nli-pairs_loss": 0.15012863278388977,
      "eval_nli-pairs_runtime": 15.698,
      "eval_nli-pairs_samples_per_second": 433.687,
      "eval_nli-pairs_steps_per_second": 27.137,
      "step": 36190
    },
    {
      "epoch": 1.050081244196843,
      "eval_qnli-contrastive_loss": 0.007893337868154049,
      "eval_qnli-contrastive_runtime": 16.0422,
      "eval_qnli-contrastive_samples_per_second": 340.539,
      "eval_qnli-contrastive_steps_per_second": 21.319,
      "step": 36190
    },
    {
      "epoch": 1.2000928505106778,
      "grad_norm": 1.2927824258804321,
      "learning_rate": 4.867136962447637e-06,
      "loss": 0.1987,
      "step": 41360
    },
    {
      "epoch": 1.2000928505106778,
      "eval_nli-pairs_loss": 0.14854447543621063,
      "eval_nli-pairs_runtime": 15.3871,
      "eval_nli-pairs_samples_per_second": 442.449,
      "eval_nli-pairs_steps_per_second": 27.686,
      "step": 41360
    },
    {
      "epoch": 1.2000928505106778,
      "eval_qnli-contrastive_loss": 0.008381461724638939,
      "eval_qnli-contrastive_runtime": 15.7761,
      "eval_qnli-contrastive_samples_per_second": 346.283,
      "eval_qnli-contrastive_steps_per_second": 21.678,
      "step": 41360
    },
    {
      "epoch": 1.3501044568245124,
      "grad_norm": 12.146841049194336,
      "learning_rate": 4.615724011281596e-06,
      "loss": 0.1987,
      "step": 46530
    },
    {
      "epoch": 1.3501044568245124,
      "eval_nli-pairs_loss": 0.15174470841884613,
      "eval_nli-pairs_runtime": 15.5759,
      "eval_nli-pairs_samples_per_second": 437.085,
      "eval_nli-pairs_steps_per_second": 27.35,
      "step": 46530
    },
    {
      "epoch": 1.3501044568245124,
      "eval_qnli-contrastive_loss": 0.00563395069912076,
      "eval_qnli-contrastive_runtime": 15.9485,
      "eval_qnli-contrastive_samples_per_second": 342.54,
      "eval_qnli-contrastive_steps_per_second": 21.444,
      "step": 46530
    },
    {
      "epoch": 1.5001160631383472,
      "grad_norm": 0.12127237021923065,
      "learning_rate": 4.248583657440329e-06,
      "loss": 0.205,
      "step": 51700
    },
    {
      "epoch": 1.5001160631383472,
      "eval_nli-pairs_loss": 0.1490125209093094,
      "eval_nli-pairs_runtime": 15.4029,
      "eval_nli-pairs_samples_per_second": 441.994,
      "eval_nli-pairs_steps_per_second": 27.657,
      "step": 51700
    },
    {
      "epoch": 1.5001160631383472,
      "eval_qnli-contrastive_loss": 0.0062314593233168125,
      "eval_qnli-contrastive_runtime": 15.801,
      "eval_qnli-contrastive_samples_per_second": 345.738,
      "eval_qnli-contrastive_steps_per_second": 21.644,
      "step": 51700
    },
    {
      "epoch": 1.650127669452182,
      "grad_norm": 9.487723350524902,
      "learning_rate": 3.7857309145658974e-06,
      "loss": 0.183,
      "step": 56870
    },
    {
      "epoch": 1.650127669452182,
      "eval_nli-pairs_loss": 0.14577454328536987,
      "eval_nli-pairs_runtime": 15.5234,
      "eval_nli-pairs_samples_per_second": 438.563,
      "eval_nli-pairs_steps_per_second": 27.442,
      "step": 56870
    },
    {
      "epoch": 1.650127669452182,
      "eval_qnli-contrastive_loss": 0.006071700248867273,
      "eval_qnli-contrastive_runtime": 15.7649,
      "eval_qnli-contrastive_samples_per_second": 346.529,
      "eval_qnli-contrastive_steps_per_second": 21.694,
      "step": 56870
    },
    {
      "epoch": 1.8001392757660168,
      "grad_norm": 3.4426660537719727,
      "learning_rate": 3.252653526527395e-06,
      "loss": 0.1763,
      "step": 62040
    },
    {
      "epoch": 1.8001392757660168,
      "eval_nli-pairs_loss": 0.1418175995349884,
      "eval_nli-pairs_runtime": 15.4065,
      "eval_nli-pairs_samples_per_second": 441.89,
      "eval_nli-pairs_steps_per_second": 27.651,
      "step": 62040
    },
    {
      "epoch": 1.8001392757660168,
      "eval_qnli-contrastive_loss": 0.007999507710337639,
      "eval_qnli-contrastive_runtime": 15.8195,
      "eval_qnli-contrastive_samples_per_second": 345.334,
      "eval_qnli-contrastive_steps_per_second": 21.619,
      "step": 62040
    },
    {
      "epoch": 1.9501508820798514,
      "grad_norm": 0.897132158279419,
      "learning_rate": 2.678533013873007e-06,
      "loss": 0.1982,
      "step": 67210
    },
    {
      "epoch": 1.9501508820798514,
      "eval_nli-pairs_loss": 0.14005425572395325,
      "eval_nli-pairs_runtime": 15.4671,
      "eval_nli-pairs_samples_per_second": 440.16,
      "eval_nli-pairs_steps_per_second": 27.542,
      "step": 67210
    },
    {
      "epoch": 1.9501508820798514,
      "eval_qnli-contrastive_loss": 0.006610157899558544,
      "eval_qnli-contrastive_runtime": 15.8954,
      "eval_qnli-contrastive_samples_per_second": 343.684,
      "eval_qnli-contrastive_steps_per_second": 21.516,
      "step": 67210
    }
  ],
  "logging_steps": 5170,
  "max_steps": 103392,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 20,
  "trial_name": null,
  "trial_params": null
}
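
This state file records training and evaluation metrics at every logging interval (5170 steps). A minimal sketch of reading the per-evaluator losses out of "log_history" is shown below; it assumes the file sits at the checkpoint path listed above and uses only the keys present in this file.

# Minimal sketch: load trainer_state.json and print eval losses per step.
# Path is an assumption based on the checkpoint layout shown above; adjust as needed.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_nli-pairs_loss" in entry:
        print(entry["step"], "nli-pairs", entry["eval_nli-pairs_loss"])
    elif "eval_qnli-contrastive_loss" in entry:
        print(entry["step"], "qnli-contrastive", entry["eval_qnli-contrastive_loss"])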