{
  "model_name": "microsoft/deberta-v3-large",
  "lr": 6e-06,
  "epochs": 1,
  "weight_decay": 0.01,
  "train_bs": 3,
  "eval_bs": 8,
  "gradient_accumulation_steps": 8,
  "gradient_checkpointing": false,
  "eval_datasets": [
    {"path": "./data/all_eval/osu_eval.csv", "name": "osu"},
    {"path": "./data/all_eval/healthver_eval.csv", "name": "healthver"},
    {"path": "./data/all_eval/deepset_1_eval.csv", "name": "deepset_1"},
    {"path": "./data/all_eval/deepset_2_eval.csv", "name": "deepset_2"},
    {"path": "./data/all_eval/deepset_3_eval.csv", "name": "deepset_3"},
    {"path": "./data/all_eval/deepset_4_eval.csv", "name": "deepset_4"}
  ],
  "train_dataset_path": "./data/all_training/all_train_v2_pseudo.csv",
  "ensemble_model_predictions": [
    "mathislucka/deberta-large-hallucination-eval-v2",
    "mathislucka/deberta-base-hallucination-eval-v2",
    "models/albert-xxlarge-v2-optim-data-v1",
    "models/deberta-base-v3-no-atomic-wfc-nq",
    "models/deberta-v3-large-data-optim-v2"
  ],
  "half_precision": true
}