|
---
language:
- en
tags:
- QA
license: cc-by-4.0
datasets:
- BoolQ
- CommonSenseQA
- DROP
- DuoRC
- HellaSWAG
- HotpotQA
- HybridQA
- NarrativeQA
- NaturalQuestionsShort
- NewsQA
- QAMR
- RACE
- SearchQA
- SIQA
- SQuAD
- TriviaQA-web
metrics:
- Accuracy
- Precision
- Recall
- F1
- MRR
- R@3
- R@5
---
|
|
|
BERT for sequence classification, trained on a QA dataset prediction task.

- Input: a question.
- Output: the dataset the question comes from.

Original paper: [TWEAC: Transformer with Extendable QA Agent Classifiers](https://arxiv.org/abs/2104.07081)
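
Below is a minimal inference sketch using the Transformers library; the model id is a placeholder for this checkpoint's Hub id, and the label names are assumed to be stored in the checkpoint's `id2label` config:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "<this-model-id>"  # placeholder: replace with the actual Hub id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

question = "Who wrote the novel Moby-Dick?"
inputs = tokenizer(question, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Map the top-scoring class index to a dataset name, assuming the
# checkpoint's config carries an id2label mapping over the 16 datasets.
predicted_id = logits.argmax(dim=-1).item()
print(model.config.id2label[predicted_id])
```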
|
|
|
Datasets used for training:

```python
list_datasets = [
    'BoolQ', 'CommonSenseQA', 'DROP', 'DuoRC', 'HellaSWAG', 'HotpotQA',
    'HybridQA', 'NarrativeQA', 'NaturalQuestionsShort', 'NewsQA', 'QAMR',
    'RACE', 'SearchQA', 'SIQA', 'SQuAD', 'TriviaQA-web',
]
```
|
|
|
Results for all datasets:

- Accuracy: 0.7919096825783123
- Precision: 0.731586272892176
- Recall: 0.7919096825783123
- F1: 0.7494425609552463
- MRR: 0.8720871733637521
- R@3: 0.9438690810655046
- R@5: 0.9745318608004427
- Queries/second: 6052.33538824659
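
Here MRR and R@k are ranking metrics over the 16 candidate datasets: the classes are ranked by logit for each question, MRR averages the reciprocal rank of the true dataset, and R@k is the fraction of questions whose true dataset appears among the top k predictions. A small NumPy sketch of these definitions (an illustration, not the original evaluation code):

```python
import numpy as np

def mrr_and_recall_at_k(scores, gold, ks=(3, 5)):
    """scores: (n_queries, n_classes) logits; gold: (n_queries,) true class ids."""
    # Rank classes per query from highest to lowest score.
    ranking = np.argsort(-scores, axis=1)
    # 0-based position of the gold class in each query's ranking.
    positions = np.argmax(ranking == gold[:, None], axis=1)
    mrr = float(np.mean(1.0 / (positions + 1)))
    recall_at_k = {k: float(np.mean(positions < k)) for k in ks}
    return mrr, recall_at_k
```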
|
|
|
|
|
Results per dataset:

```json
{
  "BoolQ": {
    "accuracy": 0.998776758409786,
    "mrr": 0.999388379204893,
    "r@3": 1.0,
    "r@5": 1.0,
    "query_per_second": 6978.947907596168,
    "precision": 0.8649364406779662,
    "recall": 0.998776758409786,
    "f1": 0.9270508089696281
  },
  "CommonSenseQA": {
    "accuracy": 0.9247135842880524,
    "mrr": 0.9476358338878795,
    "r@3": 0.9705400981996727,
    "r@5": 0.9705400981996727,
    "query_per_second": 5823.984138936813,
    "precision": 0.442443226311668,
    "recall": 0.9247135842880524,
    "f1": 0.5985169491525425
  },
  "DROP": {
    "accuracy": 0.9075083892617449,
    "mrr": 0.9378200367399193,
    "r@3": 0.9609899328859061,
    "r@5": 0.9786073825503355,
    "query_per_second": 6440.988897129248,
    "precision": 0.8636726546906187,
    "recall": 0.9075083892617449,
    "f1": 0.8850480670893842
  },
  "DuoRC": {
    "accuracy": 0.5555803405457654,
    "mrr": 0.7368963429107307,
    "r@3": 0.9092125808610305,
    "r@5": 0.9596996059186557,
    "query_per_second": 6853.643198794893,
    "precision": 0.646814404432133,
    "recall": 0.5555803405457654,
    "f1": 0.5977360905563778
  },
  "HellaSWAG": {
    "accuracy": 0.998406691894045,
    "mrr": 0.9990705702715262,
    "r@3": 1.0,
    "r@5": 1.0,
    "query_per_second": 3091.5012960785157,
    "precision": 0.9974134500596896,
    "recall": 0.998406691894045,
    "f1": 0.9979098238280083
  },
  "HotpotQA": {
    "accuracy": 0.7414435784479837,
    "mrr": 0.8435804344945315,
    "r@3": 0.9325652321247034,
    "r@5": 0.973568281938326,
    "query_per_second": 4972.668019223381,
    "precision": 0.7352150537634409,
    "recall": 0.7414435784479837,
    "f1": 0.7383161801923401
  },
  "HybridQA": {
    "accuracy": 0.7934218118869013,
    "mrr": 0.8806947764680021,
    "r@3": 0.964800923254472,
    "r@5": 0.9930755914598961,
    "query_per_second": 4886.494046259562,
    "precision": 0.7198952879581152,
    "recall": 0.7934218118869013,
    "f1": 0.7548723579467472
  },
  "NarrativeQA": {
    "accuracy": 0.5623756749076442,
    "mrr": 0.7416681781060867,
    "r@3": 0.9011082693947144,
    "r@5": 0.9580373212086767,
    "query_per_second": 7081.067049796865,
    "precision": 0.5623224095472628,
    "recall": 0.5623756749076442,
    "f1": 0.5623490409661377
  },
  "NaturalQuestionsShort": {
    "accuracy": 0.7985353692739171,
    "mrr": 0.8743599435345307,
    "r@3": 0.9439077594266126,
    "r@5": 0.9774072919912745,
    "query_per_second": 7136.590426649795,
    "precision": 0.7963020509633313,
    "recall": 0.7985353692739171,
    "f1": 0.7974171464135678
  },
  "NewsQA": {
    "accuracy": 0.5375118708452041,
    "mrr": 0.71192075967717,
    "r@3": 0.855650522317189,
    "r@5": 0.939696106362773,
    "query_per_second": 7193.851409052092,
    "precision": 0.18757249378624688,
    "recall": 0.5375118708452041,
    "f1": 0.2780985136961061
  },
  "QAMR": {
    "accuracy": 0.6658497602557272,
    "mrr": 0.7969741223377345,
    "r@3": 0.9207778369738945,
    "r@5": 0.973361747469366,
    "query_per_second": 7321.775044800525,
    "precision": 0.8654525309881587,
    "recall": 0.6658497602557272,
    "f1": 0.7526421968624852
  },
  "RACE": {
    "accuracy": 0.8771538617474154,
    "mrr": 0.917901778042666,
    "r@3": 0.9489154672613015,
    "r@5": 0.9693898236367322,
    "query_per_second": 6952.225120744351,
    "precision": 0.8767983789260385,
    "recall": 0.8771538617474154,
    "f1": 0.8769760843129306
  },
  "SearchQA": {
    "accuracy": 0.9762073027090695,
    "mrr": 0.9865069592101393,
    "r@3": 0.9972909305064782,
    "r@5": 0.9984687868080094,
    "query_per_second": 4031.0193826035634,
    "precision": 0.9870191735143503,
    "recall": 0.9762073027090695,
    "f1": 0.9815834665719192
  },
  "SIQA": {
    "accuracy": 0.9969293756397134,
    "mrr": 0.9977823268509042,
    "r@3": 0.9979529170931423,
    "r@5": 1.0,
    "query_per_second": 6711.547709005977,
    "precision": 0.9329501915708812,
    "recall": 0.9969293756397134,
    "f1": 0.9638792676892627
  },
  "SQuAD": {
    "accuracy": 0.550628092881614,
    "mrr": 0.7164538452390565,
    "r@3": 0.8660068519223448,
    "r@5": 0.9366197183098591,
    "query_per_second": 7033.420124363291,
    "precision": 0.48613678373382624,
    "recall": 0.550628092881614,
    "f1": 0.5163766175814368
  },
  "TriviaQA-web": {
    "accuracy": 0.7855124582584125,
    "mrr": 0.8647404868442627,
    "r@3": 0.9321859748266119,
    "r@5": 0.9640380169535063,
    "query_per_second": 4327.642440910395,
    "precision": 0.7404358353510896,
    "recall": 0.7855124582584125,
    "f1": 0.7623083634550667
  }
}
```