# Cetvel / data.py
from enum import StrEnum, auto


class Tasks(StrEnum):
    EXTRACTIVE_QUESTION_ANSWERING = auto()
    MULTIPLE_CHOICE = auto()
    SUMMARIZATION = auto()
    NATURAL_LANGUAGE_INFERENCE = auto()
    TEXT_CLASSIFICATION = auto()
    MACHINE_TRANSLATION = auto()
    GRAMMATICAL_ERROR_CORRECTION = auto()


class Metrics(StrEnum):
    F1 = "f1"
    EXACT_MATCH = "exact_match"
    ROUGE1 = "rouge1"
    ROUGE2 = "rouge2"
    ROUGEL = "rougeL"
    ACCURACY = "acc"
    WER = "wer"
    BLEU = "bleu"


DATASET_TASK_DICT = {
    # extractive QA
    'xquad_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    'tquad': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    'mkqa_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,  # not strictly extractive QA
    # summarization
    'xlsum_tr': Tasks.SUMMARIZATION,
    'mlsum_tr': Tasks.SUMMARIZATION,
    'wiki_lingua_tr': Tasks.SUMMARIZATION,
    'tr-wikihow-summ': Tasks.SUMMARIZATION,
    # NLI
    # 'nli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'mnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'snli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'xnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    # multiple choice
    'xcopa_tr': Tasks.MULTIPLE_CHOICE,
    'exams_tr': Tasks.MULTIPLE_CHOICE,
    'belebele_tr': Tasks.MULTIPLE_CHOICE,
    'turkish_plu': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_goal_inference': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_next_event_prediction': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_step_inference': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_step_ordering': Tasks.MULTIPLE_CHOICE,
    # fact-checking; unclear whether these are strictly multiple choice
    # 'trclaim19': Tasks.MULTIPLE_CHOICE,
    'check_worthiness': Tasks.MULTIPLE_CHOICE,
    'relevance_judgment': Tasks.MULTIPLE_CHOICE,
    # text classification
    'sts_tr': Tasks.TEXT_CLASSIFICATION,
    'offenseval_tr': Tasks.TEXT_CLASSIFICATION,
    'news_cat': Tasks.TEXT_CLASSIFICATION,
    'ironytr': Tasks.TEXT_CLASSIFICATION,
    # other generation
    'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
    'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
}


TASK_METRIC_DICT = {
    Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
    Tasks.MULTIPLE_CHOICE: Metrics.ACCURACY,
    Tasks.TEXT_CLASSIFICATION: Metrics.ACCURACY,
    Tasks.NATURAL_LANGUAGE_INFERENCE: Metrics.ACCURACY,
    Tasks.SUMMARIZATION: Metrics.ROUGE2,
    Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
    Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
}
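

# Not part of the original module: a minimal usage sketch showing how the two
# dictionaries above compose -- a dataset name maps to a task, and the task maps
# to its primary metric. The helper name `metric_for_dataset` is hypothetical.
def metric_for_dataset(dataset: str) -> Metrics:
    """Return the primary metric for a dataset, e.g. 'xquad_tr' -> Metrics.EXACT_MATCH."""
    return TASK_METRIC_DICT[DATASET_TASK_DICT[dataset]]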


GENERATIVE_TASKS = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)
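

# Not part of the original module: a hypothetical check for whether a dataset
# is scored as free-form generation (summarization, MT, GEC) rather than by
# accuracy over a fixed set of choices.
def is_generative(dataset: str) -> bool:
    return DATASET_TASK_DICT[dataset] in GENERATIVE_TASKS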


DATASET_GROUPS = {
    'QA': {
        'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'],
        'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
    },
    'MCQA': {
        'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr']
        + [name for name in DATASET_TASK_DICT if name.startswith('turkish_plu')],
        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.',
    },
    'TC': {
        'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'],
        'description': 'Text Classification datasets.',
    },
    'NLI': {
        'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'],
        'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.',
    },
    'SUM': {
        'datasets': [name for name, task in DATASET_TASK_DICT.items() if task == Tasks.SUMMARIZATION],
        'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).',
    },
    'GEC': {
        'datasets': ['gecturk_generation'],
        'description': 'Grammatical Error Correction task.',
    },
    'MT': {
        'datasets': ['wmt-tr-en-prompt'],
        'description': 'Machine Translation on the WMT-16 dataset (English-to-Turkish).',
    },
    # 'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}
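

# Not part of the original module: a hypothetical aggregation sketch showing how
# DATASET_GROUPS could turn per-dataset scores into per-group averages. The
# `scores` argument (dataset name -> score) and the helper name are assumptions.
def group_averages(scores: dict[str, float]) -> dict[str, float]:
    averages = {}
    for group, info in DATASET_GROUPS.items():
        # average only over the group's datasets that actually have a score
        members = [scores[name] for name in info['datasets'] if name in scores]
        if members:
            averages[group] = sum(members) / len(members)
    return averages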