# Spaces:
# Running
# Running
from enum import StrEnum, auto | |
class Tasks(StrEnum): | |
EXTRACTIVE_QUESTION_ANSWERING = auto() | |
MULTIPLE_CHOICE = auto() | |
SUMMARIZATION = auto() | |
NATURAL_LANGUAGE_INFERENCE = auto() | |
TEXT_CLASSIFICATION = auto() | |
MACHINE_TRANSLATION = auto() | |
GRAMMATICAL_ERROR_CORRECTION = auto() | |
class Metrics(StrEnum): | |
F1 = "f1" | |
EXACT_MATCH = "exact_match" | |
ROGUE1 = "rouge1" | |
ROUGE2 = "rouge2" | |
ROUGEL = "rougeL" | |
ACCURACY = "acc" | |
WER = "wer" | |
BLEU = "bleu" | |
# Maps each dataset name to the task category it is assigned to.
DATASET_TASK_DICT: dict[str, Tasks] = {
    # extractive qa
    'xquad_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    'tquad': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    'mkqa_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,  # not exactly extractive
    # summarization
    'xlsum_tr': Tasks.SUMMARIZATION,
    'mlsum_tr': Tasks.SUMMARIZATION,
    'wiki_lingua_tr': Tasks.SUMMARIZATION,
    'tr-wikihow-summ': Tasks.SUMMARIZATION,
    # NLI
    # 'nli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'mnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'snli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    'xnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
    # multiple-choice
    'xcopa_tr': Tasks.MULTIPLE_CHOICE,
    'exams_tr': Tasks.MULTIPLE_CHOICE,
    'belebele_tr': Tasks.MULTIPLE_CHOICE,
    'turkish_plu': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_goal_inference': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_next_event_prediction': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_step_inference': Tasks.MULTIPLE_CHOICE,
    'turkish_plu_step_ordering': Tasks.MULTIPLE_CHOICE,
    # fact-checking; not sure whether these are multiple-choice
    # 'trclaim19': Tasks.MULTIPLE_CHOICE,
    'check_worthiness': Tasks.MULTIPLE_CHOICE,
    'relevance_judgment': Tasks.MULTIPLE_CHOICE,
    # text classification
    'sts_tr': Tasks.TEXT_CLASSIFICATION,
    'offenseval_tr': Tasks.TEXT_CLASSIFICATION,
    'news_cat': Tasks.TEXT_CLASSIFICATION,
    'ironytr': Tasks.TEXT_CLASSIFICATION,
    # other generation
    'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
    'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
}
# Maps each task category to the single metric associated with it.
TASK_METRIC_DICT: dict[Tasks, Metrics] = {
    Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
    Tasks.MULTIPLE_CHOICE: Metrics.ACCURACY,
    Tasks.TEXT_CLASSIFICATION: Metrics.ACCURACY,
    Tasks.NATURAL_LANGUAGE_INFERENCE: Metrics.ACCURACY,
    Tasks.SUMMARIZATION: Metrics.ROUGE2,
    Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
    Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
}
# Task categories listed as generative.
GENERATIVE_TASKS: tuple[Tasks, ...] = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)
# Named groups of datasets. Each entry holds the list of dataset names in the
# group ('datasets') and a human-readable summary ('description').
DATASET_GROUPS = {
    'QA': {
        'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'],
        # NOTE(review): the description names only XQuAD and TQUAD although
        # 'mkqa_tr' is also in the list - confirm whether that is intended.
        'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
    },
    'MCQA': {
        # The three fixed datasets, followed by every 'turkish_plu*' key of
        # DATASET_TASK_DICT in that dict's insertion order.
        'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr']
        + [name for name in DATASET_TASK_DICT if name.startswith('turkish_plu')],
        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.',
    },
    'TC': {
        'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'],
        'description': 'Text Classification datasets.',
    },
    'NLI': {
        'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'],
        'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.',
    },
    'SUM': {
        # Every dataset mapped to the summarization task in DATASET_TASK_DICT.
        'datasets': [name for name, task in DATASET_TASK_DICT.items() if task == Tasks.SUMMARIZATION],
        'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).',
    },
    'GEC': {
        'datasets': ['gecturk_generation'],
        'description': 'Grammatical Error Correction task.',
    },
    'MT': {
        'datasets': ['wmt-tr-en-prompt'],
        # NOTE(review): the dataset name reads "tr-en" while the description
        # says English-to-Turkish - confirm the translation direction.
        'description': 'Machine Translation on WMT-16 dataset (English-to-Turkish).',
    },
    # 'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}