from enum import StrEnum, auto class Tasks(StrEnum): EXTRACTIVE_QUESTION_ANSWERING = auto() MULTIPLE_CHOICE = auto() SUMMARIZATION = auto() NATURAL_LANGUAGE_INFERENCE = auto() TEXT_CLASSIFICATION = auto() MACHINE_TRANSLATION = auto() GRAMMATICAL_ERROR_CORRECTION = auto() class Metrics(StrEnum): F1 = "f1" EXACT_MATCH = "exact_match" ROGUE1 = "rouge1" ROUGE2 = "rouge2" ROUGEL = "rougeL" ACCURACY = "acc" WER = "wer" BLEU = "bleu" DATASET_TASK_DICT = { # extractive qa 'xquad_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING, 'tquad': Tasks.EXTRACTIVE_QUESTION_ANSWERING, 'mkqa_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING, # not exactly # summarization 'xlsum_tr': Tasks.SUMMARIZATION, 'mlsum_tr': Tasks.SUMMARIZATION, 'wiki_lingua_tr': Tasks.SUMMARIZATION, 'tr-wikihow-summ': Tasks.SUMMARIZATION, # NLI #'nli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE, 'mnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE, 'snli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE, 'xnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE, # multiple-choice 'xcopa_tr': Tasks.MULTIPLE_CHOICE, 'exams_tr': Tasks.MULTIPLE_CHOICE, 'belebele_tr': Tasks.MULTIPLE_CHOICE, 'turkish_plu': Tasks.MULTIPLE_CHOICE, 'turkish_plu_goal_inference': Tasks.MULTIPLE_CHOICE, 'turkish_plu_next_event_prediction': Tasks.MULTIPLE_CHOICE, 'turkish_plu_step_inference': Tasks.MULTIPLE_CHOICE, 'turkish_plu_step_ordering': Tasks.MULTIPLE_CHOICE, # fact-checking, not sure whether these are multi-choice # 'trclaim19': Tasks.MULTIPLE_CHOICE, 'check_worthiness': Tasks.MULTIPLE_CHOICE, 'relevance_judgment': Tasks.MULTIPLE_CHOICE, # text classification 'sts_tr': Tasks.TEXT_CLASSIFICATION, 'offenseval_tr': Tasks.TEXT_CLASSIFICATION, 'news_cat': Tasks.TEXT_CLASSIFICATION, 'ironytr': Tasks.TEXT_CLASSIFICATION, # other generation 'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION, 'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION, } TASK_METRIC_DICT = { Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH, Tasks.MULTIPLE_CHOICE: Metrics.ACCURACY, Tasks.TEXT_CLASSIFICATION: Metrics.ACCURACY, Tasks.NATURAL_LANGUAGE_INFERENCE: Metrics.ACCURACY, Tasks.SUMMARIZATION: Metrics.ROUGE2, Tasks.MACHINE_TRANSLATION: Metrics.BLEU, Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH, } GENERATIVE_TASKS = ( Tasks.SUMMARIZATION, Tasks.MACHINE_TRANSLATION, Tasks.GRAMMATICAL_ERROR_CORRECTION, ) DATASET_GROUPS = { 'QA': { 'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'], 'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.', }, 'MCQA': { 'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')], 'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.' }, 'TC': { 'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ], 'description': 'Text Classification datasets.', }, 'NLI': { 'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'], 'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.', }, 'SUM': { 'datasets': [name for name, task in DATASET_TASK_DICT.items() if task == Tasks.SUMMARIZATION], 'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).', }, 'GEC': { 'datasets': ['gecturk_generation',], 'description': 'Grammatical Error Correction task.', }, 'MT': { 'datasets': ['wmt-tr-en-prompt'], 'description': 'Machine Translation on WMT-16 dataset (English-to-Turkish).', }, # 'TrClaim19': { # 'datasets': ['check_worthiness', 'relevance_judgment'], # 'description': 'TrClaim19 dataset for fact-checking.', # }, }