File size: 4,146 Bytes
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c0582b
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c0582b
 
500fbd7
 
 
 
 
 
 
 
 
 
f0dd9aa
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from enum import StrEnum, auto


class Tasks(StrEnum):
    EXTRACTIVE_QUESTION_ANSWERING = auto()
    MULTIPLE_CHOICE = auto()
    SUMMARIZATION = auto()
    NATURAL_LANGUAGE_INFERENCE = auto()
    TEXT_CLASSIFICATION = auto()
    MACHINE_TRANSLATION = auto()
    GRAMMATICAL_ERROR_CORRECTION = auto()


class Metrics(StrEnum):
    F1 = "f1"
    EXACT_MATCH = "exact_match"
    ROGUE1 = "rouge1"
    ROUGE2 = "rouge2"
    ROUGEL = "rougeL"
    ACCURACY = "acc"
    WER = "wer"
    BLEU = "bleu"


# Registry mapping each dataset identifier to the task type it is treated as.
# Key order is significant: DATASET_GROUPS builds some of its dataset lists by
# iterating over this mapping, so insert new entries at a deliberate position.
DATASET_TASK_DICT: dict[str, Tasks] = {
    # --- Extractive question answering ---
    "xquad_tr": Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    "tquad": Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    # Original author's caveat: MKQA is "not exactly" extractive QA.
    "mkqa_tr": Tasks.EXTRACTIVE_QUESTION_ANSWERING,

    # --- Summarization ---
    "xlsum_tr": Tasks.SUMMARIZATION,
    "mlsum_tr": Tasks.SUMMARIZATION,
    "wiki_lingua_tr": Tasks.SUMMARIZATION,
    "tr-wikihow-summ": Tasks.SUMMARIZATION,

    # --- Natural language inference ---
    # Disabled entry: "nli_tr": Tasks.NATURAL_LANGUAGE_INFERENCE,
    "mnli_tr": Tasks.NATURAL_LANGUAGE_INFERENCE,
    "snli_tr": Tasks.NATURAL_LANGUAGE_INFERENCE,
    "xnli_tr": Tasks.NATURAL_LANGUAGE_INFERENCE,

    # --- Multiple choice ---
    "xcopa_tr": Tasks.MULTIPLE_CHOICE,
    "exams_tr": Tasks.MULTIPLE_CHOICE,
    "belebele_tr": Tasks.MULTIPLE_CHOICE,
    "turkish_plu": Tasks.MULTIPLE_CHOICE,
    "turkish_plu_goal_inference": Tasks.MULTIPLE_CHOICE,
    "turkish_plu_next_event_prediction": Tasks.MULTIPLE_CHOICE,
    "turkish_plu_step_inference": Tasks.MULTIPLE_CHOICE,
    "turkish_plu_step_ordering": Tasks.MULTIPLE_CHOICE,

    # --- Fact-checking ---
    # Original author was unsure whether these really are multiple-choice.
    # Disabled entry: "trclaim19": Tasks.MULTIPLE_CHOICE,
    "check_worthiness": Tasks.MULTIPLE_CHOICE,
    "relevance_judgment": Tasks.MULTIPLE_CHOICE,

    # --- Text classification ---
    "sts_tr": Tasks.TEXT_CLASSIFICATION,
    "offenseval_tr": Tasks.TEXT_CLASSIFICATION,
    "news_cat": Tasks.TEXT_CLASSIFICATION,
    "ironytr": Tasks.TEXT_CLASSIFICATION,

    # --- Other generation tasks ---
    "wmt-tr-en-prompt": Tasks.MACHINE_TRANSLATION,
    "gecturk_generation": Tasks.GRAMMATICAL_ERROR_CORRECTION,

    # Multiple-choice, not generation; kept last to preserve the mapping's
    # original iteration order.
    "turkce_atasozleri": Tasks.MULTIPLE_CHOICE,
}


# Maps every task type to exactly one metric; each member of Tasks has an
# entry here.
# NOTE(review): presumably this is the headline metric reported per task --
# confirm against the code that consumes this mapping.
TASK_METRIC_DICT: dict[Tasks, Metrics] = {
    Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
    Tasks.MULTIPLE_CHOICE: Metrics.ACCURACY,
    Tasks.TEXT_CLASSIFICATION: Metrics.ACCURACY,
    Tasks.NATURAL_LANGUAGE_INFERENCE: Metrics.ACCURACY,
    Tasks.SUMMARIZATION: Metrics.ROUGE2,
    Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
    Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
}


# Task types flagged as generative. Note that EXTRACTIVE_QUESTION_ANSWERING is
# not listed here.
# NOTE(review): how consumers use this distinction is not visible in this
# module -- confirm before relying on it.
GENERATIVE_TASKS: tuple[Tasks, ...] = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)

# Named groups of datasets. Each group has the dataset identifiers it contains
# ("datasets") and a human-readable summary ("description"). Some dataset lists
# are derived from DATASET_TASK_DICT and therefore follow its key order.
DATASET_GROUPS = {
    "QA": {
        # NOTE(review): 'mkqa_tr' is listed here but the description only
        # names XQuAD and TQUAD -- confirm which one is intended.
        "datasets": ["xquad_tr", "tquad", "mkqa_tr"],
        "description": "Turkish splits of SQuAD-like datasets XQuAD and TQUAD.",
    },
    "MCQA": {
        "datasets": [
            "xcopa_tr",
            "exams_tr",
            "belebele_tr",
            "turkce_atasozleri",
            # Every registered dataset whose identifier starts with
            # 'turkish_plu' (the base dataset and its sub-tasks).
            *(
                dataset
                for dataset in DATASET_TASK_DICT
                if dataset.startswith("turkish_plu")
            ),
        ],
        "description": "Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, and Turkce-Atasozleri.",
    },
    "TC": {
        "datasets": ["sts_tr", "offenseval_tr", "news_cat", "ironytr"],
        "description": "Text Classification datasets.",
    },
    "NLI": {
        "datasets": ["mnli_tr", "snli_tr", "xnli_tr"],
        "description": "Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.",
    },
    "SUM": {
        # FIXME: include all the datasets.
        # All summarization datasets except 'tr-wikihow-summ'.
        # NOTE(review): the description still names TrWikiHowSumm even though
        # it is filtered out here.
        "datasets": [
            dataset
            for dataset, task in DATASET_TASK_DICT.items()
            if task == Tasks.SUMMARIZATION and dataset != "tr-wikihow-summ"
        ],
        "description": "Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).",
    },
    "GEC": {
        "datasets": ["gecturk_generation"],
        "description": "Grammatical Error Correction task.",
    },
    "MT": {
        # NOTE(review): the key reads 'tr-en' while the description says
        # English-to-Turkish -- confirm the translation direction.
        "datasets": ["wmt-tr-en-prompt"],
        "description": "Machine Translation on WMT-16 dataset (English-to-Turkish).",
    },

    # Disabled group. While it stays disabled, 'check_worthiness' and
    # 'relevance_judgment' are registered in DATASET_TASK_DICT but belong to
    # no group.
    #  'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}