File size: 3,991 Bytes
500fbd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from enum import StrEnum, auto


class Tasks(StrEnum):
    EXTRACTIVE_QUESTION_ANSWERING = auto()
    MULTIPLE_CHOICE = auto()
    SUMMARIZATION = auto()
    NATURAL_LANGUAGE_INFERENCE = auto()
    TEXT_CLASSIFICATION = auto()
    MACHINE_TRANSLATION = auto()
    GRAMMATICAL_ERROR_CORRECTION = auto()


class Metrics(StrEnum):
    F1 = "f1"
    EXACT_MATCH = "exact_match"
    ROGUE1 = "rouge1"
    ROUGE2 = "rouge2"
    ROUGEL = "rougeL"
    ACCURACY = "acc"
    WER = "wer"
    BLEU = "bleu"


# Maps every dataset identifier to the task category it is assigned to.
# The table is assembled one task at a time; the merge order below fixes the
# iteration order of the resulting dict.
DATASET_TASK_DICT: dict[str, Tasks] = (
    # extractive qa ('mkqa_tr': not exactly)
    dict.fromkeys(
        ('xquad_tr', 'tquad', 'mkqa_tr'),
        Tasks.EXTRACTIVE_QUESTION_ANSWERING,
    )
    # summarization
    | dict.fromkeys(
        ('xlsum_tr', 'mlsum_tr', 'wiki_lingua_tr', 'tr-wikihow-summ'),
        Tasks.SUMMARIZATION,
    )
    # NLI ('nli_tr' is currently disabled)
    | dict.fromkeys(
        ('mnli_tr', 'snli_tr', 'xnli_tr'),
        Tasks.NATURAL_LANGUAGE_INFERENCE,
    )
    # multiple-choice
    | dict.fromkeys(
        (
            'xcopa_tr',
            'exams_tr',
            'belebele_tr',
            'turkish_plu',
            'turkish_plu_goal_inference',
            'turkish_plu_next_event_prediction',
            'turkish_plu_step_inference',
            'turkish_plu_step_ordering',
        ),
        Tasks.MULTIPLE_CHOICE,
    )
    # fact-checking, not sure whether these are multi-choice
    # ('trclaim19' is currently disabled)
    | dict.fromkeys(
        ('check_worthiness', 'relevance_judgment'),
        Tasks.MULTIPLE_CHOICE,
    )
    # text classification
    | dict.fromkeys(
        ('sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'),
        Tasks.TEXT_CLASSIFICATION,
    )
    # other generation
    | {
        'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
        'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
    }
)


# Metric associated with each task category.
TASK_METRIC_DICT: dict[Tasks, Metrics] = {
    Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
    # These three task categories all map to accuracy.
    **dict.fromkeys(
        (
            Tasks.MULTIPLE_CHOICE,
            Tasks.TEXT_CLASSIFICATION,
            Tasks.NATURAL_LANGUAGE_INFERENCE,
        ),
        Metrics.ACCURACY,
    ),
    Tasks.SUMMARIZATION: Metrics.ROUGE2,
    Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
    Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
}


# Task categories grouped as generative.
# NOTE(review): the code consuming this tuple is not visible here; presumably
# it selects generation-style evaluation for these tasks -- confirm with callers.
GENERATIVE_TASKS: tuple[Tasks, ...] = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)

# Named groups of datasets: short group key -> the member dataset identifiers
# plus a prose description of the group.
DATASET_GROUPS = {
    'QA': {
        # NOTE(review): the description names only XQuAD and TQUAD although
        # 'mkqa_tr' is also listed -- confirm whether the text should mention it.
        'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'],
        'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
    },
    'MCQA': {
        'datasets': [
            'xcopa_tr',
            'exams_tr',
            'belebele_tr',
            # Every key of DATASET_TASK_DICT with the Turkish PLU prefix, in
            # that dict's own order.
            *(
                dataset
                for dataset in DATASET_TASK_DICT
                if dataset.startswith('turkish_plu')
            ),
        ],
        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.',
    },
    'TC': {
        'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'],
        'description': 'Text Classification datasets.',
    },
    'NLI': {
        'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'],
        'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.',
    },
    'SUM': {
        # Derived from the task table, so it follows DATASET_TASK_DICT
        # without a second hand-maintained list.
        'datasets': [
            dataset
            for dataset, category in DATASET_TASK_DICT.items()
            if category == Tasks.SUMMARIZATION
        ],
        'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).',
    },
    'GEC': {
        'datasets': ['gecturk_generation'],
        'description': 'Grammatical Error Correction task.',
    },
    'MT': {
        # NOTE(review): the dataset key reads "tr-en" while the description
        # says English-to-Turkish -- confirm the translation direction.
        'datasets': ['wmt-tr-en-prompt'],
        'description': 'Machine Translation on WMT-16 dataset (English-to-Turkish).',
    },

    # Disabled group, kept for reference:
    #  'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}