File size: 3,886 Bytes
569f484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from ...smp import *
import numpy as np
import re

# Message string describing a failed API answer (not referenced by the
# functions in this section).
FAIL_MSG = 'Failed to obtain answer via API.'

# Video-length buckets. get_dimension_rating keys its result by these values
# plus an extra 'overall' entry.
DURATIONS = ['short', 'medium', 'long']

# Accepted values of the 'domain' column.
DOMAINS = [
    'Knowledge', 'Film & Television', 'Sports Competition',
    'Artistic Performance', 'Life Record', 'Multilingual',
]

# Accepted values of the 'sub_category' column.
SUB_CATEGORIES = [
    'Humanity & History', 'Literature & Art', 'Biology & Medicine',
    'Finance & Commerce', 'Astronomy', 'Geography', 'Law', 'Life Tip',
    'Technology',
    'Animation', 'Movie & TV Show', 'Documentary', 'News Report',
    'Esports', 'Basketball', 'Football', 'Athletics', 'Other Sports',
    'Stage Play', 'Magic Show', 'Variety Show', 'Acrobatics',
    'Handicraft', 'Food', 'Fashion', 'Daily Life', 'Travel',
    'Pet & Animal', 'Exercise',
    'Multilingual',
]

# Accepted values of the 'task_type' column.
TASK_CATEGORIES = [
    'Temporal Perception', 'Spatial Perception', 'Attribute Perception',
    'Action Recognition', 'Object Recognition', 'OCR Problems',
    'Counting Problem',
    'Temporal Reasoning', 'Spatial Reasoning', 'Action Reasoning',
    'Object Reasoning', 'Information Synopsis',
]


def _format_mean_score(scores):
    """Return the mean of the non-negative entries of ``scores`` as a 2-decimal string.

    Negative entries are left out of the average. When nothing remains the
    result is ``'nan'`` -- the same text that formatting ``np.mean([])`` yields --
    but produced directly so numpy does not emit its empty-slice RuntimeWarning.
    """
    valid = [x for x in scores if x >= 0]
    if not valid:
        return 'nan'
    return f'{np.mean(valid):.2f}'


def get_dimension_rating(data_path):
    """Aggregate per-sample scores into mean ratings per duration and category.

    The table is read with the project-level ``load`` helper and must provide
    the columns ``domain``, ``sub_category``, ``task_type``, ``duration`` and
    ``score``. Scores below zero are excluded from every average (presumably
    they mark failed or invalid predictions -- confirm with the producer of
    the file).

    Args:
        data_path: Location of the result table, passed straight to ``load``.

    Returns:
        A dict keyed by each entry of ``DURATIONS`` plus ``'overall'``. Every
        value is a dict with:

        * ``'overall'``: mean over all samples of that duration,
        * ``'domain'``: ``{domain name: mean}`` for each entry of ``DOMAINS``,
        * ``'sub_category'``: same for ``SUB_CATEGORIES``,
        * ``'task_type'``: same for ``TASK_CATEGORIES``.

        All means are strings formatted with two decimals; a bucket without
        any non-negative score is reported as ``'nan'``.

    Raises:
        KeyError: If a row carries a duration, domain, sub-category or task
            type that is not listed in the corresponding module constant.
    """
    data = load(data_path)

    groups = DURATIONS + ['overall']
    duration_rating = {
        group: {
            'overall': '',
            'domain': {k: [] for k in DOMAINS},
            'sub_category': {k: [] for k in SUB_CATEGORIES},
            'task_type': {k: [] for k in TASK_CATEGORIES},
        }
        for group in groups
    }

    # Walk the needed columns in lockstep instead of materialising a row
    # object (data.iloc[i]) for every single field access.
    rows = zip(
        data['domain'], data['sub_category'], data['task_type'],
        data['duration'], data['score'],
    )
    for domain, sub_ctg, task_ctg, duration, score in rows:
        # Each sample counts once for its own duration and once for 'overall'.
        for group in (duration, 'overall'):
            bucket = duration_rating[group]
            bucket['domain'][domain].append(score)
            bucket['sub_category'][sub_ctg].append(score)
            bucket['task_type'][task_ctg].append(score)

    for group in groups:
        bucket = duration_rating[group]

        # Every sample sits in exactly one domain list, so concatenating the
        # domain lists gives all scores of this group. This must happen before
        # the lists below are replaced by their formatted means.
        all_scores = [x for scores in bucket['domain'].values() for x in scores]
        bucket['overall'] = _format_mean_score(all_scores)

        for dimension in ('domain', 'sub_category', 'task_type'):
            per_name = bucket[dimension]
            for name in per_name:
                per_name[name] = _format_mean_score(per_name[name])

    return duration_rating


def extract_characters_regex(s):
    """Extract the first option letter (A, B, C or D) from a model response.

    Known answer lead-ins such as ``'The answer is'`` or ``'Best answer:'`` are
    removed first (every occurrence, anywhere in the text) so that capital
    letters inside them -- the ``B`` of ``Best``, the ``A`` of ``Answer`` -- are
    not mistaken for the chosen option. The first remaining uppercase
    ``A``-``D`` is returned.

    Args:
        s: Raw response text.

    Returns:
        The single character ``'A'``, ``'B'``, ``'C'`` or ``'D'``, or ``''``
        when the text contains none of them after lead-in removal.
    """
    s = s.strip()
    # Longer phrases come before their own prefixes ('The answer is' before
    # 'The answer') so the longer form is stripped in full. Every entry needs
    # its trailing comma: adjacent string literals are silently concatenated.
    answer_prefixes = [
        'The best answer is',
        'The correct answer is',
        'The answer is',
        'The answer',
        'The best option is',
        'The correct option is',
        'Best answer:',
        'Best option:',
        'Answer:',
        'Option:',
    ]
    for answer_prefix in answer_prefixes:
        s = s.replace(answer_prefix, '')

    matches = re.search(r'[ABCD]', s)
    if matches is None:
        return ''
    return matches[0]