File size: 6,863 Bytes
ee9a738
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aa46a3
 
 
 
 
 
 
 
 
ee9a738
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c36f3d8
 
 
 
 
ee9a738
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aa46a3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os
import random
import json
import pandas as pd

# Names of the evaluation dimensions. Each name maps to a leaderboard file
# all_dimensions/<name>.jsonl and a pending-submission file temp/<name>.jsonl.
dimensions = ['Audience', 'Keyword', 'Format', 'Language', 'Length', 'Source']


def make_clickable_model(model_name, link):
    """Return an HTML anchor showing *model_name* that opens *link* in a new tab.

    Neither argument is escaped; both are inserted into the markup as-is.
    """
    anchor_template = '<a target="_blank" style="text-decoration: underline" href="{link}">{model_name}</a>'
    return anchor_template.format(link=link, model_name=model_name)


def rerank():
    """Re-sort every dimension's leaderboard file and renumber its ranks.

    For each name in ``dimensions`` the file ``all_dimensions/<name>.jsonl``
    is read, ordered by ``WISE`` then ``SICR`` (both descending), and written
    back with a 1-based ``Rank`` as the first key of every record.
    """
    for dimension in dimensions:
        board_path = f"all_dimensions/{dimension}.jsonl"
        with open(board_path, "r") as src:
            rows = [json.loads(line) for line in src]

        # Best entries first: WISE is the primary key, SICR breaks ties.
        ranked = sorted(rows, key=lambda row: (row["WISE"], row["SICR"]), reverse=True)

        serialized = []
        for position, row in enumerate(ranked, start=1):
            # Rebuild the record so that Rank is emitted as the first key;
            # any stale Rank value carried by the row is dropped.
            record = {"Rank": position}
            record.update((key, value) for key, value in row.items() if key != "Rank")
            serialized.append(json.dumps(record) + "\n")

        with open(board_path, "w") as dst:
            dst.writelines(serialized)


def generate_sample_data():
    """Append ten randomly scored placeholder models to every dimension file.

    For each name in ``dimensions``, one JSON line per model ``Model_0`` ..
    ``Model_9`` is appended to ``all_dimensions/<name>.jsonl``. Every metric
    is a random value in [0, 1] rounded to two decimals.
    """
    metric_names = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )
    model_names = [f"Model_{i}" for i in range(10)]

    for dimension in dimensions:
        target_path = f"all_dimensions/{dimension}.jsonl"
        for model_name in model_names:
            record = {"Model": make_clickable_model(model_name, "https://huggingface.co/")}
            # Metrics are drawn in the order listed above, one draw per metric.
            for metric in metric_names:
                record[metric] = round(random.uniform(0, 1), 2)
            with open(target_path, "a") as out:
                out.write(json.dumps(record) + "\n")


def get_data(dimension):
    """Load ``all_dimensions/<dimension>.jsonl`` into a DataFrame.

    Each line of the file is parsed as one JSON record and becomes one row.
    """
    records = []
    with open(f"all_dimensions/{dimension}.jsonl", "r") as source:
        for raw_line in source:
            records.append(json.loads(raw_line))
    frame = pd.DataFrame(records)
    return frame


def get_submission_data():
    """Return the pending submission queue as a DataFrame.

    The queue is read from ``temp/Audience.jsonl``, one JSON record per line.
    An empty DataFrame is returned when that file (or the ``temp`` directory
    itself) does not exist, so callers never have to handle a missing queue.

    Returns:
        pandas.DataFrame: one row per queued record, or an empty frame.
    """
    try:
        with open("temp/Audience.jsonl", "r") as f:
            # Skip blank lines so a stray trailing newline cannot break parsing.
            data = [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        # Covers both a missing temp/ directory and a temp/ directory that
        # only holds files for other dimensions.
        return pd.DataFrame()
    return pd.DataFrame(data)


def submit(json_file):
    """Validate a submission file and append its results to the pending queue.

    The file is checked with ``check_json_file``. On success, one record per
    dimension is appended to ``temp/<dimension>.jsonl``.

    Args:
        json_file: path to the submitted JSON file.

    Returns:
        str: ``"Submission successful."`` on success, otherwise a message
        describing why the submission was rejected.
    """
    metric_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    flag, message = check_json_file(json_file)
    if not flag:
        return message

    with open(json_file, "r") as f:
        data = json.load(f)

    model = data.get("Model")
    if not model:
        # Report a missing model name instead of crashing with a KeyError.
        return "JSON file does not contain 'Model' key."

    # Optional keys are read with .get so that omitting them is not an error.
    if data.get("in_huggingface_hub"):
        # NOTE(review): this links to the Hugging Face front page rather than
        # a model page; kept as-is because stored "Model" strings are matched
        # verbatim elsewhere. Confirm whether a per-model URL was intended.
        model_name = make_clickable_model(model, "https://huggingface.co")
    elif data.get("Model Link"):
        model_name = make_clickable_model(model, data["Model Link"])
    else:
        model_name = model

    # Build every record before touching the disk so a malformed dimension
    # cannot leave the queue with only some of the dimension files updated.
    all_dimension_data = data["dimensions"]
    records = {}
    for dimension in dimensions:
        each_dimension_data = all_dimension_data[dimension]
        record = {"Model": model_name}
        record.update({key: each_dimension_data[key] for key in metric_keys})
        records[dimension] = record

    os.makedirs("temp", exist_ok=True)
    for dimension, record in records.items():
        # Append mode creates the file when it does not exist yet.
        with open(f"temp/{dimension}.jsonl", "a") as f:
            f.write(json.dumps(record) + "\n")
    return "Submission successful."


def refresh():
    """Merge queued submissions into the leaderboard files and re-rank them.

    For every dimension that has a ``temp/<dimension>.jsonl`` queue file, its
    records are merged into ``all_dimensions/<dimension>.jsonl``. A queued
    record replaces the existing record with the same ``Model`` value;
    otherwise it is appended. The queue file is removed once merged, and
    ``rerank`` is called after all dimensions are processed.

    Does nothing when ``temp`` holds no ``.jsonl`` files.
    """
    if is_empty("temp"):
        return
    os.makedirs("all_dimensions", exist_ok=True)
    for dimension in dimensions:
        queue_path = f"temp/{dimension}.jsonl"
        board_path = f"all_dimensions/{dimension}.jsonl"

        # A previous, interrupted refresh may already have consumed this
        # dimension's queue file; skip it so the remaining ones still merge.
        if not os.path.exists(queue_path):
            continue
        with open(queue_path, "r") as f:
            data = [json.loads(line) for line in f if line.strip()]

        # The leaderboard file may not exist yet; start from an empty board.
        if os.path.exists(board_path):
            with open(board_path, "r") as f:
                all_data = [json.loads(line) for line in f if line.strip()]
        else:
            all_data = []

        for d in data:
            for i, ad in enumerate(all_data):
                if ad["Model"] == d["Model"]:
                    # Same model already listed: overwrite its record.
                    all_data[i] = d
                    break
            else:
                all_data.append(d)

        with open(board_path, "w") as f:
            f.writelines(json.dumps(d) + "\n" for d in all_data)
        # Remove the queue file only after the merged board has been written.
        os.remove(queue_path)
    rerank()


def check_json_file(json_file):
    """Validate a submission JSON file.

    Checks, in order: the file parses as a JSON object; it names a model; the
    model is not already in the submission queue; it has a ``dimensions``
    mapping covering every entry of ``dimensions``; and every dimension
    carries all required metric keys.

    Args:
        json_file: path to the submitted JSON file.

    Returns:
        tuple[bool, str]: ``(True, "JSON file is valid.")`` on success,
        otherwise ``(False, <reason>)``.
    """
    required_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    with open(json_file, "r") as f:
        try:
            data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            return False, "JSON file is not valid JSON."

    if not isinstance(data, dict):
        return False, "JSON file must contain a JSON object."

    model = data.get("Model")
    if not isinstance(model, str) or not model:
        return False, "JSON file does not contain a valid 'Model' name."

    # Reject a model that is already queued. Queued names are stored either
    # as the plain name or as an HTML anchor whose text is the name, so match
    # the whole name rather than a substring ("bert" must not collide with
    # "bert-large").
    submission_queue_df = get_submission_data()
    if "Model" in submission_queue_df.columns:
        anchor_suffix = f'">{model}</a>'
        for queued in submission_queue_df["Model"].tolist():
            if queued == model or str(queued).endswith(anchor_suffix):
                return False, "Model already in submission queue."

    if "dimensions" not in data:
        return False, "JSON file does not contain 'dimensions' key."

    all_dimension_data = data["dimensions"]
    if not isinstance(all_dimension_data, dict) or not all(
        d in all_dimension_data for d in dimensions
    ):
        return False, "JSON file does not contain all dimensions."

    for d in dimensions:
        each_dimension_data = all_dimension_data[d]
        if not isinstance(each_dimension_data, dict) or not all(
            k in each_dimension_data for k in required_keys
        ):
            return False, f"Dimension '{d}' does not contain all required keys."

    return True, "JSON file is valid."


def is_empty(dir_path):
    """Return True when *dir_path* contains no ``.jsonl`` files.

    Only file names directly inside the directory are inspected. A directory
    that does not exist is reported as empty rather than raising.

    Args:
        dir_path: path of the directory to inspect.

    Returns:
        bool: False if at least one entry name ends with ``.jsonl``.
    """
    try:
        entries = os.listdir(dir_path)
    except FileNotFoundError:
        return True
    return not any(name.endswith(".jsonl") for name in entries)