Spaces:
Sleeping
Sleeping
daishen
committed on
Commit
·
09d10e0
1
Parent(s):
3c4c73c
update app.py
Browse files- get_data_info.py +38 -7
- leaderboard.xlsx +0 -0
- scores.xlsx +0 -0
get_data_info.py
CHANGED
@@ -1,23 +1,54 @@
|
|
1 |
import copy
|
|
|
2 |
import pandas as pd
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def plot_data():
|
6 |
# read df and replace NaN values with an empty string
|
7 |
leaderboard_df = pd.read_excel(
|
8 |
'leaderboard.xlsx',
|
9 |
-
sheet_name='
|
10 |
header=0,
|
11 |
-
usecols='A:
|
12 |
nrows=14)
|
13 |
leaderboard_df.fillna("-")
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
|
19 |
# Get df_overall
|
20 |
-
df_overall =
|
21 |
plot_df_dict = {
|
22 |
"Overall": df_overall,
|
23 |
"Basic Legal NLP": df_nlp,
|
@@ -37,7 +68,7 @@ def tab_data():
|
|
37 |
nrows=14)
|
38 |
leaderboard_df.fillna("-")
|
39 |
|
40 |
-
df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 18))] # todo
|
41 |
df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))] # todo
|
42 |
df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))] # todo
|
43 |
|
|
|
1 |
import copy
|
2 |
+
import numpy as np
|
3 |
import pandas as pd
|
4 |
|
5 |
|
6 |
+
def process_plot_data(df, flag=False):
    """Collapse per-metric leaderboard columns into one score per task.

    Args:
        df: DataFrame containing "Model" and "Domain" plus the metric
            columns read below ("<task>-F1", "CJP-CP-F1", "CJP-PTP-F1",
            "NER-Acc" and "<task>-ROUGE-1/2/L").
        flag: When true, also write the result to 'scores.xlsx'
            (sheet "Sheet1", without the index).

    Returns:
        A new DataFrame with columns "Model", "Domain" and one column per
        task, in the order of ``columns_names``. Scores are multiplied by
        100. For "JRG" and "LC", rows whose ROUGE-1 cell is the placeholder
        '-' keep '-' as their score.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM", "CJP", "CTP", "LQA", "JRG", "CU", "LC"]
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA")
    rouge_metrics = ("ROUGE-1", "ROUGE-2", "ROUGE-L")

    # Keep only the "Model" and "Domain" columns; every score column is derived below.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new per-task column.
    for col in columns_names[2:]:
        if col in f1_tasks:
            df2[col] = df[f"{col}-F1"] * 100
        elif col == "CJP":
            # CJP is reported as two sub-scores; use their mean.
            df2[col] = df[[f"{col}-CP-F1", f"{col}-PTP-F1"]].mean(axis=1) * 100
        elif col == "NER":
            df2[col] = df[f"{col}-Acc"] * 100
        elif col in ("JRG", "LC"):
            rouge_cols = [f"{col}-{metric}" for metric in rouge_metrics]
            # Rows flagged with the '-' placeholder have no score for this task.
            missing = df[f"{col}-ROUGE-1"] == '-'
            # Coerce explicitly so the mean never runs on object-dtype data.
            numeric = df[rouge_cols].apply(pd.to_numeric, errors="coerce")
            scores = numeric.mean(axis=1) * 100
            if missing.any():
                # Mixed float/str content requires an object column.
                scores = scores.astype(object)
                scores.loc[missing] = '-'
            df2[col] = scores
        elif col in ("JS", "CU"):
            rouge_cols = [f"{col}-{metric}" for metric in rouge_metrics]
            df2[col] = df[rouge_cols].mean(axis=1) * 100

    # reindex returns a new frame; assign it so the column order is enforced.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save the result to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)

    return df2
|
33 |
+
|
34 |
def plot_data():
|
35 |
# read df and replace NaN values with an empty string
|
36 |
leaderboard_df = pd.read_excel(
|
37 |
'leaderboard.xlsx',
|
38 |
+
sheet_name='Sheet2',
|
39 |
header=0,
|
40 |
+
usecols='A:AS',
|
41 |
nrows=14)
|
42 |
leaderboard_df.fillna("-")
|
43 |
|
44 |
+
df = process_plot_data(leaderboard_df)
|
45 |
+
|
46 |
+
df_nlp = df.iloc[:, [0] + list(range(2, 7))] # todo
|
47 |
+
df_basic = df.iloc[:, [0] + list(range(7, 12))] # todo
|
48 |
+
df_complex = df.iloc[:, [0] + list(range(12, 15))] # todo
|
49 |
|
50 |
# Get df_overall
|
51 |
+
df_overall = df.iloc[:, [0] + list(range(2, 15))]
|
52 |
plot_df_dict = {
|
53 |
"Overall": df_overall,
|
54 |
"Basic Legal NLP": df_nlp,
|
|
|
68 |
nrows=14)
|
69 |
leaderboard_df.fillna("-")
|
70 |
|
71 |
+
df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 9)) + list(range(12, 18))] # todo
|
72 |
df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))] # todo
|
73 |
df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))] # todo
|
74 |
|
leaderboard.xlsx
CHANGED
Binary files a/leaderboard.xlsx and b/leaderboard.xlsx differ
|
|
scores.xlsx
ADDED
Binary file (6.4 kB). View file
|
|