daishen committed on
Commit
09d10e0
·
1 Parent(s): 3c4c73c

update app.py

Browse files
Files changed (3) hide show
  1. get_data_info.py +38 -7
  2. leaderboard.xlsx +0 -0
  3. scores.xlsx +0 -0
get_data_info.py CHANGED
@@ -1,23 +1,54 @@
1
  import copy
 
2
  import pandas as pd
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def plot_data():
6
  # read df and replace NaN values with an empty string
7
  leaderboard_df = pd.read_excel(
8
  'leaderboard.xlsx',
9
- sheet_name='Sheet1',
10
  header=0,
11
- usecols='A:P',
12
  nrows=14)
13
  leaderboard_df.fillna("-")
14
 
15
- df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 7))] # todo
16
- df_basic = leaderboard_df.iloc[:, [0] + list(range(7, 13))] # todo
17
- df_complex = leaderboard_df.iloc[:, [0] + list(range(13, 16))] # todo
 
 
18
 
19
  # Get df_overall
20
- df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 16))]
21
  plot_df_dict = {
22
  "Overall": df_overall,
23
  "Basic Legal NLP": df_nlp,
@@ -37,7 +68,7 @@ def tab_data():
37
  nrows=14)
38
  leaderboard_df.fillna("-")
39
 
40
- df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 18))] # todo
41
  df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))] # todo
42
  df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))] # todo
43
 
 
1
  import copy
2
+ import numpy as np
3
  import pandas as pd
4
 
5
 
6
def process_plot_data(df, flag=False):
    """Collapse per-metric leaderboard columns into one score column per task.

    The returned frame keeps "Model" and "Domain" and adds one column per
    task abbreviation, each multiplied by 100:

    * AR, ER, CR, CFM, SCM, CTP, LQA -- taken from ``<task>-F1``.
    * CJP -- row-wise mean of ``CJP-CP-F1`` and ``CJP-PTP-F1``.
    * NER -- taken from ``NER-Acc``.
    * JS, CU -- row-wise mean of ``<task>-ROUGE-1/2/L``.
    * JRG, LC -- row-wise mean of ``<task>-ROUGE-1/2/L``; rows whose
      ``<task>-ROUGE-1`` cell is the string '-' get '-' instead of a number.

    Args:
        df: DataFrame holding "Model", "Domain" and the metric columns
            listed above.
        flag: When truthy, the result is also written to 'scores.xlsx'
            (sheet "Sheet1", without the index).

    Returns:
        A new DataFrame with the columns ordered as
        Model, Domain, AR, ER, NER, JS, CR, CFM, SCM, CJP, CTP, LQA, JRG, CU, LC.

    Raises:
        KeyError: If a required source column is absent from ``df``.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM", "CJP", "CTP", "LQA", "JRG", "CU", "LC"]
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA")
    rouge_suffixes = ("ROUGE-1", "ROUGE-2", "ROUGE-L")

    # Keep only the "Model" and "Domain" columns; everything else is derived.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new task column.
    for col in columns_names[2:]:
        if col in f1_tasks:
            df2[col] = df[f"{col}-F1"] * 100
        elif col == "CJP":
            df2[col] = df[[f"{col}-CP-F1", f"{col}-PTP-F1"]].mean(axis=1) * 100
        elif col == "NER":
            df2[col] = df[f"{col}-Acc"] * 100
        elif col in ("JRG", "LC"):
            rouge_cols = [f"{col}-{suffix}" for suffix in rouge_suffixes]
            # Rows marked '-' in ROUGE-1 have no score and keep the marker.
            missing = df[rouge_cols[0]] == '-'
            # Coerce explicitly: a column containing '-' is object-dtype, and
            # averaging object columns is not reliable across pandas versions.
            numeric = df[rouge_cols].apply(pd.to_numeric, errors="coerce")
            scores = numeric.mean(axis=1) * 100
            if missing.any():
                scores = scores.astype(object)
                scores.loc[missing] = '-'
            df2[col] = scores
        elif col in ("JS", "CU"):
            rouge_cols = [f"{col}-{suffix}" for suffix in rouge_suffixes]
            df2[col] = df[rouge_cols].mean(axis=1) * 100

    # reindex returns a new frame; the result must be kept for the ordering
    # to take effect.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)

    return df2
33
+
34
  def plot_data():
35
  # read df and replace NaN values with an empty string
36
  leaderboard_df = pd.read_excel(
37
  'leaderboard.xlsx',
38
+ sheet_name='Sheet2',
39
  header=0,
40
+ usecols='A:AS',
41
  nrows=14)
42
  leaderboard_df.fillna("-")
43
 
44
+ df = process_plot_data(leaderboard_df)
45
+
46
+ df_nlp = df.iloc[:, [0] + list(range(2, 7))] # todo
47
+ df_basic = df.iloc[:, [0] + list(range(7, 12))] # todo
48
+ df_complex = df.iloc[:, [0] + list(range(12, 15))] # todo
49
 
50
  # Get df_overall
51
+ df_overall = df.iloc[:, [0] + list(range(2, 15))]
52
  plot_df_dict = {
53
  "Overall": df_overall,
54
  "Basic Legal NLP": df_nlp,
 
68
  nrows=14)
69
  leaderboard_df.fillna("-")
70
 
71
+ df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 9)) + list(range(12, 18))] # todo
72
  df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))] # todo
73
  df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))] # todo
74
 
leaderboard.xlsx CHANGED
Binary files a/leaderboard.xlsx and b/leaderboard.xlsx differ
 
scores.xlsx ADDED
Binary file (6.4 kB). View file