Spaces:
Sleeping
Sleeping
daishen
committed on
Commit
·
c3dcec1
1
Parent(s):
9425c6e
add app.py
Browse files- app.py +120 -0
- get_data_info.py +57 -0
- leaderboard.xlsx +0 -0
- requirements.txt +11 -0
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# matplotlib.use('macosx')
|
2 |
+
import gradio as gr
|
3 |
+
import plotly.graph_objects as go
|
4 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
5 |
+
from get_data_info import plot_data, tab_data
|
6 |
+
|
7 |
+
|
8 |
+
def create_data_interface(df):
    """Build a Gradio ``Dataframe`` component showing the contents of ``df``.

    The first column is declared with datatype ``"str"`` and every remaining
    column with datatype ``"number"``; ``max_rows=10`` is passed through to
    the component.

    Args:
        df: Table exposing ``.columns`` and ``.values`` (the callers in this
            file pass pandas DataFrames).

    Returns:
        The constructed ``gr.components.Dataframe``.
    """
    column_names = list(df.columns)

    # One "str" for the leading column, then "number" for each of the others.
    column_types = ["str"]
    column_types.extend(["number"] * (len(column_names) - 1))

    table = gr.components.Dataframe(
        value=df.values.tolist(),
        headers=column_names,
        datatype=column_types,
        max_rows=10,
    )
    return table
|
18 |
+
|
19 |
+
|
20 |
+
def plot_radar_chart(df, attributes, category_name):
    """Draw one filled polar (radar) trace per row of ``df``.

    Args:
        df: DataFrame containing a ``'Model'`` column plus the columns named
            in ``attributes``.
        attributes: Column labels used as the angular axis; each row's values
            in those columns become the radial coordinates of its trace.
        category_name: Value rendered as the figure title.

    Returns:
        A ``go.Figure`` whose radial axis is visible and fixed to ``[0, 100]``
        and whose legend is shown.
    """
    chart = go.Figure()

    for _, record in df.iterrows():
        model_name = record['Model']
        radial_values = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radial_values,
            theta=attributes,
            fill='toself',
            name=model_name,
        )
        chart.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 100])
    chart.update_layout(
        title=f"{category_name}",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )

    return chart
|
44 |
+
|
45 |
+
|
46 |
+
def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for ``df`` using every column after the first.

    The selected column labels are printed to stdout before plotting.

    Args:
        df: DataFrame whose columns from position 1 onward are plotted.
        category_name: Title forwarded to ``plot_radar_chart``.

    Returns:
        The figure produced by ``plot_radar_chart``.
    """
    metric_columns = df.columns[1:]
    print(f"attributes: {metric_columns}")
    return plot_radar_chart(df, metric_columns, category_name)
|
51 |
+
|
52 |
+
|
53 |
+
def reindex_cols(fix_cols, df):
    """Select the columns of ``df`` that are listed in ``fix_cols``.

    Args:
        fix_cols: Iterable of column labels giving the desired column order.
        df: pandas DataFrame to select from.

    Returns:
        A DataFrame holding only the columns of ``df`` whose labels appear in
        ``fix_cols``, ordered as in ``fix_cols``. Labels that ``df`` does not
        have are skipped.
    """
    # Materialise the column labels once instead of once per candidate label.
    available = df.columns.values.tolist()
    ordered = [label for label in fix_cols if label in available]
    return df[ordered]
|
58 |
+
|
59 |
+
|
60 |
+
def _render_radar_row(frames, wanted_keys):
    """Render one ``gr.Row`` holding a radar plot for each wanted category.

    Must be called inside an active ``gr.Blocks`` context. Categories are
    rendered in the iteration order of ``frames``.
    """
    with gr.Row():
        for key, df in frames.items():
            if key not in wanted_keys:
                continue
            # Empty-string cells become 0 before plotting. replace() returns
            # a new frame, so the caller's data is left untouched.
            new_df = df.replace('', 0)
            print(f"{key}: \n{new_df}")
            gr.Plot(create_data_interface_for_aggregated(new_df, key))


def launch_gradio(df1, df2):
    """Assemble the leaderboard page and launch it.

    Layout, top to bottom: the HTML title and Markdown introduction (read
    from the module-level ``TITLE`` and ``INTRODUCTION_TEXT``), one row of
    radar plots for "Overall" and "Basic Legal NLP", one row for "Basic Legal
    Application" and "Complex Legal Application", then one tab per entry of
    ``df2`` containing a table.

    Args:
        df1: Mapping of category name to the DataFrame used for radar plots.
        df2: Mapping of tab name to the DataFrame shown as a table.

    Returns:
        None. The call ends with ``demo.launch()``.
    """
    demo = gr.Blocks()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        _render_radar_row(df1, ("Overall", "Basic Legal NLP"))
        _render_radar_row(
            df1,
            ("Basic Legal Application", "Complex Legal Application"),
        )

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()
|
98 |
+
|
99 |
+
|
100 |
+
if __name__ == "__main__":
    # Load the per-category frames: df1 is passed to launch_gradio() for the
    # radar plots, df2 for the tabbed tables.
    df1 = plot_data()
    df2 = tab_data()

    # Constants
    # Both names are read as module-level globals inside launch_gradio().
    # NOTE(review): the leading characters of TITLE's heading text and of each
    # paragraph below look like mis-encoded emoji - confirm against the
    # original file before shipping.
    TITLE = '<h1 align="center" id="space-title">βοΈ LAiW Leaderboard</h1>'
    INTRODUCTION_TEXT = """π The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.

π‘ Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.

π Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.

π For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""

    scheduler = BackgroundScheduler()
    # NOTE(review): launch_gradio(...) is *called* on this line, so add_job
    # receives its return value (launch_gradio has no return statement)
    # rather than a callable. Presumably an hourly refresh was intended -
    # confirm against the APScheduler add_job documentation and decide what
    # should actually be scheduled; re-running launch_gradio would build and
    # launch a second Blocks app.
    scheduler.add_job(launch_gradio(df1=df1, df2=df2), "interval", seconds=3600)
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)
|
get_data_info.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import copy
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
|
5 |
+
def plot_data():
    """Load the aggregated scores from ``leaderboard.xlsx`` for plotting.

    Reads columns A:P (at most 14 data rows, first row as header) from
    ``Sheet1`` and slices the result by column position into one frame per
    category. Every frame keeps column 0; column 1 is not included in any
    frame.

    Missing cells are returned as NaN. They are deliberately not filled with
    a placeholder string: app.py passes these frames to the radar-chart code,
    which plots the cell values as radial coordinates.

    Returns:
        dict mapping category name to DataFrame, with keys ``"Overall"``
        (columns 2-15), ``"Basic Legal NLP"`` (2-6), ``"Basic Legal
        Application"`` (7-12) and ``"Complex Legal Application"`` (13-15).

    Raises:
        Whatever ``pd.read_excel`` raises when the workbook or sheet cannot
        be read.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet1',
        header=0,
        usecols='A:P',
        nrows=14,
    )

    def category(start, stop):
        # Column 0 plus the positional column range [start, stop).
        return leaderboard_df.iloc[:, [0, *range(start, stop)]]

    # TODO: the column ranges are hard-coded to the current sheet layout.
    return {
        "Overall": category(2, 16),
        "Basic Legal NLP": category(2, 7),
        "Basic Legal Application": category(7, 13),
        "Complex Legal Application": category(13, 16),
    }
|
28 |
+
|
29 |
+
|
30 |
+
def tab_data():
    """Load the per-task scores from ``leaderboard.xlsx`` for the tables.

    Reads columns A:AS (at most 14 data rows, first row as header) from
    ``Sheet2``, replaces missing cells with ``"-"``, and slices the result by
    column position into one frame per category. Every frame keeps column 0;
    column 1 is not included in any frame.

    Returns:
        dict mapping category name to DataFrame, with keys ``"Overall"``
        (columns 2-44), ``"Basic Legal NLP"`` (2-17), ``"Basic Legal
        Application"`` (18-35) and ``"Complex Legal Application"`` (36-44).

    Raises:
        Whatever ``pd.read_excel`` raises when the workbook or sheet cannot
        be read.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14,
    )
    # fillna() returns a new frame rather than modifying in place; the result
    # must be kept for the "-" placeholder to take effect.
    leaderboard_df = leaderboard_df.fillna("-")

    def category(start, stop):
        # Column 0 plus the positional column range [start, stop).
        return leaderboard_df.iloc[:, [0, *range(start, stop)]]

    # TODO: the column ranges are hard-coded to the current sheet layout.
    return {
        "Overall": category(2, 45),
        "Basic Legal NLP": category(2, 18),
        "Basic Legal Application": category(18, 36),
        "Complex Legal Application": category(36, 45),
    }
|
53 |
+
|
54 |
+
|
55 |
+
if __name__ == "__main__":
    # Manual run: load both sheets. The results are only bound to names here
    # and not used further.
    df1 = plot_data()
    df2 = tab_data()
|
leaderboard.xlsx
ADDED
Binary file (16.4 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.1.0
|
2 |
+
aiohttp==3.8.4
|
3 |
+
aiosignal==1.3.1
|
4 |
+
APScheduler==3.10.1
|
5 |
+
gradio==3.27.0
|
6 |
+
gradio_client==0.1.3
|
7 |
+
pandas==2.0.0
|
8 |
+
matplotlib
|
9 |
+
numpy
|
10 |
+
plotly
|
11 |
+
openpyxl==3.0.10
|