Spaces:

daishen
/

SCULAiW

Sleeping

App Files Files Community

daishen committed on Dec 14, 2023

Commit

c3dcec1

1 Parent(s): 9425c6e

add app.py

Browse files

Files changed (4) hide show

app.py +120 -0
get_data_info.py +57 -0
leaderboard.xlsx +0 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+# matplotlib.use('macosx')
+import gradio as gr
+import plotly.graph_objects as go
+from apscheduler.schedulers.background import BackgroundScheduler
+from get_data_info import plot_data, tab_data
+def create_data_interface(df):
+    headers = df.columns
+    types = ["str"] + ["number"] * (len(headers) - 1)
+    return gr.components.Dataframe(
+        value=df.values.tolist(),
+        headers=[col_name for col_name in headers],
+        datatype=types,
+        max_rows=10,
+    )
+def plot_radar_chart(df, attributes, category_name):
+    fig = go.Figure()
+    for index, row in df.iterrows():
+        model = row['Model']
+        values = row[attributes].tolist()
+        fig.add_trace(go.Scatterpolar(
+            r=values,
+            theta=attributes,
+            fill='toself',
+            name=model
+        ))
+    fig.update_layout(
+        title=f"{category_name}",
+        polar=dict(
+            radialaxis=dict(
+                visible=True,
+                range=[0, 100]  #
+            )),
+        showlegend=True
+    )
+    return fig
+def create_data_interface_for_aggregated(df, category_name):
+    attributes = df.columns[1:]
+    print(f"attributes: {attributes}")
+    plt = plot_radar_chart(df, attributes, category_name)
+    return plt
+def reindex_cols(fix_cols, df):
+    # reindex with task_col
+    task_col = [subtask for subtask in fix_cols if subtask in df.columns.values.tolist()]
+    df = df[task_col]
+    return df
+def launch_gradio(df1, df2):
+    demo = gr.Blocks()
+    with demo:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+        with gr.Row():
+            for key, df in df1.items():
+                if key == "Overall" or key == "Basic Legal NLP":
+                    df = df.replace('', 0)
+                    new_df = df[[val for val in df.columns]].copy()
+                    # new_df = reindex_cols(Task_COLS, new_df)
+                    print(f"{key}: \n{new_df}")
+                    plot = create_data_interface_for_aggregated(new_df, key)
+                    gr.Plot(plot)
+                    del new_df
+        with gr.Row():
+            for key, df in df1.items():
+                if key == "Basic Legal Application" or key == "Complex Legal Application":
+                    # if True:
+                    df = df.replace('', 0)
+                    new_df = df[[val for val in df.columns]].copy()
+                    # new_df = reindex_cols(Task_COLS, new_df)
+                    print(f"{key}: \n{new_df}")
+                    plot = create_data_interface_for_aggregated(new_df, key)
+                    gr.Plot(plot)
+                    del new_df
+        for key, df in df2.items():
+            # if key != "Overall":
+            if True:
+                with gr.Tab(key):
+                    # df = reindex_cols(Task_COLS, df)
+                    create_data_interface(df)
+    demo.launch()
+if __name__ == "__main__":
+    df1 = plot_data()
+    df2 = tab_data()
+    # Constants
+    TITLE = '<h1 align="center" id="space-title">⚖️ LAiW Leaderboard</h1>'
+    INTRODUCTION_TEXT = """🏆 The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.
+    💡 Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.
+    🌟 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
+    🔗 For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
+    """
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(launch_gradio(df1=df1, df2=df2), "interval", seconds=3600)
+    scheduler.start()
+    # Launch immediately
+    launch_gradio(df1=df1, df2=df2)

get_data_info.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import copy
+import pandas as pd
+def plot_data():
+    # read df and replace NaN values with an empty string
+    leaderboard_df = pd.read_excel(
+        'leaderboard.xlsx',
+        sheet_name='Sheet1',
+        header=0,
+        usecols='A:P',
+        nrows=14)
+    leaderboard_df.fillna("-")
+    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 7))]  # todo
+    df_basic = leaderboard_df.iloc[:, [0] + list(range(7, 13))]  # todo
+    df_complex = leaderboard_df.iloc[:, [0] + list(range(13, 16))]  # todo
+    # Get df_overall
+    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 16))]
+    plot_df_dict = {
+        "Overall": df_overall,
+        "Basic Legal NLP": df_nlp,
+        "Basic Legal Application": df_basic,
+        "Complex Legal Application": df_complex,
+    }
+    return plot_df_dict
+def tab_data():
+    # read df and replace NaN values with an empty string
+    leaderboard_df = pd.read_excel(
+        'leaderboard.xlsx',
+        sheet_name='Sheet2',
+        header=0,
+        usecols='A:AS',
+        nrows=14)
+    leaderboard_df.fillna("-")
+    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 18))]  # todo
+    df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))]  # todo
+    df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))]  # todo
+    # Get df_overall
+    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 45))]
+    plot_df_dict = {
+        "Overall": df_overall,
+        "Basic Legal NLP": df_nlp,
+        "Basic Legal Application": df_basic,
+        "Complex Legal Application": df_complex,
+    }
+    return plot_df_dict
+if __name__ == "__main__":
+    df1 = plot_data()
+    df2 = tab_data()

leaderboard.xlsx ADDED Viewed

Binary file (16.4 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+aiofiles==23.1.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+APScheduler==3.10.1
+gradio==3.27.0
+gradio_client==0.1.3
+pandas==2.0.0
+matplotlib
+numpy
+plotly
+openpyxl==3.0.10