[email protected]
commited on
Commit
Β·
57edaa4
1
Parent(s):
c40ac63
update
Browse files
app.py CHANGED

@@ -3,7 +3,7 @@ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
-
+import plotly.graph_objects as go
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
@@ -58,6 +58,34 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
+
+def init_perf_plot(df):
+    df = df.copy()
+    params_col = 'num_params'
+    df["symbol"] = 2  # Triangle
+    df["color"] = ""
+    df.loc[df["model"].str.contains("granite"), "color"] = "grey"
+    acc_col = 'failuresensor_mcqa_el'
+    fig = go.Figure()
+    for i in df.index:
+        fig.add_trace(
+            go.Scatter(
+                x=[df.loc[i, params_col]],
+                y=[df.loc[i, acc_col]],
+                name=df.loc[i, "model"]
+            )
+        )
+
+    fig.update_layout(
+        autosize=False,
+        width=650,
+        height=600,
+        title=f"Model Size Vs Accuracy",
+        xaxis_title=f"{params_col}",
+        yaxis_title="Accuracy",
+    )
+    return fig
+
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -100,14 +128,14 @@ with demo:
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
         with gr.TabItem("π Performance Plot", elem_id="llm-benchmark-tab-table", id=1):
-
-
-
-
-
-
-
-
+            print(LEADERBOARD_DF)
+            # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+            with gr.Row():
+                bs_1_plot = gr.components.Plot(
+                    value=init_perf_plot(LEADERBOARD_DF, bs=1),
+                    elem_id="bs1-plot",
+                    show_label=False,
+                )
 
         with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
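
Note (not part of the commit above): the new Performance Plot tab calls init_perf_plot(LEADERBOARD_DF, bs=1), but the function added in this commit is defined as init_perf_plot(df), so building the Plot value would raise a TypeError when the Space starts. The symbol and color columns are also computed but never wired into the traces. Below is a minimal sketch of one way to reconcile the two, assuming bs is only needed as a label and that "royalblue" is an acceptable default marker color; neither assumption comes from the commit.

import plotly.graph_objects as go

def init_perf_plot(df, bs=1):
    # `bs` is accepted so the existing call site works; it is assumed here to
    # be a label only (the commit itself never uses it).
    df = df.copy()
    params_col = "num_params"
    acc_col = "failuresensor_mcqa_el"
    df["symbol"] = 2
    # Default color is an assumption; the commit leaves non-granite rows as "".
    df["color"] = "royalblue"
    df.loc[df["model"].str.contains("granite"), "color"] = "grey"

    fig = go.Figure()
    for i in df.index:
        # One trace per model so each model gets its own legend entry.
        fig.add_trace(
            go.Scatter(
                x=[df.loc[i, params_col]],
                y=[df.loc[i, acc_col]],
                mode="markers",
                marker=dict(
                    symbol=int(df.loc[i, "symbol"]),
                    color=df.loc[i, "color"],
                    size=10,
                ),
                name=df.loc[i, "model"],
            )
        )

    fig.update_layout(
        autosize=False,
        width=650,
        height=600,
        title=f"Model Size vs Accuracy (bs={bs})",  # bs used only for labeling
        xaxis_title=params_col,
        yaxis_title="Accuracy",
    )
    return fig

With this signature the existing call, value=init_perf_plot(LEADERBOARD_DF, bs=1), runs as written; the alternative is to drop bs=1 at the call site and keep the one-argument function from the commit.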