[email protected] committed
Commit 57edaa4 · 1 Parent(s): c40ac63
Files changed (1): app.py (+37 -9)
app.py CHANGED
@@ -3,7 +3,7 @@ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
-
+import plotly.graph_objects as go
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
@@ -58,6 +58,34 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
+
+def init_perf_plot(df):
+    df = df.copy()
+    params_col = "num_params"
+    df["symbol"] = 2  # Triangle; set here but not yet applied to the traces below
+    df["color"] = ""
+    df.loc[df["model"].str.contains("granite"), "color"] = "grey"
+    acc_col = "failuresensor_mcqa_el"
+    fig = go.Figure()
+    for i in df.index:  # one single-point trace per model, so each model gets a legend entry
+        fig.add_trace(
+            go.Scatter(
+                x=[df.loc[i, params_col]],
+                y=[df.loc[i, acc_col]],
+                name=df.loc[i, "model"],
+            )
+        )
+
+    fig.update_layout(
+        autosize=False,
+        width=650,
+        height=600,
+        title="Model Size vs. Accuracy",
+        xaxis_title=params_col,
+        yaxis_title="Accuracy",
+    )
+    return fig
+
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -100,14 +128,14 @@ with demo:
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
         with gr.TabItem("📊 Performance Plot", elem_id="llm-benchmark-tab-table", id=1):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-            print(LEADERBOARD_DF)
-            # with gr.Row():
-            #     bs_1_plot = gr.components.Plot(
-            #         value=plot_throughput(LEADERBOARD_DF, bs=1),
-            #         elem_id="bs1-plot",
-            #         show_label=False,
-            #     )
+            print(LEADERBOARD_DF)
+            # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+            with gr.Row():
+                bs_1_plot = gr.components.Plot(
+                    value=init_perf_plot(LEADERBOARD_DF),  # init_perf_plot takes a single argument
+                    elem_id="bs1-plot",
+                    show_label=False,
+                )
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
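Note: init_perf_plot computes "symbol" and "color" columns but never passes them to the traces, so they have no visual effect. A minimal sketch (not part of the commit) of one way to apply that styling follows; the column names ("model", "num_params", "failuresensor_mcqa_el") come from the diff above, while the function name, marker size, and string symbol name are assumptions.

import pandas as pd
import plotly.graph_objects as go

def init_perf_plot_styled(df: pd.DataFrame) -> go.Figure:
    # Hypothetical variant: same per-model traces as init_perf_plot above,
    # but the marker styling is actually applied via marker=dict(...).
    fig = go.Figure()
    for i in df.index:
        is_granite = "granite" in df.loc[i, "model"]
        fig.add_trace(
            go.Scatter(
                x=[df.loc[i, "num_params"]],
                y=[df.loc[i, "failuresensor_mcqa_el"]],
                name=df.loc[i, "model"],
                mode="markers",
                marker=dict(
                    symbol="triangle-up",  # string names avoid numeric-code ambiguity
                    size=12,
                    color="grey" if is_granite else None,  # None -> default colorway
                ),
            )
        )
    fig.update_layout(
        width=650,
        height=600,
        title="Model Size vs. Accuracy",
        xaxis_title="num_params",
        yaxis_title="Accuracy",
    )
    return fig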
 
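For reference, here is a self-contained sketch of the tab-plus-plot wiring this commit enables, runnable outside the repo. The toy DataFrame and toy_plot helper are assumptions standing in for LEADERBOARD_DF and init_perf_plot; gr.Plot is the same component as gr.components.Plot.

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

# Toy stand-in for LEADERBOARD_DF (assumed values, same column names as the diff).
toy_df = pd.DataFrame({
    "model": ["model-a", "model-b", "granite-x"],
    "num_params": [7e9, 13e9, 8e9],
    "failuresensor_mcqa_el": [0.61, 0.68, 0.64],
})

def toy_plot(df: pd.DataFrame) -> go.Figure:
    # Single scatter trace with hover text instead of per-model traces.
    fig = go.Figure(
        go.Scatter(
            x=df["num_params"],
            y=df["failuresensor_mcqa_el"],
            mode="markers",
            text=df["model"],
        )
    )
    fig.update_layout(
        title="Model Size vs. Accuracy",
        xaxis_title="num_params",
        yaxis_title="Accuracy",
    )
    return fig

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("📊 Performance Plot", id=1):
            with gr.Row():
                gr.Plot(value=toy_plot(toy_df), show_label=False)

if __name__ == "__main__":
    demo.launch()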