ozayezerceli committed on
Commit
9a3221d
·
verified ·
1 Parent(s): d3a1025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -35
app.py CHANGED
@@ -3,6 +3,8 @@ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
  from huggingface_hub import snapshot_download
 
 
6
 
7
  from src.about import (
8
  CITATION_BUTTON_LABEL,
@@ -18,6 +20,8 @@ from src.display.utils import (
18
  COLS,
19
  EVAL_COLS,
20
  EVAL_TYPES,
 
 
21
  AutoEvalColumn,
22
  ModelType,
23
  fields,
@@ -30,26 +34,27 @@ from src.submission.submit import add_new_eval
30
 
31
 
32
  def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
 
35
  ### Space initialisation
36
  try:
37
  print(EVAL_REQUESTS_PATH)
38
  snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
  )
41
  except Exception:
42
  restart_space()
43
  try:
44
  print(EVAL_RESULTS_PATH)
45
  snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
  )
48
  except Exception:
49
  restart_space()
50
 
51
 
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
53
 
54
  (
55
  finished_eval_queue_df,
@@ -57,36 +62,76 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
57
  pending_eval_queue_df,
58
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
 
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
 
92
  demo = gr.Blocks(css=custom_css)
@@ -96,7 +141,107 @@ with demo:
96
 
97
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -198,6 +343,9 @@ with demo:
198
  show_copy_button=True,
199
  )
200
 
 
 
 
201
  scheduler = BackgroundScheduler()
202
  scheduler.add_job(restart_space, "interval", seconds=1800)
203
  scheduler.start()
 
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
  from huggingface_hub import snapshot_download
6
+ import os
7
# NOTE(review): setting CURL_CA_BUNDLE to an empty string is a known workaround that can make HTTP clients such as `requests` skip TLS certificate verification — confirm this is intentional; pointing the variable at a valid CA bundle is the safer fix.
+ os.environ['CURL_CA_BUNDLE'] = ''
8
 
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
 
20
  COLS,
21
  EVAL_COLS,
22
  EVAL_TYPES,
23
+ NUMERIC_INTERVALS,
24
+ TYPES,
25
  AutoEvalColumn,
26
  ModelType,
27
  fields,
 
34
 
35
 
36
  def restart_space():
37
+ API.restart_space(repo_id=REPO_ID,token=TOKEN)
38
 
39
  ### Space initialisation
40
  try:
41
  print(EVAL_REQUESTS_PATH)
42
  snapshot_download(
43
+ repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
44
  )
45
  except Exception:
46
  restart_space()
47
  try:
48
  print(EVAL_RESULTS_PATH)
49
  snapshot_download(
50
+ repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
51
  )
52
  except Exception:
53
  restart_space()
54
 
55
 
56
+ raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
57
+ leaderboard_df = original_df.copy()
58
 
59
  (
60
  finished_eval_queue_df,
 
62
  pending_eval_queue_df,
63
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
64
 
65
+ # Searching and filtering
66
def update_table(
    hidden_df: pd.DataFrame,
    columns: list,
    type_query: list,
    precision_query: list,
    size_query: list,
    show_deleted: bool,
    query: str,
) -> pd.DataFrame:
    """Recompute the visible leaderboard from the full table and the UI controls.

    The steps run in a fixed order: row filters first, then the free-text
    search, and finally the column selection.

    Args:
        hidden_df: The complete, unfiltered leaderboard table.
        columns: Names of the optional columns the user wants displayed.
        type_query: Selected model-type labels.
        precision_query: Selected precision names. This is a list (it is
            concatenated with another list inside ``filter_models``), not a
            single string.
        size_query: Selected size buckets, given as keys of ``NUMERIC_INTERVALS``.
        show_deleted: When true, rows are kept regardless of their
            "still on hub" flag.
        query: Raw search-bar text; several terms may be separated by ``;``.

    Returns:
        The filtered table restricted to the requested columns.
    """
    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
    filtered_df = filter_queries(query, filtered_df)
    return select_columns(filtered_df, columns)
79
+
80
+
81
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose dummy (search) column contains ``query``.

    The match is a case-insensitive *literal* substring test. The text comes
    straight from the search bar, so it must not be interpreted as a regular
    expression: names such as "c++" or an unbalanced "(" would otherwise raise
    ``re.error``, and "." would match any character.

    Args:
        df: Table to search; must contain the ``AutoEvalColumn.dummy`` column.
        query: Substring to look for.

    Returns:
        The matching rows of ``df``.
    """
    haystack = df[AutoEvalColumn.dummy.name]
    # na=False: missing cells count as "no match" instead of producing a
    # non-boolean mask that cannot be used for indexing.
    matches = haystack.str.contains(query, case=False, regex=False, na=False)
    return df[matches]
83
+
84
+
85
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project ``df`` onto the pinned columns plus the requested optional ones.

    The result always starts with the model-type symbol and model columns and
    always ends with the dummy column; the requested columns sit in between.

    Args:
        df: Table to take the columns from.
        columns: Names of the optional columns to include.

    Returns:
        ``df`` restricted to the selected columns.
    """
    pinned = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
    # Iterate over COLS (not `columns`) so the output keeps COLS' ordering.
    requested = [name for name in COLS if name in df.columns and name in columns]
    return df[[*pinned, *requested, AutoEvalColumn.dummy.name]]
95
+
96
+
97
def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Keep the rows of ``filtered_df`` that match any ``;``-separated search term.

    Args:
        query: Raw search text. Terms are separated by ``;`` and surrounding
            whitespace is ignored. An empty query (or one made only of
            separators/whitespace) applies no search filter.
        filtered_df: Table to search.

    Returns:
        ``filtered_df`` unchanged when there is no usable term. Otherwise the
        union of the per-term matches, de-duplicated on model, precision and
        revision. When terms were given but none matched, an empty table with
        the same columns is returned (previously the whole unfiltered table
        was returned, which made a failed search look like no search at all).
    """
    terms = [term.strip() for term in (query or "").split(";")]
    terms = [term for term in terms if term]
    if not terms:
        return filtered_df

    # Drop empty per-term results before concatenating so they cannot affect
    # the dtypes of the combined frame.
    hits = [hit for hit in (search_table(filtered_df, term) for term in terms) if len(hit) > 0]
    if not hits:
        # Same columns, zero rows.
        return filtered_df.iloc[0:0]

    combined = pd.concat(hits)
    # A row matched by several terms must appear only once.
    return combined.drop_duplicates(
        subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
    )
114
+
115
+
116
def filter_models(
    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
) -> pd.DataFrame:
    """Filter leaderboard rows by hub availability, model type, precision and size.

    All conditions are evaluated against ``df`` itself and AND-ed into a single
    boolean mask that is applied once. The earlier implementation applied
    masks built from ``df`` to an already-narrowed frame, which only worked
    through pandas' implicit index realignment.

    Args:
        df: Full leaderboard table.
        type_query: Selected model-type labels; only the first character of
            each label is compared with the model-type-symbol column.
        size_query: Selected size buckets, given as keys of ``NUMERIC_INTERVALS``.
        precision_query: Selected precision names. The literal ``"None"`` is
            always accepted in addition to these.
        show_deleted: When false, only rows whose "still on hub" value equals
            ``True`` are kept.

    Returns:
        The rows of ``df`` that satisfy every active filter.
    """
    type_emoji = [t[0] for t in type_query]
    keep = df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)
    keep &= df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])

    if not show_deleted:
        # Explicit comparison (not truthiness) so missing values count as "not on hub".
        keep &= df[AutoEvalColumn.still_on_hub.name] == True  # noqa: E712

    numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
    # Values that cannot be parsed as numbers are coerced to NaN.
    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
    in_selected_size = params_column.apply(lambda x: numeric_interval.contains(x).any())
    # astype(bool) keeps the mask boolean even when the table has no rows.
    keep &= in_selected_size.astype(bool)

    return df.loc[keep]
135
 
136
 
137
  demo = gr.Blocks(css=custom_css)
 
141
 
142
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
143
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
144
+ with gr.Row():
145
+ with gr.Column():
146
+ with gr.Row():
147
+ search_bar = gr.Textbox(
148
+ placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
149
+ show_label=False,
150
+ elem_id="search-bar",
151
+ )
152
+ with gr.Row():
153
+ shown_columns = gr.CheckboxGroup(
154
+ choices=[
155
+ c.name
156
+ for c in fields(AutoEvalColumn)
157
+ if not c.hidden and not c.never_hidden and not c.dummy
158
+ ],
159
+ value=[
160
+ c.name
161
+ for c in fields(AutoEvalColumn)
162
+ if c.displayed_by_default and not c.hidden and not c.never_hidden
163
+ ],
164
+ label="Select columns to show",
165
+ elem_id="column-select",
166
+ interactive=True,
167
+ )
168
+ with gr.Row():
169
+ deleted_models_visibility = gr.Checkbox(
170
+ value=False, label="Show gated/private/deleted models", interactive=True
171
+ )
172
+ with gr.Column(min_width=320):
173
+ #with gr.Box(elem_id="box-filter"):
174
+ filter_columns_type = gr.CheckboxGroup(
175
+ label="Model types",
176
+ choices=[t.to_str() for t in ModelType],
177
+ value=[t.to_str() for t in ModelType],
178
+ interactive=True,
179
+ elem_id="filter-columns-type",
180
+ )
181
+ filter_columns_precision = gr.CheckboxGroup(
182
+ label="Precision",
183
+ choices=[i.value.name for i in Precision],
184
+ value=[i.value.name for i in Precision],
185
+ interactive=True,
186
+ elem_id="filter-columns-precision",
187
+ )
188
+ filter_columns_size = gr.CheckboxGroup(
189
+ label="Model sizes (in billions of parameters)",
190
+ choices=list(NUMERIC_INTERVALS.keys()),
191
+ value=list(NUMERIC_INTERVALS.keys()),
192
+ interactive=True,
193
+ elem_id="filter-columns-size",
194
+ )
195
+
196
+ leaderboard_table = gr.components.Dataframe(
197
+ value=leaderboard_df[
198
+ [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
199
+ + shown_columns.value
200
+ + [AutoEvalColumn.dummy.name]
201
+ ],
202
+ headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
203
+ datatype=TYPES,
204
+ elem_id="leaderboard-table",
205
+ interactive=False,
206
+ visible=True,
207
+ column_widths=["2%", "33%"]
208
+ )
209
+
210
+ # Dummy leaderboard for handling the case when the user uses backspace key
211
+ hidden_leaderboard_table_for_search = gr.components.Dataframe(
212
+ value=original_df[COLS],
213
+ headers=COLS,
214
+ datatype=TYPES,
215
+ visible=False,
216
+ )
217
+ search_bar.submit(
218
+ update_table,
219
+ [
220
+ hidden_leaderboard_table_for_search,
221
+ shown_columns,
222
+ filter_columns_type,
223
+ filter_columns_precision,
224
+ filter_columns_size,
225
+ deleted_models_visibility,
226
+ search_bar,
227
+ ],
228
+ leaderboard_table,
229
+ )
230
+ for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
231
+ selector.change(
232
+ update_table,
233
+ [
234
+ hidden_leaderboard_table_for_search,
235
+ shown_columns,
236
+ filter_columns_type,
237
+ filter_columns_precision,
238
+ filter_columns_size,
239
+ deleted_models_visibility,
240
+ search_bar,
241
+ ],
242
+ leaderboard_table,
243
+ queue=True,
244
+ )
245
 
246
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
247
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
343
  show_copy_button=True,
344
  )
345
 
346
+
347
+
348
+
349
  scheduler = BackgroundScheduler()
350
  scheduler.add_job(restart_space, "interval", seconds=1800)
351
  scheduler.start()