MohamedRashad committed on
Commit
3a93505
·
1 Parent(s): f21842d

Add fuzzywuzzy dependency and update model submission functions in utils.py

Browse files
Files changed (3) hide show
  1. app.py +45 -232
  2. requirements.txt +1 -0
  3. utils.py +4 -4
app.py CHANGED
@@ -1,12 +1,11 @@
1
  import gradio as gr
2
  from utils import submit_gradio_module, load_retrieval_results
3
-
4
 
5
  HEADER = """<div style="text-align: center; margin-bottom: 20px;">
6
  <h1>The Arabic RAG Leaderboard</h1>
7
  <p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs 🏆</p>
8
  </div>
9
-
10
  """
11
 
12
  ABOUT_SECTION = """
@@ -44,6 +43,28 @@ CITATION_BUTTON_TEXT = """
44
  df = load_retrieval_results()
45
  print(df)
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def main():
48
 
49
  with gr.Blocks() as demo:
@@ -60,254 +81,46 @@ def main():
60
  interactive=True
61
  )
62
 
63
- with gr.Row():
64
- column_selector_tasks = gr.CheckboxGroup(
65
- choices=[],
66
- value=['Rank', 'Model Name'],
67
- label="Select columns to display",
68
- )
69
-
70
- with gr.Row():
71
- license_filter_retrieval = gr.CheckboxGroup(
72
- choices=[],
73
- value=[], # Default all selected
74
- label="Filter by License",
75
- )
76
- precision_filter_retrieval = gr.CheckboxGroup(
77
- choices=[],
78
- value=[], # Default all selected
79
- label="Filter by Precision",
80
- )
81
-
82
  retrieval_leaderboard = gr.Dataframe(
83
  df,
84
  interactive=False
85
  )
86
 
87
- # def filter_df_3c3h(search_query, selected_cols, precision_filters, license_filters):
88
- # filtered_df = df_3c3h.copy()
89
-
90
- # # Ensure min_size <= max_size
91
- # if min_size > max_size:
92
- # min_size, max_size = max_size, min_size
93
-
94
- # # Apply search filter
95
- # if search_query:
96
- # filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
97
-
98
- # # Apply Precision filter
99
- # if precision_filters:
100
- # include_missing = 'Missing' in precision_filters
101
- # selected_precisions = [p for p in precision_filters if p != 'Missing']
102
- # if include_missing:
103
- # filtered_df = filtered_df[
104
- # (filtered_df['Precision'].isin(selected_precisions)) |
105
- # (filtered_df['Precision'] == 'UNK') |
106
- # (filtered_df['Precision'].isna())
107
- # ]
108
- # else:
109
- # filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
110
-
111
- # # Apply License filter
112
- # if license_filters:
113
- # include_missing = 'Missing' in license_filters
114
- # selected_licenses = [l for l in license_filters if l != 'Missing']
115
- # if include_missing:
116
- # filtered_df = filtered_df[
117
- # (filtered_df['License'].isin(selected_licenses)) |
118
- # (filtered_df['License'] == 'UNK') |
119
- # (filtered_df['License'].isna())
120
- # ]
121
- # else:
122
- # filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
123
-
124
- # # Apply Model Size filter
125
- # filtered_df = filtered_df[
126
- # (filtered_df['Model Size Filter'] >= min_size) &
127
- # (filtered_df['Model Size Filter'] <= max_size)
128
- # ]
129
-
130
- # # Remove existing 'Rank' column if present
131
- # if 'Rank' in filtered_df.columns:
132
- # filtered_df = filtered_df.drop(columns=['Rank'])
133
-
134
- # # Recalculate Rank after filtering
135
- # filtered_df = filtered_df.reset_index(drop=True)
136
- # filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
137
-
138
- # # Ensure selected columns are present
139
- # selected_cols = [col for col in selected_cols if col in filtered_df.columns]
140
-
141
- # return filtered_df[selected_cols]
142
-
143
- # # Bind the filter function to the appropriate events
144
- # filter_inputs_3c3h = [
145
- # search_box_retrieval,
146
- # precision_filter_retrieval,
147
- # license_filter_retrieval,
148
- # ]
149
- # search_box_retrieval.submit(
150
- # filter_df_3c3h,
151
- # inputs=filter_inputs_3c3h,
152
- # outputs=leaderboard_3c3h
153
- # )
154
 
155
- # # Bind change events for CheckboxGroups and sliders
156
- # for component in filter_inputs_3c3h:
157
- # component.change(
158
- # filter_df_3c3h,
159
- # inputs=filter_inputs_3c3h,
160
- # outputs=leaderboard_3c3h
161
- # )
162
-
163
- submit_gradio_module()
164
 
165
  with gr.Tab("Reranking"):
166
  with gr.Tabs():
167
  with gr.Tab("Leaderboard"):
168
-
169
- with gr.Row():
170
- search_box_tasks = gr.Textbox(
171
  placeholder="Search for models...",
172
  label="Search",
173
  interactive=True
174
  )
175
- with gr.Row():
176
- column_selector_tasks = gr.CheckboxGroup(
177
- choices=[],
178
- value=['Rank', 'Model Name'],
179
- label="Select columns to display",
180
- )
181
- with gr.Row():
182
- license_filter_tasks = gr.CheckboxGroup(
183
- choices=[],
184
- value=[], # Default all selected
185
- label="Filter by License",
186
- )
187
- precision_filter_tasks = gr.CheckboxGroup(
188
- choices=[],
189
- value=[], # Default all selected
190
- label="Filter by Precision",
191
- )
192
- # with gr.Row():
193
- # model_size_min_filter_tasks = gr.Slider(
194
- # minimum=min_model_size_tasks,
195
- # maximum=max_model_size_tasks,
196
- # value=min_model_size_tasks,
197
- # step=1,
198
- # label="Minimum Model Size",
199
- # interactive=True
200
- # )
201
- # model_size_max_filter_tasks = gr.Slider(
202
- # minimum=min_model_size_tasks,
203
- # maximum=max_model_size_tasks,
204
- # value=max_model_size_tasks,
205
- # step=1,
206
- # label="Maximum Model Size",
207
- # interactive=True
208
- # )
209
 
210
- leaderboard_tasks = gr.Dataframe(
211
  df,
212
- # headers="auto", # Automatically use DataFrame's headers
213
- # label="MultiIndex DataFrame",
214
- # interactive=False,
215
- )
216
-
217
- # def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
218
- # filtered_df = df_tasks.copy()
219
-
220
- # # Ensure min_size <= max_size
221
- # if min_size > max_size:
222
- # min_size, max_size = max_size, min_size
223
-
224
- # # Apply search filter
225
- # if search_query:
226
- # filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
227
-
228
- # # Apply Precision filter
229
- # if precision_filters:
230
- # include_missing = 'Missing' in precision_filters
231
- # selected_precisions = [p for p in precision_filters if p != 'Missing']
232
- # if include_missing:
233
- # filtered_df = filtered_df[
234
- # (filtered_df['Precision'].isin(selected_precisions)) |
235
- # (filtered_df['Precision'] == 'UNK') |
236
- # (filtered_df['Precision'].isna())
237
- # ]
238
- # else:
239
- # filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
240
-
241
- # # Apply License filter
242
- # if license_filters:
243
- # include_missing = 'Missing' in license_filters
244
- # selected_licenses = [l for l in license_filters if l != 'Missing']
245
- # if include_missing:
246
- # filtered_df = filtered_df[
247
- # (filtered_df['License'].isin(selected_licenses)) |
248
- # (filtered_df['License'] == 'UNK') |
249
- # (filtered_df['License'].isna())
250
- # ]
251
- # else:
252
- # filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
253
-
254
- # # Apply Model Size filter
255
- # filtered_df = filtered_df[
256
- # (filtered_df['Model Size Filter'] >= min_size) &
257
- # (filtered_df['Model Size Filter'] <= max_size)
258
- # ]
259
-
260
- # # Remove existing 'Rank' column if present
261
- # if 'Rank' in filtered_df.columns:
262
- # filtered_df = filtered_df.drop(columns=['Rank'])
263
-
264
- # # Sort by the first task column if it exists
265
- # if task_columns:
266
- # first_task = task_columns[0]
267
- # filtered_df = filtered_df.sort_values(by=first_task, ascending=False)
268
- # else:
269
- # filtered_df = filtered_df.sort_values(by='Model Name', ascending=True)
270
-
271
- # # Recalculate Rank after filtering
272
- # filtered_df = filtered_df.reset_index(drop=True)
273
- # filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
274
-
275
- # # Ensure selected columns are present
276
- # selected_cols = [col for col in selected_cols if col in filtered_df.columns]
277
-
278
- # return filtered_df[selected_cols]
279
-
280
- # # Bind the filter function to the appropriate events
281
- # filter_inputs_tasks = [
282
- # search_box_tasks,
283
- # column_selector_tasks,
284
- # precision_filter_tasks,
285
- # license_filter_tasks,
286
- # model_size_min_filter_tasks,
287
- # model_size_max_filter_tasks
288
- # ]
289
- # search_box_tasks.submit(
290
- # filter_df_tasks,
291
- # inputs=filter_inputs_tasks,
292
- # outputs=leaderboard_tasks
293
- # )
294
-
295
- # # Bind change events for CheckboxGroups and sliders
296
- # for component in filter_inputs_tasks:
297
- # component.change(
298
- # filter_df_tasks,
299
- # inputs=filter_inputs_tasks,
300
- # outputs=leaderboard_tasks
301
- # )
302
 
303
- submit_gradio_module()
 
 
 
 
 
304
 
305
- with gr.Tab("LLM Context Answering"):
306
- with gr.Tabs():
307
- with gr.Tab("Leaderboard"):
308
- pass
309
- with gr.Tab("Submit Here"):
310
- pass
311
 
312
  with gr.Row():
313
  with gr.Accordion("📙 Citation", open=False):
 
1
  import gradio as gr
2
  from utils import submit_gradio_module, load_retrieval_results
3
+ from fuzzywuzzy import fuzz
4
 
5
  HEADER = """<div style="text-align: center; margin-bottom: 20px;">
6
  <h1>The Arabic RAG Leaderboard</h1>
7
  <p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs 🏆</p>
8
  </div>
 
9
  """
10
 
11
  ABOUT_SECTION = """
 
43
  df = load_retrieval_results()
44
  print(df)
45
 
46
def search_leaderboard(model_name):
    """Return the leaderboard rows whose model name fuzzily matches a query.

    Args:
        model_name: Text typed into the search box. An empty or ``None``
            value means "no filter".

    Returns:
        The module-level ``df`` unchanged when the query is empty; otherwise
        a new DataFrame holding only the rows whose ``'model'`` value scores
        at least ``threshold`` against the query, best matches first.
    """
    if not model_name:
        return df

    threshold = 95  # You can adjust this value to make the search more or less strict
    query = model_name.lower()

    # Score on a copy (``assign`` returns a new frame). Writing the scratch
    # column into the shared module-level ``df`` would leak a 'similarity'
    # column into every later unfiltered view and let concurrent searches
    # overwrite each other's scores.
    scored = df.assign(
        similarity=df['model'].map(
            lambda name: fuzz.partial_ratio(query, name.lower())
        )
    )

    # Keep only sufficiently similar rows; the stable sort leaves equally
    # scored models in their original leaderboard order.
    matches = scored[scored['similarity'] >= threshold].sort_values(
        'similarity', ascending=False, kind='stable'
    )

    # The score is an implementation detail and must not reach the UI.
    return matches.drop(columns='similarity')
66
+
67
+
68
  def main():
69
 
70
  with gr.Blocks() as demo:
 
81
  interactive=True
82
  )
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  retrieval_leaderboard = gr.Dataframe(
85
  df,
86
  interactive=False
87
  )
88
 
89
+ # Submit the search box and the leaderboard
90
+ search_box_retrieval.submit(
91
+ search_leaderboard,
92
+ inputs=search_box_retrieval,
93
+ outputs=retrieval_leaderboard
94
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ submit_gradio_module("Retriever")
 
 
 
 
 
 
 
 
97
 
98
  with gr.Tab("Reranking"):
99
  with gr.Tabs():
100
  with gr.Tab("Leaderboard"):
101
+ search_box_reranker = gr.Textbox(
 
 
102
  placeholder="Search for models...",
103
  label="Search",
104
  interactive=True
105
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ reranker_leaderboard = gr.Dataframe(
108
  df,
109
+ interactive=False,
110
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ search_box_reranker.submit(
113
+ search_leaderboard,
114
+ inputs=search_box_reranker,
115
+ outputs=reranker_leaderboard
116
+ )
117
+ submit_gradio_module("Reranker")
118
 
119
+ # with gr.Tab("LLM Context Answering"):
120
+ # with gr.Tabs():
121
+ # with gr.Tab("Leaderboard"):
122
+ # pass
123
+ # submit_gradio_module("LLM")
 
124
 
125
  with gr.Row():
126
  with gr.Accordion("📙 Citation", open=False):
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ fuzzywuzzy
utils.py CHANGED
@@ -50,7 +50,7 @@ def submit_model(model_name, revision, precision, params, license):
50
  if df_retrieval.empty:
51
  return "**Error: Could not load the retrieval results.**"
52
 
53
- existing_models_results = df_retrieval[['Model', 'Revision', 'Precision']]
54
 
55
  # Handle 'Missing' precision
56
  if precision == 'Missing':
@@ -137,7 +137,7 @@ def load_requests(status_folder):
137
  requests_data = []
138
  folder_path_in_repo = status_folder # 'pending', 'finished', or 'failed'
139
 
140
- hf_api_token = os.environ.get('HF_API_TOKEN', None)
141
 
142
  try:
143
  # List files in the dataset repository
@@ -174,9 +174,9 @@ def load_requests(status_folder):
174
  return df
175
 
176
 
177
- def submit_gradio_module():
178
- with gr.Tab("Submit Model") as submitter_tab:
179
 
 
180
  with gr.Row(equal_height=True):
181
  model_name_input = gr.Textbox(
182
  label="Model",
 
50
  if df_retrieval.empty:
51
  return "**Error: Could not load the retrieval results.**"
52
 
53
+ existing_models_results = df_retrieval[['Model']]
54
 
55
  # Handle 'Missing' precision
56
  if precision == 'Missing':
 
137
  requests_data = []
138
  folder_path_in_repo = status_folder # 'pending', 'finished', or 'failed'
139
 
140
+ hf_api_token = os.environ.get('HF_TOKEN', None)
141
 
142
  try:
143
  # List files in the dataset repository
 
174
  return df
175
 
176
 
177
+ def submit_gradio_module(type):
 
178
 
179
+ with gr.Tab(f"Submit {type}") as submitter_tab:
180
  with gr.Row(equal_height=True):
181
  model_name_input = gr.Textbox(
182
  label="Model",