Spaces: Running on CPU Upgrade
Commit · 3a93505
1 Parent(s): f21842d

Add fuzzywuzzy dependency and update model submission functions in utils.py

Files changed:
- app.py +45 -232
- requirements.txt +1 -0
- utils.py +4 -4
app.py CHANGED
@@ -1,12 +1,11 @@
 import gradio as gr
 from utils import submit_gradio_module, load_retrieval_results
-
+from fuzzywuzzy import fuzz
 
 HEADER = """<div style="text-align: center; margin-bottom: 20px;">
     <h1>The Arabic RAG Leaderboard</h1>
     <p style="font-size: 14px; color: #888;">The only leaderboard you will require for your RAG needs</p>
 </div>
-
 """
 
 ABOUT_SECTION = """
@@ -44,6 +43,28 @@ CITATION_BUTTON_TEXT = """
 df = load_retrieval_results()
 print(df)
 
+def search_leaderboard(model_name):
+    if not model_name:
+        return df
+
+    threshold = 95  # You can adjust this value to make the search more or less strict
+
+    def calculate_similarity(row):
+        similarity = fuzz.partial_ratio(model_name.lower(), row['model'].lower())
+        return similarity if similarity >= threshold else 0
+
+    # Add a new column for similarity scores
+    df['similarity'] = df.apply(calculate_similarity, axis=1)
+
+    # Filter and sort the dataframe
+    filtered_df = df[df['similarity'] > 0].sort_values('similarity', ascending=False)
+
+    # Remove the similarity column before returning
+    filtered_df = filtered_df.drop('similarity', axis=1)
+
+    return filtered_df
+
+
 def main():
 
     with gr.Blocks() as demo:
@@ -60,254 +81,46 @@ def main():
                                 interactive=True
                             )
 
-                        with gr.Row():
-                            column_selector_tasks = gr.CheckboxGroup(
-                                choices=[],
-                                value=['Rank', 'Model Name'],
-                                label="Select columns to display",
-                            )
-
-                        with gr.Row():
-                            license_filter_retrieval = gr.CheckboxGroup(
-                                choices=[],
-                                value=[],  # Default all selected
-                                label="Filter by License",
-                            )
-                            precision_filter_retrieval = gr.CheckboxGroup(
-                                choices=[],
-                                value=[],  # Default all selected
-                                label="Filter by Precision",
-                            )
-
                         retrieval_leaderboard = gr.Dataframe(
                             df,
                             interactive=False
                         )
 
-                        #
-
-
-
-
-
-
-                        # # Apply search filter
-                        # if search_query:
-                        #     filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
-
-                        # # Apply Precision filter
-                        # if precision_filters:
-                        #     include_missing = 'Missing' in precision_filters
-                        #     selected_precisions = [p for p in precision_filters if p != 'Missing']
-                        #     if include_missing:
-                        #         filtered_df = filtered_df[
-                        #             (filtered_df['Precision'].isin(selected_precisions)) |
-                        #             (filtered_df['Precision'] == 'UNK') |
-                        #             (filtered_df['Precision'].isna())
-                        #         ]
-                        #     else:
-                        #         filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
-
-                        # # Apply License filter
-                        # if license_filters:
-                        #     include_missing = 'Missing' in license_filters
-                        #     selected_licenses = [l for l in license_filters if l != 'Missing']
-                        #     if include_missing:
-                        #         filtered_df = filtered_df[
-                        #             (filtered_df['License'].isin(selected_licenses)) |
-                        #             (filtered_df['License'] == 'UNK') |
-                        #             (filtered_df['License'].isna())
-                        #         ]
-                        #     else:
-                        #         filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
-
-                        # # Apply Model Size filter
-                        # filtered_df = filtered_df[
-                        #     (filtered_df['Model Size Filter'] >= min_size) &
-                        #     (filtered_df['Model Size Filter'] <= max_size)
-                        # ]
-
-                        # # Remove existing 'Rank' column if present
-                        # if 'Rank' in filtered_df.columns:
-                        #     filtered_df = filtered_df.drop(columns=['Rank'])
-
-                        # # Recalculate Rank after filtering
-                        # filtered_df = filtered_df.reset_index(drop=True)
-                        # filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
-
-                        # # Ensure selected columns are present
-                        # selected_cols = [col for col in selected_cols if col in filtered_df.columns]
-
-                        # return filtered_df[selected_cols]
-
-                        # # Bind the filter function to the appropriate events
-                        # filter_inputs_3c3h = [
-                        #     search_box_retrieval,
-                        #     precision_filter_retrieval,
-                        #     license_filter_retrieval,
-                        # ]
-                        # search_box_retrieval.submit(
-                        #     filter_df_3c3h,
-                        #     inputs=filter_inputs_3c3h,
-                        #     outputs=leaderboard_3c3h
-                        # )
+                        # Submit the search box and the leaderboard
+                        search_box_retrieval.submit(
+                            search_leaderboard,
+                            inputs=search_box_retrieval,
+                            outputs=retrieval_leaderboard
+                        )
 
-
-                        # for component in filter_inputs_3c3h:
-                        #     component.change(
-                        #         filter_df_3c3h,
-                        #         inputs=filter_inputs_3c3h,
-                        #         outputs=leaderboard_3c3h
-                        #     )
-
-                    submit_gradio_module()
+                    submit_gradio_module("Retriever")
 
             with gr.Tab("Reranking"):
                 with gr.Tabs():
                     with gr.Tab("Leaderboard"):
-
-                        with gr.Row():
-                            search_box_tasks = gr.Textbox(
+                        search_box_reranker = gr.Textbox(
                                 placeholder="Search for models...",
                                 label="Search",
                                 interactive=True
                             )
-                        with gr.Row():
-                            column_selector_tasks = gr.CheckboxGroup(
-                                choices=[],
-                                value=['Rank', 'Model Name'],
-                                label="Select columns to display",
-                            )
-                        with gr.Row():
-                            license_filter_tasks = gr.CheckboxGroup(
-                                choices=[],
-                                value=[],  # Default all selected
-                                label="Filter by License",
-                            )
-                            precision_filter_tasks = gr.CheckboxGroup(
-                                choices=[],
-                                value=[],  # Default all selected
-                                label="Filter by Precision",
-                            )
-                        # with gr.Row():
-                        #     model_size_min_filter_tasks = gr.Slider(
-                        #         minimum=min_model_size_tasks,
-                        #         maximum=max_model_size_tasks,
-                        #         value=min_model_size_tasks,
-                        #         step=1,
-                        #         label="Minimum Model Size",
-                        #         interactive=True
-                        #     )
-                        #     model_size_max_filter_tasks = gr.Slider(
-                        #         minimum=min_model_size_tasks,
-                        #         maximum=max_model_size_tasks,
-                        #         value=max_model_size_tasks,
-                        #         step=1,
-                        #         label="Maximum Model Size",
-                        #         interactive=True
-                        #     )
 
-
+                        reranker_leaderboard = gr.Dataframe(
                             df,
-
-
-                            # interactive=False,
-                        )
+                            interactive=False,
+                        )
-
-                        # def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
-                        #     filtered_df = df_tasks.copy()
-
-                        #     # Ensure min_size <= max_size
-                        #     if min_size > max_size:
-                        #         min_size, max_size = max_size, min_size
-
-                        #     # Apply search filter
-                        #     if search_query:
-                        #         filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
-
-                        #     # Apply Precision filter
-                        #     if precision_filters:
-                        #         include_missing = 'Missing' in precision_filters
-                        #         selected_precisions = [p for p in precision_filters if p != 'Missing']
-                        #         if include_missing:
-                        #             filtered_df = filtered_df[
-                        #                 (filtered_df['Precision'].isin(selected_precisions)) |
-                        #                 (filtered_df['Precision'] == 'UNK') |
-                        #                 (filtered_df['Precision'].isna())
-                        #             ]
-                        #         else:
-                        #             filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
-
-                        #     # Apply License filter
-                        #     if license_filters:
-                        #         include_missing = 'Missing' in license_filters
-                        #         selected_licenses = [l for l in license_filters if l != 'Missing']
-                        #         if include_missing:
-                        #             filtered_df = filtered_df[
-                        #                 (filtered_df['License'].isin(selected_licenses)) |
-                        #                 (filtered_df['License'] == 'UNK') |
-                        #                 (filtered_df['License'].isna())
-                        #             ]
-                        #         else:
-                        #             filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
-
-                        #     # Apply Model Size filter
-                        #     filtered_df = filtered_df[
-                        #         (filtered_df['Model Size Filter'] >= min_size) &
-                        #         (filtered_df['Model Size Filter'] <= max_size)
-                        #     ]
-
-                        #     # Remove existing 'Rank' column if present
-                        #     if 'Rank' in filtered_df.columns:
-                        #         filtered_df = filtered_df.drop(columns=['Rank'])
-
-                        #     # Sort by the first task column if it exists
-                        #     if task_columns:
-                        #         first_task = task_columns[0]
-                        #         filtered_df = filtered_df.sort_values(by=first_task, ascending=False)
-                        #     else:
-                        #         filtered_df = filtered_df.sort_values(by='Model Name', ascending=True)
-
-                        #     # Recalculate Rank after filtering
-                        #     filtered_df = filtered_df.reset_index(drop=True)
-                        #     filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
-
-                        #     # Ensure selected columns are present
-                        #     selected_cols = [col for col in selected_cols if col in filtered_df.columns]
-
-                        #     return filtered_df[selected_cols]
-
-                        # # Bind the filter function to the appropriate events
-                        # filter_inputs_tasks = [
-                        #     search_box_tasks,
-                        #     column_selector_tasks,
-                        #     precision_filter_tasks,
-                        #     license_filter_tasks,
-                        #     model_size_min_filter_tasks,
-                        #     model_size_max_filter_tasks
-                        # ]
-                        # search_box_tasks.submit(
-                        #     filter_df_tasks,
-                        #     inputs=filter_inputs_tasks,
-                        #     outputs=leaderboard_tasks
-                        # )
-
-                        # # Bind change events for CheckboxGroups and sliders
-                        # for component in filter_inputs_tasks:
-                        #     component.change(
-                        #         filter_df_tasks,
-                        #         inputs=filter_inputs_tasks,
-                        #         outputs=leaderboard_tasks
-                        #     )
 
-
+                        search_box_reranker.submit(
+                            search_leaderboard,
+                            inputs=search_box_reranker,
+                            outputs=reranker_leaderboard
+                        )
+                    submit_gradio_module("Reranker")
 
-                with gr.Tab("LLM Context Answering"):
-
-
-
-
-                    pass
+            # with gr.Tab("LLM Context Answering"):
+            #     with gr.Tabs():
+            #         with gr.Tab("Leaderboard"):
+            #             pass
+            # submit_gradio_module("LLM")
 
         with gr.Row():
             with gr.Accordion("Citation", open=False):
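Note on the new search path: `search_leaderboard` scores each row's `model` value against the query with `fuzz.partial_ratio` and keeps only rows at or above the 95 threshold; since `partial_ratio` scores an exact substring hit at 100, this behaves close to case-insensitive substring search, and lowering `threshold` makes it fuzzier. A minimal standalone sketch of the same idea follows; the sample DataFrame, its column values, and the query string are made up for illustration and are not the Space's data:

```python
# Minimal sketch of the fuzzy-search logic added in this commit, applied to a
# made-up DataFrame (rows and values here are illustrative only).
import pandas as pd
from fuzzywuzzy import fuzz

df = pd.DataFrame({
    "model": ["org-a/arabic-embedder", "org-b/multilingual-e5", "org-c/bge-arabic"],
    "score": [0.71, 0.69, 0.65],
})

def search_leaderboard(model_name, threshold=95):
    """Keep rows whose 'model' fuzzily matches model_name, best matches first."""
    if not model_name:
        return df

    def calculate_similarity(row):
        similarity = fuzz.partial_ratio(model_name.lower(), row["model"].lower())
        return similarity if similarity >= threshold else 0

    # Score every row, drop non-matches, rank by similarity, hide the helper column.
    scored = df.assign(similarity=df.apply(calculate_similarity, axis=1))
    return (
        scored[scored["similarity"] > 0]
        .sort_values("similarity", ascending=False)
        .drop(columns="similarity")
    )

if __name__ == "__main__":
    print(search_leaderboard("bge"))  # matches only "org-c/bge-arabic"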
requirements.txt ADDED
@@ -0,0 +1 @@
+fuzzywuzzy
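A small note on the new dependency: fuzzywuzzy works on its pure-Python backend but typically prints a warning suggesting python-Levenshtein for speed; neither that extra nor a version pin is part of this commit. A quick sanity check, assuming the requirement above is installed:

```python
# Confirms the new requirement imports and that an exact substring scores 100.
from fuzzywuzzy import fuzz

assert fuzz.partial_ratio("bge", "org-c/bge-arabic") == 100
print("fuzzywuzzy import OK")
```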
utils.py CHANGED
@@ -50,7 +50,7 @@ def submit_model(model_name, revision, precision, params, license):
     if df_retrieval.empty:
         return "**Error: Could not load the retrieval results.**"
 
-    existing_models_results = df_retrieval[['Model'
+    existing_models_results = df_retrieval[['Model']]
 
     # Handle 'Missing' precision
     if precision == 'Missing':
@@ -137,7 +137,7 @@ def load_requests(status_folder):
     requests_data = []
     folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
 
-    hf_api_token = os.environ.get('
+    hf_api_token = os.environ.get('HF_TOKEN', None)
 
     try:
         # List files in the dataset repository
@@ -174,9 +174,9 @@
     return df
 
 
-def submit_gradio_module():
-    with gr.Tab("Submit Model") as submitter_tab:
+def submit_gradio_module(type):
 
+    with gr.Tab(f"Submit {type}") as submitter_tab:
         with gr.Row(equal_height=True):
             model_name_input = gr.Textbox(
                 label="Model",
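For orientation, the utils.py change turns the old fixed "Submit Model" tab into a parametrized builder, which app.py now calls once per section ("Retriever", "Reranker"). A minimal sketch of that reuse pattern, trimmed down to the tab structure; the real `submit_gradio_module` builds the full submission form, and the placeholder Markdown components here are stand-ins:

```python
# Minimal sketch (not the Space's full code) of reusing the parametrized
# submit_gradio_module: each call mounts its own "Submit <type>" tab.
import gradio as gr

def submit_gradio_module(type):
    with gr.Tab(f"Submit {type}") as submitter_tab:
        with gr.Row(equal_height=True):
            model_name_input = gr.Textbox(label="Model")
    return submitter_tab

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Retrieval"):
            with gr.Tabs():
                with gr.Tab("Leaderboard"):
                    gr.Markdown("retrieval leaderboard placeholder")
                submit_gradio_module("Retriever")
        with gr.Tab("Reranking"):
            with gr.Tabs():
                with gr.Tab("Leaderboard"):
                    gr.Markdown("reranking leaderboard placeholder")
                submit_gradio_module("Reranker")

if __name__ == "__main__":
    demo.launch()
```

This mirrors the `submit_gradio_module("Retriever")` and `submit_gradio_module("Reranker")` calls in the app.py hunk above, where the parameter changes the tab label.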
|