Spaces:
Running
Running
Commit
·
e4dae69
1
Parent(s):
eae0069
removing print statements
Browse files- app.py +34 -9
- src/display/css_html_js.py +9 -2
- src/leaderboard/read_evals.py +4 -4
- src/populate.py +1 -4
app.py
CHANGED
@@ -90,7 +90,19 @@ def init_leaderboard(dataframe):
|
|
90 |
interactive=False,
|
91 |
)
|
92 |
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
leaderboard_dataframes = {
|
96 |
region: get_leaderboard_df(
|
@@ -104,15 +116,28 @@ leaderboard_dataframes = {
|
|
104 |
}
|
105 |
# Preload leaderboard blocks
|
106 |
js_switch_code = """
|
107 |
-
(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
document.querySelectorAll('[id^="leaderboard-"]').forEach(el => el.classList.remove("visible"));
|
109 |
-
const target = document.getElementById(
|
110 |
if (target) {
|
111 |
target.classList.add("visible");
|
112 |
}
|
113 |
}
|
114 |
"""
|
115 |
|
|
|
116 |
demo = gr.Blocks(css=custom_css)
|
117 |
with demo:
|
118 |
gr.HTML(TITLE)
|
@@ -122,19 +147,19 @@ with demo:
|
|
122 |
with gr.TabItem("🏅 mSTEB Text Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
123 |
with gr.Row():
|
124 |
region_dropdown = gr.Dropdown(
|
125 |
-
choices=
|
126 |
label="Select Region",
|
127 |
value="All",
|
128 |
interactive=True,
|
129 |
)
|
130 |
|
131 |
# Region-specific leaderboard containers
|
132 |
-
for
|
133 |
with gr.Column(
|
134 |
-
elem_id=f"leaderboard-{
|
135 |
-
elem_classes=["visible"] if
|
136 |
-
)
|
137 |
-
init_leaderboard(leaderboard_dataframes[
|
138 |
|
139 |
# JS hook to toggle visible leaderboard
|
140 |
region_dropdown.change(None, js=js_switch_code, inputs=[region_dropdown])
|
|
|
90 |
interactive=False,
|
91 |
)
|
92 |
|
93 |
+
REGION_MAP = {
|
94 |
+
"All": "All",
|
95 |
+
"Africa": "Africa",
|
96 |
+
"Americas/Oceania": "Americas_Oceania",
|
97 |
+
"Asia (S)": "Asia_S",
|
98 |
+
"Asia (SE)": "Asia_SE",
|
99 |
+
"Asia (W, C)": "Asia_W_C",
|
100 |
+
"Asia (E)": "Asia_E",
|
101 |
+
"Europe (W, N, S)": "Europe_W_N_S",
|
102 |
+
"Europe (E)": "Europe_E",
|
103 |
+
}
|
104 |
+
|
105 |
+
REGIONS = ["All", "Africa", "Americas_Oceania", "Asia_S", "Asia_SE", "Asia_W_C", "Asia_E", "Europe_W_N_S", "Europe_E"]
|
106 |
|
107 |
leaderboard_dataframes = {
|
108 |
region: get_leaderboard_df(
|
|
|
116 |
}
|
117 |
# Preload leaderboard blocks
|
118 |
js_switch_code = """
|
119 |
+
(displayRegion) => {
|
120 |
+
const regionMap = {
|
121 |
+
"All": "All",
|
122 |
+
"Africa": "Africa",
|
123 |
+
"Americas/Oceania": "Americas_Oceania",
|
124 |
+
"Asia (S)": "Asia_S",
|
125 |
+
"Asia (SE)": "Asia_SE",
|
126 |
+
"Asia (W, C)": "Asia_W_C",
|
127 |
+
"Asia (E)": "Asia_E",
|
128 |
+
"Europe (W, N, S)": "Europe_W_N_S",
|
129 |
+
"Europe (E)": "Europe_E"
|
130 |
+
};
|
131 |
+
const region = regionMap[displayRegion];
|
132 |
document.querySelectorAll('[id^="leaderboard-"]').forEach(el => el.classList.remove("visible"));
|
133 |
+
const target = document.getElementById("leaderboard-" + region);
|
134 |
if (target) {
|
135 |
target.classList.add("visible");
|
136 |
}
|
137 |
}
|
138 |
"""
|
139 |
|
140 |
+
|
141 |
demo = gr.Blocks(css=custom_css)
|
142 |
with demo:
|
143 |
gr.HTML(TITLE)
|
|
|
147 |
with gr.TabItem("🏅 mSTEB Text Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
148 |
with gr.Row():
|
149 |
region_dropdown = gr.Dropdown(
|
150 |
+
choices=list(REGION_MAP.keys()),
|
151 |
label="Select Region",
|
152 |
value="All",
|
153 |
interactive=True,
|
154 |
)
|
155 |
|
156 |
# Region-specific leaderboard containers
|
157 |
+
for display_name, region_key in REGION_MAP.items():
|
158 |
with gr.Column(
|
159 |
+
elem_id=f"leaderboard-{region_key}",
|
160 |
+
elem_classes=["visible"] if region_key == "All" else []
|
161 |
+
):
|
162 |
+
init_leaderboard(leaderboard_dataframes[region_key])
|
163 |
|
164 |
# JS hook to toggle visible leaderboard
|
165 |
region_dropdown.change(None, js=js_switch_code, inputs=[region_dropdown])
|
src/display/css_html_js.py
CHANGED
@@ -98,10 +98,17 @@ custom_css = """
|
|
98 |
display: none;
|
99 |
}
|
100 |
#leaderboard-All.visible,
|
101 |
-
#leaderboard-
|
102 |
-
#leaderboard-
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
display: block;
|
104 |
}
|
|
|
105 |
"""
|
106 |
|
107 |
get_window_url_params = """
|
|
|
98 |
display: none;
|
99 |
}
|
100 |
#leaderboard-All.visible,
|
101 |
+
#leaderboard-Africa.visible,
|
102 |
+
#leaderboard-Americas_Oceania.visible,
|
103 |
+
#leaderboard-Asia_S.visible,
|
104 |
+
#leaderboard-Asia_SE.visible,
|
105 |
+
#leaderboard-Asia_W_C.visible,
|
106 |
+
#leaderboard-Asia_E.visible,
|
107 |
+
#leaderboard-Europe_W_N_S.visible,
|
108 |
+
#leaderboard-Europe_E.visible {
|
109 |
display: block;
|
110 |
}
|
111 |
+
|
112 |
"""
|
113 |
|
114 |
get_window_url_params = """
|
src/leaderboard/read_evals.py
CHANGED
@@ -207,8 +207,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
207 |
for model_result_filepath in model_result_filepaths:
|
208 |
# Creation of result
|
209 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
210 |
-
print('testing this one')
|
211 |
-
print(eval_result)
|
212 |
eval_result.update_with_request_file(requests_path)
|
213 |
|
214 |
# Store results of same eval together
|
@@ -225,7 +225,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
225 |
results.append(v)
|
226 |
except KeyError: # not all eval values present
|
227 |
continue
|
228 |
-
print('results')
|
229 |
-
print(results)
|
230 |
|
231 |
return results
|
|
|
207 |
for model_result_filepath in model_result_filepaths:
|
208 |
# Creation of result
|
209 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
210 |
+
# print('testing this one')
|
211 |
+
# print(eval_result)
|
212 |
eval_result.update_with_request_file(requests_path)
|
213 |
|
214 |
# Store results of same eval together
|
|
|
225 |
results.append(v)
|
226 |
except KeyError: # not all eval values present
|
227 |
continue
|
228 |
+
# print('results')
|
229 |
+
# print(results)
|
230 |
|
231 |
return results
|
src/populate.py
CHANGED
@@ -14,15 +14,12 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
14 |
# this here if region is none gets main results. I have to pass region value here to get region based results
|
15 |
# and they should come.
|
16 |
all_data_json = [v.to_dict(region) for v in raw_data]
|
17 |
-
print('all_data_json', all_data_json)
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
19 |
-
print('df', df)
|
20 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
21 |
df = df[cols].round(decimals=2)
|
22 |
-
print('df after sorting', df)
|
23 |
# filter out if any of the benchmarks have not been produced
|
24 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
25 |
-
print('df after filtering', df)
|
26 |
return df
|
27 |
|
28 |
|
|
|
14 |
# this here if region is none gets main results. I have to pass region value here to get region based results
|
15 |
# and they should come.
|
16 |
all_data_json = [v.to_dict(region) for v in raw_data]
|
17 |
+
# print('all_data_json', all_data_json)
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
|
|
19 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
20 |
df = df[cols].round(decimals=2)
|
|
|
21 |
# filter out if any of the benchmarks have not been produced
|
22 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
|
|
23 |
return df
|
24 |
|
25 |
|