Spaces:
Running
Running
Fix missing results by reading all files
Browse files
- app.py +5 -5
- src/results.py +14 -10
app.py
CHANGED
@@ -7,12 +7,12 @@ from src.details import update_subtasks_component, update_load_details_component
|
|
7 |
display_details, update_sample_idx_component, clear_details
|
8 |
from src.results import update_load_results_component, \
|
9 |
load_results_dataframes, display_results, update_tasks_component, clear_results, \
|
10 |
-
|
11 |
|
12 |
|
13 |
# if __name__ == "__main__":
|
14 |
-
|
15 |
-
load_results_dataframes = partial(load_results_dataframes,
|
16 |
|
17 |
with gr.Blocks(fill_height=True) as demo:
|
18 |
gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
|
@@ -20,10 +20,10 @@ with gr.Blocks(fill_height=True) as demo:
|
|
20 |
|
21 |
with gr.Row():
|
22 |
with gr.Column():
|
23 |
-
model_id_1 = gr.Dropdown(choices=list(
|
24 |
dataframe_1 = gr.Dataframe(visible=False)
|
25 |
with gr.Column():
|
26 |
-
model_id_2 = gr.Dropdown(choices=list(
|
27 |
dataframe_2 = gr.Dataframe(visible=False)
|
28 |
|
29 |
with gr.Row():
|
|
|
7 |
display_details, update_sample_idx_component, clear_details
|
8 |
from src.results import update_load_results_component, \
|
9 |
load_results_dataframes, display_results, update_tasks_component, clear_results, \
|
10 |
+
sort_result_paths_per_model, fetch_result_paths
|
11 |
|
12 |
|
13 |
# if __name__ == "__main__":
|
14 |
+
result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())
|
15 |
+
load_results_dataframes = partial(load_results_dataframes, result_paths_per_model=result_paths_per_model)
|
16 |
|
17 |
with gr.Blocks(fill_height=True) as demo:
|
18 |
gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
|
|
|
20 |
|
21 |
with gr.Row():
|
22 |
with gr.Column():
|
23 |
+
model_id_1 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
|
24 |
dataframe_1 = gr.Dataframe(visible=False)
|
25 |
with gr.Column():
|
26 |
+
model_id_2 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
|
27 |
dataframe_2 = gr.Dataframe(visible=False)
|
28 |
|
29 |
with gr.Row():
|
src/results.py
CHANGED
@@ -13,35 +13,39 @@ def fetch_result_paths():
|
|
13 |
return paths
|
14 |
|
15 |
|
16 |
-
def
|
17 |
from collections import defaultdict
|
18 |
|
19 |
d = defaultdict(list)
|
20 |
for path in paths:
|
21 |
model_id, _ = path[len(RESULTS_DATASET_ID) + 1:].rsplit("/", 1)
|
22 |
d[model_id].append(path)
|
23 |
-
return {model_id:
|
24 |
|
25 |
|
26 |
def update_load_results_component():
    """Build an interactive "Load" button and return it twice as a 2-tuple."""
    load_button = gr.Button("Load", interactive=True)
    return load_button, load_button
|
28 |
|
29 |
|
30 |
-
def load_results_dataframe(model_id,
|
31 |
-
if not model_id or not
|
32 |
return
|
33 |
-
|
34 |
fs = HfFileSystem()
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
38 |
df = pd.json_normalize([{key: value for key, value in data.items()}])
|
39 |
# df.columns = df.columns.str.split(".") # .split return a list instead of a tuple
|
40 |
return df.set_index(pd.Index([model_name])).reset_index()
|
41 |
|
42 |
|
43 |
-
def load_results_dataframes(*model_ids,
|
44 |
-
return [load_results_dataframe(model_id,
|
45 |
|
46 |
|
47 |
def display_results(task, *dfs):
|
|
|
13 |
return paths
|
14 |
|
15 |
|
16 |
+
def sort_result_paths_per_model(paths):
    """Group result file paths by model id and sort each group.

    The model id of a path is whatever lies between the leading
    ``RESULTS_DATASET_ID + "/"`` prefix and the final ``/`` of the path.

    Args:
        paths: Iterable of result file paths, each starting with
            ``RESULTS_DATASET_ID``.

    Returns:
        A plain dict mapping each model id to the sorted list of its paths.
    """
    from collections import defaultdict

    # Number of leading characters to drop: the dataset id plus its "/".
    prefix_length = len(RESULTS_DATASET_ID) + 1
    grouped = defaultdict(list)
    for result_path in paths:
        model_id, _ = result_path[prefix_length:].rsplit("/", 1)
        grouped[model_id].append(result_path)
    return {model_id: sorted(model_paths) for model_id, model_paths in grouped.items()}
|
24 |
|
25 |
|
26 |
def update_load_results_component():
    """Build an interactive "Load" button and return it twice as a 2-tuple."""
    load_button = gr.Button("Load", interactive=True)
    return load_button, load_button
|
28 |
|
29 |
|
30 |
+
def load_results_dataframe(model_id, result_paths_per_model=None):
    """Load and merge every result file of one model into a one-row DataFrame.

    The "results" and "configs" sections of all files listed for the model
    are merged; on key collisions, files later in the list override earlier
    ones.

    Args:
        model_id: Key of the model in ``result_paths_per_model``.
        result_paths_per_model: Mapping from model id to the list of result
            file paths of that model.

    Returns:
        A single-row ``pandas.DataFrame`` holding the flattened merged data,
        with the model name moved into a regular column by ``reset_index``.
        ``None`` when the model id is empty or unknown, when no mapping is
        given, or when the model has no result files.
    """
    if not model_id or not result_paths_per_model:
        return
    # An unknown model id or an empty path list yields no dataframe instead
    # of raising KeyError here or UnboundLocalError at the return below.
    result_paths = result_paths_per_model.get(model_id)
    if not result_paths:
        return
    fs = HfFileSystem()
    data = {"results": {}, "configs": {}}
    model_name = "Model"
    for path in result_paths:
        with fs.open(path, "r") as f:
            d = json.load(f)
        # Tolerate files that lack one of the two sections.
        data["results"].update(d.get("results", {}))
        data["configs"].update(d.get("configs", {}))
        # The name reported by the last file read wins.
        model_name = d.get("model_name", "Model")
    df = pd.json_normalize([data])
    # NOTE: splitting the dotted column names with df.columns.str.split(".")
    # is not done here because .split returns lists instead of tuples.
    return df.set_index(pd.Index([model_name])).reset_index()
|
45 |
|
46 |
|
47 |
+
def load_results_dataframes(*model_ids, result_paths_per_model=None):
    """Load one results dataframe per given model id.

    Args:
        *model_ids: Model ids to load, in output order.
        result_paths_per_model: Mapping forwarded unchanged to
            ``load_results_dataframe`` for every model id.

    Returns:
        A list with the result of ``load_results_dataframe`` for each model
        id, in the order the ids were passed.
    """
    dataframes = []
    for model_id in model_ids:
        dataframe = load_results_dataframe(
            model_id, result_paths_per_model=result_paths_per_model
        )
        dataframes.append(dataframe)
    return dataframes
|
49 |
|
50 |
|
51 |
def display_results(task, *dfs):
|