Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
6b9cbbe
1
Parent(s):
2293858
Updated populate.py
Browse files- src/populate.py +55 -46
src/populate.py
CHANGED
@@ -1,65 +1,74 @@
|
|
1 |
import json
|
2 |
import os
|
3 |
-
|
4 |
import pandas as pd
|
5 |
-
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
|
8 |
from src.leaderboard.filter_models import filter_models_flags
|
9 |
from src.leaderboard.read_evals import get_raw_eval_results
|
10 |
|
11 |
|
12 |
-
def
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
19 |
|
20 |
-
df = pd.DataFrame.from_records(all_data_json)
|
21 |
-
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
22 |
-
df = df[cols].round(decimals=2)
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
27 |
|
28 |
|
29 |
-
def get_evaluation_queue_df(save_path
|
30 |
-
|
31 |
all_evals = []
|
32 |
-
|
33 |
for entry in entries:
|
34 |
-
if ".
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
elif ".md" not in entry:
|
44 |
-
# this is a folder
|
45 |
-
sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
|
46 |
-
for sub_entry in sub_entries:
|
47 |
-
file_path = os.path.join(save_path, entry, sub_entry)
|
48 |
-
with open(file_path) as fp:
|
49 |
-
try:
|
50 |
-
data = json.load(fp)
|
51 |
-
except json.JSONDecodeError:
|
52 |
-
print(f"Error reading {file_path}")
|
53 |
-
continue
|
54 |
|
55 |
-
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
56 |
-
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
57 |
-
all_evals.append(data)
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
1 |
import json
|
2 |
import os
|
|
|
3 |
import pandas as pd
|
|
|
4 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
5 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
|
6 |
from src.leaderboard.filter_models import filter_models_flags
|
7 |
from src.leaderboard.read_evals import get_raw_eval_results
|
8 |
|
9 |
|
10 |
+
def _load_json_data(file_path):
    """Load and return the parsed JSON content of ``file_path``.

    Returns ``None`` (after printing a message) when the file cannot be
    decoded as UTF-8 or does not contain valid JSON, so callers can skip
    the file instead of aborting a whole directory scan. Note that a file
    whose content is the JSON literal ``null`` also yields ``None``.

    Other I/O errors (missing file, permission denied, ...) are not caught
    and propagate to the caller.
    """
    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError is raised for binary / non-UTF-8 files before the
        # JSON parser even runs; treat it the same as malformed JSON.
        print(f"Error reading JSON from {file_path}")
        return None
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
+
def _process_model_data(entry, model_name_key="model", revision_key="revision"):
    """Fill in the queue's model and revision columns on ``entry`` in place.

    The value stored under ``model_name_key`` (empty string when absent) is
    passed through ``make_clickable_model`` and written to the
    ``EvalQueueColumn.model`` column. The ``EvalQueueColumn.revision`` column
    receives the value under ``revision_key``, falling back to ``"main"``.

    The same (mutated) ``entry`` mapping is returned for convenience.
    """
    raw_model_name = entry.get(model_name_key, "")
    entry[EvalQueueColumn.model.name] = make_clickable_model(raw_model_name)

    revision = entry.get(revision_key, "main")
    entry[EvalQueueColumn.revision.name] = revision

    return entry
|
25 |
|
26 |
|
27 |
+
def _is_ignored_entry(name):
    """Return True for hidden entries and Markdown files, which are never request files."""
    return name.startswith(".") or name.endswith(".md")


def _iter_request_files(save_path):
    """Yield paths of candidate request files in ``save_path`` and its direct sub-folders."""
    for entry in os.listdir(save_path):
        if _is_ignored_entry(entry):
            continue
        entry_path = os.path.join(save_path, entry)
        if os.path.isfile(entry_path):
            yield entry_path
        elif os.path.isdir(entry_path):
            # One level of nesting only; apply the same hidden/.md filter
            # to the folder's contents as to the top level.
            for sub_entry in os.listdir(entry_path):
                if _is_ignored_entry(sub_entry):
                    continue
                sub_entry_path = os.path.join(entry_path, sub_entry)
                if os.path.isfile(sub_entry_path):
                    yield sub_entry_path


def get_evaluation_queue_df(save_path, cols):
    """Generate dataframes for finished, running, and pending evaluation entries.

    Every non-hidden, non-Markdown file directly inside ``save_path`` or inside
    one of its immediate sub-folders is read as JSON. Files that cannot be
    parsed, are empty, or do not contain a JSON object are skipped.

    Args:
        save_path: Directory holding the evaluation request files.
        cols: Column names used for each returned dataframe.

    Returns:
        A tuple ``(finished_df, running_df, pending_df)``. Entries whose
        ``"status"`` is missing or not listed in the status map are dropped.
    """
    all_evals = []
    for file_path in _iter_request_files(save_path):
        data = _load_json_data(file_path)
        # Skip unreadable/empty files and JSON that is not an object: the
        # enrichment step needs a mapping.
        if not data or not isinstance(data, dict):
            continue
        all_evals.append(_process_model_data(data))

    # Organizing data by status: each display group collects several raw statuses.
    status_map = {
        "PENDING": ["PENDING", "RERUN"],
        "RUNNING": ["RUNNING"],
        "FINISHED": ["FINISHED", "PENDING_NEW_EVAL"],
    }
    status_dfs = {status: [] for status in status_map}
    for eval_data in all_evals:
        # A request file without a status must not abort the whole queue.
        current_status = eval_data.get("status")
        for status, extra_statuses in status_map.items():
            if current_status in extra_statuses:
                status_dfs[status].append(eval_data)
                break  # groups are disjoint, so the first match is the only one

    return tuple(
        pd.DataFrame(status_dfs[status], columns=cols)
        for status in ("FINISHED", "RUNNING", "PENDING")
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
|
|
|
|
|
|
63 |
|
64 |
+
def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmark_cols):
    """Build the leaderboard dataframe from the raw evaluation results.

    The raw results returned by ``get_raw_eval_results`` are converted to
    dicts, ``baseline_row`` is appended, and the records are passed to
    ``filter_models_flags`` before being turned into a dataframe. The frame
    is sorted by the average column (descending), restricted to ``cols``,
    rounded to two decimals, and finally indexed with the result of
    ``has_no_nan_values(df, benchmark_cols)``.

    Returns:
        A ``(raw_data, df)`` pair: the untouched raw results and the
        processed dataframe.
    """
    raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)

    records = [result.to_dict() for result in raw_data]
    records.append(baseline_row)
    filter_models_flags(records)

    leaderboard = (
        pd.DataFrame.from_records(records)
        .sort_values(by=[AutoEvalColumn.average.name], ascending=False)[cols]
        .round(decimals=2)
    )
    keep_mask = has_no_nan_values(leaderboard, benchmark_cols)
    return raw_data, leaderboard[keep_mask]
|