Spaces:
Running
Running
Commit
·
f924923
1
Parent(s):
e51e6f4
fix: duplicated entries with multiple languages
Browse files- src/populate.py +17 -20
src/populate.py
CHANGED
@@ -7,30 +7,27 @@ from src.leaderboard.read_evals import get_raw_assessment_results
|
|
7 |
|
8 |
|
9 |
def expand_multi_language_entries(df):
|
10 |
-
"""
|
11 |
if df.empty or auto_eval_column_attrs.language.name not in df.columns:
|
12 |
return df
|
13 |
-
|
14 |
-
expanded_rows = []
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
expanded_rows.append(row_copy)
|
32 |
|
33 |
-
return
|
34 |
|
35 |
|
36 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|
|
|
7 |
|
8 |
|
9 |
def expand_multi_language_entries(df):
    """Add one boolean ``_lang_<name>`` column per language found in the language column.

    Multi-language entries (names separated by "/") stay on a single row; the
    extra columns only flag which individual languages each row lists, so
    callers can filter on them.

    A row is flagged for a language only when that exact name is one of its
    "/"-separated, whitespace-stripped tokens. Substring matching is avoided
    on purpose: it would flag "C" for "C++" rows and "Java" for "JavaScript"
    rows. Non-string cells (None, NaN, ...) are flagged for no language.

    Args:
        df: DataFrame that may contain the column named by
            ``auto_eval_column_attrs.language.name``.

    Returns:
        The same DataFrame object, with the flag columns added in place. It is
        returned untouched when it is empty or has no language column.
    """
    language_col = auto_eval_column_attrs.language.name
    if df.empty or language_col not in df.columns:
        return df

    def split_languages(value):
        # Only strings carry language names; anything else lists no languages.
        if not isinstance(value, str):
            return frozenset()
        stripped = (part.strip() for part in value.split("/"))
        return frozenset(part for part in stripped if part)

    # Tokenise every row once instead of re-parsing the cell for each language.
    row_languages = [split_languages(value) for value in df[language_col]]

    # Distinct names can sanitise to the same column (e.g. "C" and "c"), so
    # group them and let the column flag a row listing any member of the group.
    # Iterating the sorted names keeps the column creation order deterministic.
    languages_by_column = {}
    for lang in sorted(set().union(*row_languages)):
        safe_lang = lang.replace("+", "plus").replace("#", "sharp").replace(" ", "_").lower()
        languages_by_column.setdefault(f"_lang_{safe_lang}", set()).add(lang)

    for col_name, members in languages_by_column.items():
        df[col_name] = [not members.isdisjoint(tokens) for tokens in row_languages]

    return df
|
31 |
|
32 |
|
33 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|