Spaces:
Running
Running
Commit
·
f924923
1
Parent(s):
e51e6f4
fix: duplicated entries with multiple languages
Browse files- src/populate.py +17 -20
src/populate.py
CHANGED
|
@@ -7,30 +7,27 @@ from src.leaderboard.read_evals import get_raw_assessment_results
|
|
| 7 |
|
| 8 |
|
| 9 |
def expand_multi_language_entries(df):
|
| 10 |
-
"""
|
| 11 |
if df.empty or auto_eval_column_attrs.language.name not in df.columns:
|
| 12 |
return df
|
| 13 |
-
|
| 14 |
-
expanded_rows = []
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
expanded_rows.append(row_copy)
|
| 32 |
|
| 33 |
-
return
|
| 34 |
|
| 35 |
|
| 36 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
def expand_multi_language_entries(df):
    """Add one boolean ``_lang_<name>`` filter column per individual language.

    Multi-language entries (language cell like ``"Python / C++"``) stay as a
    single row; the language cell is split on ``/`` and each distinct,
    whitespace-stripped token gets a helper column that is True for the rows
    whose language cell contains that exact token.

    Args:
        df: DataFrame expected to hold the column named by
            ``auto_eval_column_attrs.language.name``.

    Returns:
        The same DataFrame object, with the helper columns added in place.
        It is returned untouched when it is empty or has no language column.
    """
    language_column = auto_eval_column_attrs.language.name
    if df.empty or language_column not in df.columns:
        return df

    def _split_languages(value):
        # Non-string cells (None, NaN, ...) carry no language information.
        if not isinstance(value, str):
            return frozenset()
        tokens = (part.strip() for part in value.split("/"))
        return frozenset(token for token in tokens if token)

    # Tokenise every cell once so each row can be matched by exact token
    # membership. A substring test would wrongly flag "C" for "C++" / "C#"
    # rows and "Java" for "JavaScript" rows.
    row_languages = df[language_column].apply(_split_languages)

    all_languages = set()
    for languages in row_languages:
        all_languages.update(languages)

    # Group languages by sanitised column name: distinct spellings such as
    # "C" and "c" collapse to the same column and must be OR-ed together
    # rather than overwrite one another.
    languages_by_column = {}
    for lang in sorted(all_languages):
        safe_lang = lang.replace("+", "plus").replace("#", "sharp").replace(" ", "_").lower()
        languages_by_column.setdefault(f"_lang_{safe_lang}", set()).add(lang)

    for col_name, accepted in languages_by_column.items():
        # `accepted` is bound as a default argument so the lambda does not
        # depend on the loop variable.
        df[col_name] = row_languages.apply(
            lambda languages, accepted=accepted: not accepted.isdisjoint(languages)
        )

    return df
|
| 31 |
|
| 32 |
|
| 33 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|