seonglae-holistic committed on
Commit
f924923
·
1 Parent(s): e51e6f4

fix: duplicated entries with multiple languages

Browse files
Files changed (1) hide show
  1. src/populate.py +17 -20
src/populate.py CHANGED
@@ -7,30 +7,27 @@ from src.leaderboard.read_evals import get_raw_assessment_results
7
 
8
 
9
  def expand_multi_language_entries(df):
10
- """Expand multi-language entries (like 'Python/C++') into separate rows for OR filtering"""
11
  if df.empty or auto_eval_column_attrs.language.name not in df.columns:
12
  return df
13
-
14
- expanded_rows = []
15
 
16
- for idx, row in df.iterrows():
17
- lang_value = row[auto_eval_column_attrs.language.name]
18
-
19
- # If language contains /, create separate rows for each language
20
- if isinstance(lang_value, str) and "/" in lang_value:
21
- languages = [lang.strip() for lang in lang_value.split("/")]
22
- for lang in languages:
23
- new_row = row.copy()
24
- new_row[auto_eval_column_attrs.language.name] = lang
25
- new_row["_original_language"] = lang_value # Keep original for display
26
- expanded_rows.append(new_row)
27
- else:
28
- # Keep single language rows as is
29
- row_copy = row.copy()
30
- row_copy["_original_language"] = lang_value
31
- expanded_rows.append(row_copy)
32
 
33
- return pd.DataFrame(expanded_rows).reset_index(drop=True)
34
 
35
 
36
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
 
7
 
8
 
9
  def expand_multi_language_entries(df):
10
+ """Keep multi-language entries as single rows but create individual language columns for filtering"""
11
  if df.empty or auto_eval_column_attrs.language.name not in df.columns:
12
  return df
 
 
13
 
14
+ # Get all unique individual languages
15
+ all_languages = set()
16
+ for value in df[auto_eval_column_attrs.language.name].unique():
17
+ if isinstance(value, str):
18
+ languages = [lang.strip() for lang in value.split("/")]
19
+ all_languages.update(languages)
20
+
21
+ # Create individual language columns for filtering
22
+ for lang in sorted(all_languages):
23
+ if lang: # Skip empty strings
24
+ safe_lang = lang.replace("+", "plus").replace("#", "sharp").replace(" ", "_").lower()
25
+ col_name = f"_lang_{safe_lang}"
26
+ df[col_name] = df[auto_eval_column_attrs.language.name].apply(
27
+ lambda x: lang in str(x) if x is not None else False
28
+ )
 
29
 
30
+ return df
31
 
32
 
33
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):