seonglae-holistic committed
Commit 92edcfa · 1 Parent(s): ca72b36

feat: multi programming language select

app.py CHANGED
@@ -81,6 +81,38 @@ initialize_data_directories()
 # Load data for leaderboard
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
+# Extract unique languages for filtering
+def get_unique_languages(df):
+    """Extract all unique individual languages from the Language column"""
+    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
+        return []
+
+    all_languages = set()
+    for value in df[auto_eval_column_attrs.language.name].unique():
+        if isinstance(value, str):
+            if "/" in value:
+                languages = [lang.strip() for lang in value.split("/")]
+                all_languages.update(languages)
+            else:
+                all_languages.add(value.strip())
+
+    return sorted(list(all_languages))
+
+# Create a mapping for language filtering
+UNIQUE_LANGUAGES = get_unique_languages(LEADERBOARD_DF)
+
+# Create a special column for individual language filtering
+if not LEADERBOARD_DF.empty:
+    # Create a column that contains all individual languages as a list
+    LEADERBOARD_DF["_languages_list"] = LEADERBOARD_DF[auto_eval_column_attrs.language.name].apply(
+        lambda x: [lang.strip() for lang in str(x).split("/")] if pd.notna(x) else []
+    )
+
+    # Create a text version of Active Maintenance for checkboxgroup filtering
+    LEADERBOARD_DF["_maintenance_filter"] = LEADERBOARD_DF[auto_eval_column_attrs.availability.name].apply(
+        lambda x: "Active" if x else "Inactive"
+    )
+
 # Load queue data
 (
     finished_eval_queue_df,
@@ -96,30 +128,46 @@ def init_leaderboard(dataframe):
         empty_df = pd.DataFrame(columns=pd.Index(all_columns))
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df
-    filter_columns = [
-        ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"),
-        ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"),
-        ColumnFilter(
-            auto_eval_column_attrs.stars.name,
-            type="slider",
-            min=0,
-            max=50000,
-            label="GitHub Stars",
-        ),
-        ColumnFilter(
-            auto_eval_column_attrs.availability.name, type="boolean", label="Show only active libraries"  # type: ignore
-        ),
-    ]
+
+    # Create filter columns list with proper typing
+    filter_columns = []
+
+    # 1. Library types
+    filter_columns.append(ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"))
+
+    # 2. Programming Language (checkboxgroup - OR filtering)
+    filter_columns.append(ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"))
+
+    # 3. GitHub Stars
+    filter_columns.append(ColumnFilter(
+        auto_eval_column_attrs.stars.name,
+        type="slider",
+        min=0,
+        max=50000,
+        label="GitHub Stars",
+    ))
+
+    # 4. Maintenance Status (checkboxgroup - separate from languages)
+    filter_columns.append(ColumnFilter("_maintenance_filter", type="checkboxgroup", label="Maintenance Status"))
+
+    # Hide columns
+    hidden_columns = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden]
+    hidden_columns.extend(["_languages_list", "_maintenance_filter", "_original_language"])  # Hide helper columns
+
+    # Update datatypes
+    datatypes = [getattr(auto_eval_column_attrs, field).type for field in AutoEvalColumn.model_fields]
+    datatypes.extend(["str", "str", "str"])  # For helper columns
+
     return Leaderboard(
         value=dataframe,
-        datatype=[getattr(auto_eval_column_attrs, field).type for field in AutoEvalColumn.model_fields],
+        datatype=datatypes,
         select_columns=SelectColumns(
            default_selection=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).displayed_by_default],
            cant_deselect=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[auto_eval_column_attrs.library.name, auto_eval_column_attrs.license_name.name],
-       hide_columns=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden],
+       hide_columns=hidden_columns,
        filter_columns=filter_columns,  # type: ignore
        bool_checkboxgroup_label="Filter libraries",
        interactive=False,
@@ -197,8 +245,8 @@ with demo:
         language = gr.Dropdown(
             choices=[i.value.name for i in Language if i != Language.Other],
             label="Programming Language",
-            multiselect=False,
-            value="Python",
+            multiselect=True,
+            value=["Python"],
             interactive=True,
         )
         framework = gr.Textbox(label="Framework/Ecosystem (e.g., PyTorch, React)")
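For illustration, the splitting behavior of the new `get_unique_languages` helper can be reproduced standalone. A minimal sketch, assuming a plain `"Language"` column and made-up rows in place of `LEADERBOARD_DF` and `auto_eval_column_attrs.language.name`:

```python
# Standalone sketch of the language-splitting logic above; the column
# name and sample rows are hypothetical stand-ins for the real data.
import pandas as pd

sample = pd.DataFrame({"Language": ["Python", "Python/C++", "Rust/Python"]})

def get_unique_languages(df: pd.DataFrame, column: str = "Language") -> list:
    """Collect every individual language, splitting 'A/B' entries on '/'."""
    all_languages = set()
    for value in df[column].dropna().unique():
        all_languages.update(lang.strip() for lang in str(value).split("/"))
    return sorted(all_languages)

print(get_unique_languages(sample))  # ['C++', 'Python', 'Rust']
```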
 
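Note that swapping the old boolean availability filter for a checkboxgroup is what forces the `_maintenance_filter` helper column: a checkboxgroup filters on text labels, not booleans. A minimal sketch of that mapping, with a hypothetical column name standing in for `auto_eval_column_attrs.availability.name`:

```python
# Sketch of the _maintenance_filter derivation: the boolean availability
# flag becomes text labels a checkboxgroup can filter on.
# "Active Maintenance" is a hypothetical stand-in column name.
import pandas as pd

df = pd.DataFrame({"Active Maintenance": [True, False, True]})
df["_maintenance_filter"] = df["Active Maintenance"].map({True: "Active", False: "Inactive"})
print(df["_maintenance_filter"].tolist())  # ['Active', 'Inactive', 'Active']
```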
src/leaderboard/read_evals.py CHANGED
@@ -19,6 +19,7 @@ class AssessmentResult(BaseModel):
     results: dict  # Risk scores
     framework: str = ""
     language: Language = Language.Other
+    language_str: str = ""  # Original language string to support multiple languages
     library_type: LibraryType = LibraryType.Unknown
     license: str = "?"
     stars: int = 0
@@ -58,7 +59,14 @@ class AssessmentResult(BaseModel):
         # Library metadata
         framework = assessment.get("framework", "")
         language_str = assessment.get("language", "Other")
-        language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
+
+        # Handle multiple languages separated by /
+        if "/" in language_str:
+            language_parts = [lang.strip() for lang in language_str.split("/")]
+            # Store the full string but parse the first language for enum
+            language = next((lang for lang in Language if lang.value.name == language_parts[0]), Language.Other)
+        else:
+            language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
 
         # Availability and verification
         last_update = assessment.get("last_updated", "")
@@ -80,6 +88,7 @@ class AssessmentResult(BaseModel):
             results=risk_scores,
             framework=framework,
             language=language,
+            language_str=language_str,
             license=assessment.get("license", "?"),
             availability=assessment.get("active_maintenance", True),
             verified=assessment.get("independently_verified", False),
@@ -115,7 +124,7 @@ class AssessmentResult(BaseModel):
             "assessment_id": self.assessment_id,  # not a column, just a save name
             auto_eval_column_attrs.library_type.name: self.library_type.value.name,
             auto_eval_column_attrs.library_type_symbol.name: self.library_type.value.symbol,
-            auto_eval_column_attrs.language.name: self.language.value.name,
+            auto_eval_column_attrs.language.name: self.language_str if self.language_str else self.language.value.name,
             auto_eval_column_attrs.framework.name: self.framework,
             auto_eval_column_attrs.library.name: make_clickable_library(self.library_name),
             auto_eval_column_attrs.version.name: self.version,
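The parse above keeps the raw string in `language_str` for display while mapping only the first slash-separated segment onto the `Language` enum, so `Python/C++` still renders in full but typed code sees `Language.Python`. A minimal sketch of that convention, using a simplified stand-in enum (the project's real `Language` members wrap a value object, hence `lang.value.name` in the diff):

```python
# Toy sketch of the first-segment enum mapping; this flat Enum is a
# simplified stand-in for the project's richer Language enum.
from enum import Enum

class Language(Enum):
    Python = "Python"
    Cpp = "C++"
    Other = "Other"

def parse_language(language_str: str):
    """Return (enum of first segment, raw string kept for display)."""
    first = language_str.split("/")[0].strip()
    language = next((lang for lang in Language if lang.value == first), Language.Other)
    return language, language_str

print(parse_language("Python/C++"))  # (<Language.Python: 'Python'>, 'Python/C++')
print(parse_language("Haskell"))     # (<Language.Other: 'Other'>, 'Haskell')
```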
src/populate.py CHANGED
@@ -6,6 +6,33 @@ from src.display.utils import auto_eval_column_attrs
 from src.leaderboard.read_evals import get_raw_assessment_results
 
 
+def expand_multi_language_entries(df):
+    """Expand multi-language entries (like 'Python/C++') into separate rows for OR filtering"""
+    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
+        return df
+
+    expanded_rows = []
+
+    for idx, row in df.iterrows():
+        lang_value = row[auto_eval_column_attrs.language.name]
+
+        # If language contains /, create separate rows for each language
+        if isinstance(lang_value, str) and "/" in lang_value:
+            languages = [lang.strip() for lang in lang_value.split("/")]
+            for lang in languages:
+                new_row = row.copy()
+                new_row[auto_eval_column_attrs.language.name] = lang
+                new_row["_original_language"] = lang_value  # Keep original for display
+                expanded_rows.append(new_row)
+        else:
+            # Keep single language rows as is
+            row_copy = row.copy()
+            row_copy["_original_language"] = lang_value
+            expanded_rows.append(row_copy)
+
+    return pd.DataFrame(expanded_rows).reset_index(drop=True)
+
+
 def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
     """Read all the runs in the folder and return a dataframe
 
@@ -26,6 +53,9 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
     # Create dataframe from assessment results
     all_df = pd.DataFrame.from_records([r.to_dict() for r in assessment_results])
 
+    # Expand multi-language entries for OR filtering
+    all_df = expand_multi_language_entries(all_df)
+
     # Ensure we have all the needed display columns
     all_columns = set(all_df.columns)
     for col in benchmark_cols:
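Row expansion is what turns the language checkboxgroup into an OR filter: a `Python/C++` library now occupies one row per language, so ticking either box matches it. The same idea can be sketched with pandas' `explode` instead of the commit's `iterrows` loop (toy column names, not the project's real display columns):

```python
# Vectorized sketch of the row-expansion idea using explode; an
# alternative to the loop above, not the commit's implementation.
import pandas as pd

df = pd.DataFrame({"Library": ["numpy", "pytorch"], "Language": ["Python", "Python/C++"]})

expanded = (
    df.assign(
        _original_language=df["Language"],       # keep original for display
        Language=df["Language"].str.split("/"),  # 'Python/C++' -> ['Python', 'C++']
    )
    .explode("Language")                         # one row per language
    .assign(Language=lambda d: d["Language"].str.strip())
    .reset_index(drop=True)
)
print(expanded)
#    Library Language _original_language
# 0    numpy   Python             Python
# 1  pytorch   Python         Python/C++
# 2  pytorch      C++         Python/C++
```

Either way, a multi-language library appears once per language in the underlying frame, which is why the original string is parked in `_original_language` for display.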
src/submission/submit.py CHANGED
@@ -57,7 +57,7 @@ def add_new_eval(
         "library": library_name,
         "version": library_version,
         "repository_url": repository_url,
-        "language": language,
+        "language": "/".join(language) if isinstance(language, list) else language,
         "framework": framework,
         "library_type": library_type.value.name,
         "license": license_name,