"""Functions to populate the leaderboard"""
import pandas as pd

from src.display.utils import auto_eval_column_attrs
from src.leaderboard.read_evals import get_raw_assessment_results
def expand_multi_language_entries(df):
    """Expand multi-language entries (like 'Python/C++') into separate rows.

    Each slash-separated language value is split so that filtering on a single
    language (OR semantics) matches the row. The original combined value is
    preserved in a helper column for display.

    Args:
        df: Leaderboard dataframe; may be empty or lack the language column.

    Returns:
        A new dataframe with one row per individual language and an added
        ``_original_language`` column, or ``df`` unchanged when there is
        nothing to expand.
    """
    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
        return df

    # Hoist the attribute lookup out of the loop; it is invariant.
    lang_col = auto_eval_column_attrs.language.name

    expanded_rows = []
    for _, row in df.iterrows():  # index is unused; rows are re-indexed below
        lang_value = row[lang_col]
        if isinstance(lang_value, str) and "/" in lang_value:
            # Multi-language entry: emit one row per language component.
            for lang in (part.strip() for part in lang_value.split("/")):
                new_row = row.copy()
                new_row[lang_col] = lang
                new_row["_original_language"] = lang_value  # Keep original for display
                expanded_rows.append(new_row)
        else:
            # Single-language entry: keep as is, but record the original value
            # so display code can treat all rows uniformly.
            row_copy = row.copy()
            row_copy["_original_language"] = lang_value
            expanded_rows.append(row_copy)

    return pd.DataFrame(expanded_rows).reset_index(drop=True)
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Read all the runs in the folder and return a dataframe

    Args:
        eval_results_path: Path to the assessment result files
        eval_requests_path: Path to the assessment request files
        cols: Columns names to include in the dataframe
        benchmark_cols: Risk categories column names (display names)

    Returns:
        Pandas dataframe for the leaderboard (empty but fully-columned on
        failure or when no results exist)
    """
    try:
        assessment_results = get_raw_assessment_results(eval_results_path, eval_requests_path)
        if len(assessment_results) == 0:
            # Nothing to show yet: empty dataframe carrying every expected column.
            return pd.DataFrame(columns=cols + benchmark_cols)

        # Build one record per assessment, then expand 'Python/C++'-style
        # language values so OR filtering works per language.
        records = [result.to_dict() for result in assessment_results]
        all_df = expand_multi_language_entries(pd.DataFrame.from_records(records))

        # Backfill any display column the raw results did not provide.
        present = set(all_df.columns)
        for col in benchmark_cols:
            if col not in present:
                print(f"Warning: Column '{col}' missing, adding empty column")
                all_df[col] = 10.0  # Default to highest risk

        # Lower Trust Score is better, so ascending order ranks best first.
        sort_col = auto_eval_column_attrs.overall_risk.name
        if sort_col in all_df.columns:
            all_df = all_df.sort_values(by=[sort_col])
        return all_df
    except Exception as e:
        print(f"Error reading evaluation results: {e}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame(columns=cols + benchmark_cols)  # Return empty dataframe with all columns
def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Read from the evaluation queue directory and return dataframes for each status

    Args:
        eval_requests_path: Path to the assessment request files
        eval_cols: Columns for the queue dataframes

    Returns:
        Tuple of dataframes (finished, running, pending)
    """
    try:
        import glob
        import json
        import os

        # One bucket per status we surface; anything else is dropped.
        buckets = {"FINISHED": [], "RUNNING": [], "PENDING": []}
        pattern = os.path.join(eval_requests_path, "*.json")

        for file_path in glob.glob(pattern):
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                # Pull only the fields the queue tables display, defaulting
                # to empty strings for anything missing.
                entry = {
                    field: data.get(field, "")
                    for field in ("library", "version", "language", "framework", "library_type")
                }
                entry["status"] = data.get("status", "UNKNOWN")
                if entry["status"] in buckets:
                    buckets[entry["status"]].append(entry)
            except Exception as e:
                # Skip unreadable/malformed request files; keep processing the rest.
                print(f"Error reading request file {file_path}: {e}")
                continue

        return (
            pd.DataFrame(buckets["FINISHED"], columns=eval_cols),
            pd.DataFrame(buckets["RUNNING"], columns=eval_cols),
            pd.DataFrame(buckets["PENDING"], columns=eval_cols),
        )
    except Exception as e:
        print(f"Error reading evaluation queue: {e}")
        # Return empty dataframes
        blank = pd.DataFrame(columns=eval_cols)
        return blank.copy(), blank.copy(), blank.copy()