Spaces:
Running
Running
File size: 5,522 Bytes
bccaf50 9ab539a 124bec5 bccaf50 9ab539a 92edcfa f924923 92edcfa f924923 92edcfa f924923 92edcfa bccaf50 5fc842f bccaf50 92edcfa 5fc842f 3211e96 124bec5 bccaf50 5fc842f bccaf50 f03f82b 5fc842f 9ab539a bccaf50 124bec5 bccaf50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
"""Functions to populate the leaderboard"""
import pandas as pd
from src.display.utils import auto_eval_column_attrs
from src.leaderboard.read_evals import get_raw_assessment_results
def expand_multi_language_entries(df):
    """Add one boolean ``_lang_<name>`` filter column per individual language.

    Multi-language entries stay as single rows. The language column (named by
    ``auto_eval_column_attrs.language.name``) is split on "/" and each part is
    stripped of surrounding whitespace. For every distinct non-empty part a
    boolean column is added that is True on the rows listing that language.

    Matching is done against the exact split tokens, not by substring, so a
    row whose language is "C++" is not flagged for "C", and "JavaScript" is
    not flagged for "Java".

    Args:
        df: Dataframe to annotate. It is modified in place.

    Returns:
        The same dataframe object. It is returned unchanged when it is empty
        or has no language column.
    """
    language_col = auto_eval_column_attrs.language.name
    if df.empty or language_col not in df.columns:
        return df

    def _language_tokens(value):
        # Non-string cells (None, NaN, numbers) carry no language information.
        if not isinstance(value, str):
            return frozenset()
        return frozenset(part.strip() for part in value.split("/") if part.strip())

    # Tokenise each row once; every per-language column below reuses this.
    row_tokens = [_language_tokens(value) for value in df[language_col]]
    all_languages = set().union(*row_tokens)

    flags_by_column = {}
    for lang in sorted(all_languages):
        safe_lang = lang.replace("+", "plus").replace("#", "sharp").replace(" ", "_").lower()
        col_name = f"_lang_{safe_lang}"
        hits = [lang in tokens for tokens in row_tokens]
        if col_name in flags_by_column:
            # Distinct spellings (e.g. "Python" / "python") can sanitise to the
            # same column name; combine them instead of overwriting.
            hits = [old or new for old, new in zip(flags_by_column[col_name], hits)]
        flags_by_column[col_name] = hits

    for col_name, hits in flags_by_column.items():
        df[col_name] = hits
    return df
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Build the leaderboard dataframe from the stored assessment results.

    Args:
        eval_results_path: Path to the assessment result files
        eval_requests_path: Path to the assessment request files
        cols: Column names used for the empty fallback dataframe
        benchmark_cols: Risk categories column names (display names)

    Returns:
        Pandas dataframe for the leaderboard. When there are no results, or
        when reading them raises, an empty dataframe with columns
        ``cols + benchmark_cols`` is returned instead.
    """
    try:
        results = get_raw_assessment_results(eval_results_path, eval_requests_path)

        # Nothing to show: hand back an empty frame with the expected columns.
        if len(results) == 0:
            return pd.DataFrame(columns=cols + benchmark_cols)

        records = [result.to_dict() for result in results]
        leaderboard = pd.DataFrame.from_records(records)

        # Add the per-language boolean columns used for OR filtering.
        leaderboard = expand_multi_language_entries(leaderboard)

        # Any benchmark column absent from the results is filled with 10.0.
        present = set(leaderboard.columns)
        for name in benchmark_cols:
            if name in present:
                continue
            print(f"Warning: Column '{name}' missing, adding empty column")
            leaderboard[name] = 10.0

        # Ascending sort on the overall risk column, when it exists.
        sort_key = auto_eval_column_attrs.overall_risk.name
        if sort_key in leaderboard.columns:
            leaderboard = leaderboard.sort_values(by=[sort_key])
        return leaderboard
    except Exception as err:
        print(f"Error reading evaluation results: {err}")
        import traceback

        traceback.print_exc()
        return pd.DataFrame(columns=cols + benchmark_cols)
def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Read the evaluation queue directory and return one dataframe per status.

    Every ``*.json`` file directly inside ``eval_requests_path`` is parsed and
    its "library", "version", "language", "framework", "library_type" and
    "status" fields are collected into a row (missing fields default to ""
    and a missing status to "UNKNOWN"). Rows are grouped by status; rows with
    any status other than FINISHED, RUNNING or PENDING are dropped.

    Files are processed in sorted path order so the row order is the same on
    every run; ``glob.glob`` alone gives no ordering guarantee.

    Args:
        eval_requests_path: Path to the assessment request files
        eval_cols: Columns for the queue dataframes

    Returns:
        Tuple of dataframes (finished, running, pending). A file that cannot
        be read or parsed is reported and skipped. If anything else fails,
        three empty dataframes with ``eval_cols`` as columns are returned.
    """
    import glob
    import json
    import os

    try:
        # Sorted for a deterministic row order across runs and filesystems.
        request_files = sorted(glob.glob(os.path.join(eval_requests_path, "*.json")))

        finished_data = []
        running_data = []
        pending_data = []

        for file_path in request_files:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)

                row = {
                    "library": data.get("library", ""),
                    "version": data.get("version", ""),
                    "language": data.get("language", ""),
                    "framework": data.get("framework", ""),
                    "library_type": data.get("library_type", ""),
                    "status": data.get("status", "UNKNOWN"),
                }
            except Exception as e:
                # Best effort: one unreadable or malformed file must not hide
                # the rest of the queue.
                print(f"Error reading request file {file_path}: {e}")
                continue

            status = row["status"]
            if status == "FINISHED":
                finished_data.append(row)
            elif status == "RUNNING":
                running_data.append(row)
            elif status == "PENDING":
                pending_data.append(row)

        # Passing columns=eval_cols keeps only those columns, in that order.
        finished_df = pd.DataFrame(finished_data, columns=eval_cols)
        running_df = pd.DataFrame(running_data, columns=eval_cols)
        pending_df = pd.DataFrame(pending_data, columns=eval_cols)
        return finished_df, running_df, pending_df
    except Exception as e:
        print(f"Error reading evaluation queue: {e}")
        # Fall back to three independent empty dataframes.
        empty_df = pd.DataFrame(columns=eval_cols)
        return empty_df.copy(), empty_df.copy(), empty_df.copy()
|