Spaces:
Running
Running
File size: 5,654 Bytes
bccaf50 9ab539a 124bec5 bccaf50 9ab539a 92edcfa bccaf50 5fc842f bccaf50 92edcfa 5fc842f 3211e96 124bec5 bccaf50 5fc842f bccaf50 f03f82b 5fc842f 9ab539a bccaf50 124bec5 bccaf50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
"""Functions to populate the leaderboard"""
import pandas as pd
from src.display.utils import auto_eval_column_attrs
from src.leaderboard.read_evals import get_raw_assessment_results
def expand_multi_language_entries(df):
    """Expand multi-language entries (like 'Python/C++') into separate rows for OR filtering.

    Args:
        df: Leaderboard dataframe; its language column (if present) holds either
            a single language or a '/'-joined combination.

    Returns:
        A new dataframe with one row per individual language. The original
        (possibly combined) value is kept in a trailing '_original_language'
        column for display. The input is returned untouched when it is empty
        or has no language column.
    """
    lang_col = auto_eval_column_attrs.language.name
    if df.empty or lang_col not in df.columns:
        return df

    expanded = df.copy()
    # Keep the original (possibly combined) value for display.
    expanded["_original_language"] = expanded[lang_col]
    # Turn 'A/B' strings into ['A', 'B'] (whitespace-stripped); leave
    # single-language and non-string values untouched.
    expanded[lang_col] = expanded[lang_col].map(
        lambda value: [part.strip() for part in value.split("/")]
        if isinstance(value, str) and "/" in value
        else value
    )
    # explode() emits one row per list element and passes scalars through,
    # replacing the Python-level iterrows()/row.copy() loop with vectorized
    # pandas operations; ignore_index matches the old reset_index(drop=True).
    return expanded.explode(lang_col, ignore_index=True)
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
    """Read all the runs in the folder and return a dataframe

    Args:
        eval_results_path: Path to the assessment result files
        eval_requests_path: Path to the assessment request files
        cols: Columns names to include in the dataframe
        benchmark_cols: Risk categories column names (display names)

    Returns:
        Pandas dataframe for the leaderboard
    """
    try:
        results = get_raw_assessment_results(eval_results_path, eval_requests_path)
        if len(results) == 0:
            # No assessments found: empty dataframe that still carries every column.
            return pd.DataFrame(columns=cols + benchmark_cols)

        # Build the leaderboard frame and split 'A/B' language rows for OR filtering.
        leaderboard = pd.DataFrame.from_records([result.to_dict() for result in results])
        leaderboard = expand_multi_language_entries(leaderboard)

        # Backfill any risk-category column the raw results did not provide.
        present = set(leaderboard.columns)
        for col in (name for name in benchmark_cols if name not in present):
            print(f"Warning: Column '{col}' missing, adding empty column")
            leaderboard[col] = 10.0  # Default to highest risk

        # Rank by Trust Score, ascending: lower scores are better.
        risk_col = auto_eval_column_attrs.overall_risk.name
        if risk_col in leaderboard.columns:
            leaderboard = leaderboard.sort_values(by=[risk_col])
        return leaderboard
    except Exception as e:
        print(f"Error reading evaluation results: {e}")
        import traceback
        traceback.print_exc()
        # Fall back to an empty dataframe that still carries every column.
        return pd.DataFrame(columns=cols + benchmark_cols)
def get_evaluation_queue_df(eval_requests_path, eval_cols):
    """Read from the evaluation queue directory and return dataframes for each status

    Args:
        eval_requests_path: Path to the assessment request files
        eval_cols: Columns for the queue dataframes

    Returns:
        Tuple of dataframes (finished, running, pending)
    """
    try:
        import glob
        import json
        import os

        # Rows bucketed by request status; any other status is silently dropped.
        buckets = {"FINISHED": [], "RUNNING": [], "PENDING": []}
        fields = ("library", "version", "language", "framework", "library_type")

        for request_file in glob.glob(os.path.join(eval_requests_path, "*.json")):
            try:
                with open(request_file, "r", encoding="utf-8") as handle:
                    payload = json.load(handle)
                # Extract the display fields, defaulting missing ones to "".
                record = {name: payload.get(name, "") for name in fields}
                record["status"] = payload.get("status", "UNKNOWN")
                if record["status"] in buckets:
                    buckets[record["status"]].append(record)
            except Exception as e:
                # A single unreadable/malformed request must not sink the queue.
                print(f"Error reading request file {request_file}: {e}")
                continue

        finished_df, running_df, pending_df = (
            pd.DataFrame(buckets[status], columns=eval_cols)
            for status in ("FINISHED", "RUNNING", "PENDING")
        )
        return finished_df, running_df, pending_df
    except Exception as e:
        print(f"Error reading evaluation queue: {e}")
        # Return empty dataframes
        blank = pd.DataFrame(columns=eval_cols)
        return blank.copy(), blank.copy(), blank.copy()
|