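"""Read LibVulnWatch assessment result and request JSON files into leaderboard rows."""
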
import glob
import json
import os
from dataclasses import dataclass
from datetime import datetime

from src.display.formatting import make_clickable_library, make_clickable_report
from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus


@dataclass
class AssessmentResult:
"""Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
"""
assessment_id: str # Unique identifier
library_name: str # org/repo
org: str
repo: str
version: str
results: dict # Risk scores
framework: str = ""
language: Language = Language.Other
library_type: LibraryType = LibraryType.Unknown
license: str = "?"
stars: int = 0
last_update: str = ""
    availability: bool = True  # Actively maintained
    verified: bool = False  # Independently verified assessment
report_url: str = "" # URL to detailed assessment report

    @classmethod
    def init_from_json_file(cls, json_filepath):
"""Initializes the assessment result from a JSON file"""
with open(json_filepath) as fp:
data = json.load(fp)
assessment = data.get("assessment", {})
# Get library and org
library_name = assessment.get("library_name", "")
org_and_repo = library_name.split("/", 1)
if len(org_and_repo) == 1:
org = None
repo = org_and_repo[0]
assessment_id = f"{repo}_{assessment.get('version', '')}"
else:
org = org_and_repo[0]
repo = org_and_repo[1]
assessment_id = f"{org}_{repo}_{assessment.get('version', '')}"
# Extract risk scores
risk_scores = {}
for task in Tasks:
domain = task.value
score = assessment.get("scores", {}).get(domain.benchmark, None)
if score is not None:
risk_scores[domain.benchmark] = score
# Library metadata
framework = assessment.get("framework", "")
language_str = assessment.get("language", "Other")
language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
# Availability and verification
last_update = assessment.get("last_updated", "")
if last_update:
try:
# Format date for display
dt = datetime.fromisoformat(last_update)
last_update = dt.strftime("%Y-%m-%d")
            except (ValueError, TypeError):
                # Keep the raw value if it is not an ISO-formatted date string
                pass
        return cls(
assessment_id=assessment_id,
library_name=library_name,
org=org,
repo=repo,
version=assessment.get("version", ""),
results=risk_scores,
framework=framework,
language=language,
license=assessment.get("license", "?"),
availability=assessment.get("active_maintenance", True),
verified=assessment.get("independently_verified", False),
last_update=last_update,
report_url=assessment.get("report_url", ""),
)

    def update_with_request_file(self, requests_path):
"""Finds the relevant request file for the current library and updates info with it"""
request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
try:
with open(request_file, "r") as f:
request = json.load(f)
self.library_type = LibraryType.from_str(request.get("library_type", ""))
self.stars = request.get("stars", 0)
        except Exception:
            print(f"Could not find or read request file for {self.library_name} version {self.version}")

    def to_dict(self):
"""Converts the Assessment Result to a dict compatible with our dataframe display"""
# Calculate Trust Score as equal-weight average
weights = {
"license_validation": 0.2,
"security_assessment": 0.2,
"maintenance_health": 0.2,
"dependency_management": 0.2,
"regulatory_compliance": 0.2
}
# Calculate Trust Score - if domain is missing, use highest risk score (10)
risk_sum = 0
weight_sum = 0
for domain, weight in weights.items():
score = self.results.get(domain, 10) # Default to highest risk if missing
risk_sum += score * weight
weight_sum += weight
trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
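        # Worked example: scores 2, 4, 3, 1 with one domain missing (defaults to 10)
        # -> trust_score = 0.2 * (2 + 4 + 3 + 1 + 10) / 1.0 = 4.0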
data_dict = {
"assessment_id": self.assessment_id, # not a column, just a save name
AutoEvalColumn.library_type.name: self.library_type.value.name,
AutoEvalColumn.library_type_symbol.name: self.library_type.value.symbol,
AutoEvalColumn.language.name: self.language.value.name,
AutoEvalColumn.framework.name: self.framework,
AutoEvalColumn.library.name: make_clickable_library(self.library_name),
AutoEvalColumn.version.name: self.version,
AutoEvalColumn.overall_risk.name: trust_score,
AutoEvalColumn.license_name.name: self.license,
AutoEvalColumn.stars.name: self.stars,
AutoEvalColumn.last_update.name: self.last_update,
AutoEvalColumn.verified.name: self.verified,
AutoEvalColumn.availability.name: self.availability,
AutoEvalColumn.report_url.name: make_clickable_report(self.report_url),
}
# Add task-specific risk scores - map to display column names
for task in Tasks:
task_enum = task.value # Task dataclass instance
benchmark_key = task_enum.benchmark # e.g., "license_validation"
col_name = task_enum.col_name # Use the display name, e.g., "License Risk"
risk_score = self.results.get(benchmark_key, 10) # Default to highest risk
data_dict[col_name] = risk_score
return data_dict


def get_request_file_for_library(requests_path, library_name, version):
"""Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
# Try multiple naming patterns for flexibility
possible_patterns = [
f"{library_name.replace('/', '_')}_eval_request_*.json", # Original pattern
f"{library_name.replace('/', '_')}_request.json", # Simple pattern
f"{library_name.replace('/', '_')}*.json" # Fallback pattern
]
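    # For "org/repo" these match e.g. "org_repo_eval_request_<suffix>.json",
    # "org_repo_request.json", and any other "org_repo*.json" file in requests_path.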
request_files = []
for pattern in possible_patterns:
pattern_path = os.path.join(requests_path, pattern)
found_files = glob.glob(pattern_path)
request_files.extend(found_files)
if not request_files:
print(f"Warning: No request files found matching {library_name}")
return ""
# Select correct request file (version)
request_file = ""
request_files = sorted(request_files, reverse=True)
for tmp_request_file in request_files:
try:
with open(tmp_request_file, "r") as f:
req_content = json.load(f)
if (
req_content.get("status", "") in ["FINISHED"] and
req_content.get("version", "") == version
):
request_file = tmp_request_file
break
except Exception as e:
print(f"Error reading {tmp_request_file}: {e}")
continue
return request_file


def get_raw_assessment_results(results_path: str, requests_path: str) -> list[AssessmentResult]:
"""From the path of the results folder root, extract all needed info for assessments"""
assessment_filepaths = []
for root, _, files in os.walk(results_path):
# We should only have json files in assessment results
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
continue
        # Sort the files by completion date (newest first) when that info is available
        def _completed_time(filename):
            with open(os.path.join(root, filename)) as fp:
                return datetime.fromisoformat(json.load(fp)["assessment"]["completed_time"])

        try:
            files.sort(key=_completed_time, reverse=True)
        except Exception:
            # Missing or malformed completed_time: keep the original file order
            pass
for file in files:
assessment_filepaths.append(os.path.join(root, file))
assessment_results = {}
for assessment_filepath in assessment_filepaths:
# Creation of result
assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
assessment_result.update_with_request_file(requests_path)
# Store results of same eval together
assessment_id = assessment_result.assessment_id
        if assessment_id in assessment_results:
assessment_results[assessment_id].results.update({k: v for k, v in assessment_result.results.items() if v is not None})
else:
assessment_results[assessment_id] = assessment_result
results = []
for v in assessment_results.values():
try:
v.to_dict() # we test if the dict version is complete
results.append(v)
except KeyError: # not all eval values present
continue
return results
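

if __name__ == "__main__":
    # Minimal usage sketch, not part of the leaderboard app. The directory names below
    # are placeholders (assumptions), not repository configuration.
    RESULTS_DIR = "eval-results"    # hypothetical path to assessment result JSON files
    REQUESTS_DIR = "eval-requests"  # hypothetical path to request JSON files
    for raw_result in get_raw_assessment_results(RESULTS_DIR, REQUESTS_DIR):
        row = raw_result.to_dict()
        print(raw_result.assessment_id, row[AutoEvalColumn.overall_risk.name])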