Commit 124bec5 · refactor: migrate to pydantic model
Parent: 93c138a

Files changed:
- app.py (+23 / -24)
- src/display/utils.py (+68 / -52)
- src/leaderboard/read_evals.py (+41 / -39)
- src/populate.py (+4 / -5)
app.py  CHANGED

@@ -1,10 +1,10 @@
 import gradio as gr
+from gradio.components import Dataframe
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 import os
-import shutil
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -22,10 +22,9 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
+    auto_eval_column_attrs,
     LibraryType,
-    fields,
     Language,
-    AssessmentStatus
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -94,34 +93,34 @@ def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         # Create an empty dataframe with the expected columns
         all_columns = COLS + [task.value.col_name for task in Tasks]
-        empty_df = pd.DataFrame(columns=all_columns)
+        empty_df = pd.DataFrame(columns=pd.Index(all_columns))
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df
-
-
-
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
-            ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
+    filter_columns = [
+        ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"),
+        ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"),
         ColumnFilter(
-
+            auto_eval_column_attrs.stars.name,
             type="slider",
             min=0,
             max=50000,
             label="GitHub Stars",
         ),
         ColumnFilter(
-
+            auto_eval_column_attrs.availability.name, type="boolean", label="Show only active libraries"  # type: ignore
         ),
-        ]
+    ]
+    return Leaderboard(
+        value=dataframe,
+        datatype=[getattr(auto_eval_column_attrs, field).type for field in AutoEvalColumn.model_fields],
+        select_columns=SelectColumns(
+            default_selection=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).displayed_by_default],
+            cant_deselect=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[auto_eval_column_attrs.library.name, auto_eval_column_attrs.license_name.name],
+        hide_columns=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden],
+        filter_columns=filter_columns,  # type: ignore
         bool_checkboxgroup_label="Filter libraries",
         interactive=False,
     )
@@ -150,7 +149,7 @@ with demo:
                 open=False,
             ):
                 with gr.Row():
-                    finished_eval_table =
+                    finished_eval_table = Dataframe(
                         value=finished_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
@@ -161,7 +160,7 @@ with demo:
                 open=False,
             ):
                 with gr.Row():
-                    running_eval_table =
+                    running_eval_table = Dataframe(
                         value=running_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
@@ -173,7 +172,7 @@ with demo:
                 open=False,
            ):
                 with gr.Row():
-                    pending_eval_table =
+                    pending_eval_table = Dataframe(
                         value=pending_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
src/display/utils.py  CHANGED

@@ -2,16 +2,13 @@ from dataclasses import dataclass
 from enum import Enum
 
 from src.about import Tasks
-
-def fields(raw_class):
-    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+from pydantic import BaseModel
 
 
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-
-class ColumnContent:
+class ColumnContent(BaseModel):
     name: str
     type: str
     displayed_by_default: bool
@@ -19,42 +16,61 @@ class ColumnContent:
     never_hidden: bool = False
 
 ## Leaderboard columns
-    [26 removed lines not captured in the page snapshot]
+class AutoEvalColumn(BaseModel):
+    library_type_symbol: ColumnContent
+    library: ColumnContent
+    overall_risk: ColumnContent
+    # Task columns
+    license: ColumnContent
+    security: ColumnContent
+    maintenance: ColumnContent
+    dependency: ColumnContent
+    regulatory: ColumnContent
+    # Library information
+    library_type: ColumnContent
+    framework: ColumnContent
+    version: ColumnContent
+    language: ColumnContent
+    license_name: ColumnContent
+    stars: ColumnContent
+    availability: ColumnContent
+    report_url: ColumnContent
+    last_update: ColumnContent
+    verified: ColumnContent
+
+auto_eval_column_attrs = AutoEvalColumn(
+    library_type_symbol=ColumnContent(name="T", type="str", displayed_by_default=True, never_hidden=True),
+    library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
+    overall_risk=ColumnContent(name="Trust Score", type="number", displayed_by_default=True),
+    # Task columns from Tasks enum
+    license=ColumnContent(name="License Risk", type="number", displayed_by_default=True),
+    security=ColumnContent(name="Security Risk", type="number", displayed_by_default=True),
+    maintenance=ColumnContent(name="Maintenance Risk", type="number", displayed_by_default=True),
+    dependency=ColumnContent(name="Dependency Risk", type="number", displayed_by_default=True),
+    regulatory=ColumnContent(name="Regulatory Risk", type="number", displayed_by_default=True),
+    # Library information
+    library_type=ColumnContent(name="Type", type="str", displayed_by_default=False),
+    framework=ColumnContent(name="Framework", type="str", displayed_by_default=False),
+    version=ColumnContent(name="Version", type="str", displayed_by_default=False, hidden=True),
+    language=ColumnContent(name="Language", type="str", displayed_by_default=False),
+    license_name=ColumnContent(name="License", type="str", displayed_by_default=True),
+    stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
+    availability=ColumnContent(name="Active Maintenance", type="bool", displayed_by_default=True),
+    report_url=ColumnContent(name="Report", type="markdown", displayed_by_default=True),
+    last_update=ColumnContent(name="Last Update", type="str", displayed_by_default=False),
+    verified=ColumnContent(name="Verified", type="bool", displayed_by_default=False),
+)
+
 
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
-    library = ColumnContent("library", "markdown", True)
-    version = ColumnContent("version", "str", True)
-    language = ColumnContent("language", "str", True)
-    framework = ColumnContent("framework", "str", True)
-    library_type = ColumnContent("library_type", "str", True)
-    status = ColumnContent("status", "str", True)
+    library = ColumnContent(name="library", type="markdown", displayed_by_default=True)
+    version = ColumnContent(name="version", type="str", displayed_by_default=True)
+    language = ColumnContent(name="language", type="str", displayed_by_default=True)
+    framework = ColumnContent(name="framework", type="str", displayed_by_default=True)
+    library_type = ColumnContent(name="library_type", type="str", displayed_by_default=True)
+    status = ColumnContent(name="status", type="str", displayed_by_default=True)
 
 ## All the library information that we might need
 @dataclass
@@ -65,27 +81,27 @@ class LibraryDetails:
 
 
 class LibraryType(Enum):
-    ML = LibraryDetails(name="
-    LLM = LibraryDetails(name="
-    AGENT = LibraryDetails(name="
-    VIS = LibraryDetails(name="
-    GENERAL = LibraryDetails(name="
+    ML = LibraryDetails(name="Machine Learning", symbol="🟢")
+    LLM = LibraryDetails(name="LLM Framework", symbol="🔶")
+    AGENT = LibraryDetails(name="Agent Framework", symbol="⭕")
+    VIS = LibraryDetails(name="LLM Inference", symbol="🟦")
+    GENERAL = LibraryDetails(name="LLM Orchestration", symbol="🟣")
     Unknown = LibraryDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
 
     @staticmethod
-    def from_str(type):
-        if "
+    def from_str(type: str) -> "LibraryType":
+        if "Machine Learning" in type or "🟢" in type:
             return LibraryType.ML
-        if "
+        if "LLM Framework" in type or "🔶" in type:
             return LibraryType.LLM
-        if "
+        if "Agent Framework" in type or "⭕" in type:
             return LibraryType.AGENT
-        if "
+        if "LLM Inference" in type or "🟦" in type:
             return LibraryType.VIS
-        if "
+        if "LLM Orchestration" in type or "🟣" in type:
             return LibraryType.GENERAL
         return LibraryType.Unknown
 
@@ -103,11 +119,11 @@ class AssessmentStatus(Enum):
     Disputed = LibraryDetails("Disputed")
 
 # Column selection
-COLS = [
+COLS = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if not getattr(auto_eval_column_attrs, field).hidden]
+fields = AutoEvalColumn.model_fields
 
-EVAL_COLS = [
-EVAL_TYPES = [
+EVAL_COLS = [getattr(EvalQueueColumn, field).name for field in vars(EvalQueueColumn) if not field.startswith('_')]
+EVAL_TYPES = [getattr(EvalQueueColumn, field).type for field in vars(EvalQueueColumn) if not field.startswith('_')]
 
 # Task columns for benchmarking - use the display column names from the Tasks enum
 BENCHMARK_COLS = [task.value.col_name for task in Tasks]
-
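
Note on the utils.py change: with ColumnContent and AutoEvalColumn as pydantic BaseModels, every column definition is validated when auto_eval_column_attrs is constructed. A minimal sketch of that behaviour, assuming pydantic v2 and the field layout shown in the diff (the hidden default is an assumption based on how it is used above):

from pydantic import BaseModel, ValidationError

class ColumnContent(BaseModel):
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

ok = ColumnContent(name="Trust Score", type="number", displayed_by_default=True)
print(ok.model_dump())  # {'name': 'Trust Score', 'type': 'number', 'displayed_by_default': True, ...}

try:
    ColumnContent(name="Broken")  # missing required fields
except ValidationError as e:
    print(e.error_count(), "validation errors")  # 2 validation errors

Misconfigured columns therefore fail at import time of src/display/utils.py rather than surfacing later as attribute errors in the UI code.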
src/leaderboard/read_evals.py  CHANGED

@@ -1,18 +1,14 @@
 import glob
 import json
-import math
 import os
-from dataclasses import dataclass
 from datetime import datetime
-
-import numpy as np
+from pydantic import BaseModel
 
 from src.display.formatting import make_clickable_library, make_clickable_report
-from src.display.utils import
+from src.display.utils import auto_eval_column_attrs, LibraryType, Tasks, Language
 
 
-
-class AssessmentResult:
+class AssessmentResult(BaseModel):
     """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
     """
     assessment_id: str  # Unique identifier
@@ -32,7 +28,7 @@ class AssessmentResult:
     report_url: str = ""  # URL to detailed assessment report
 
     @classmethod
-    def init_from_json_file(
+    def init_from_json_file(cls, json_filepath):
         """Initializes the assessment result from a JSON file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
@@ -43,7 +39,7 @@ class AssessmentResult:
         org_and_repo = library_name.split("/", 1)
 
         if len(org_and_repo) == 1:
-            org =
+            org = ""
             repo = org_and_repo[0]
             assessment_id = f"{repo}_{assessment.get('version', '')}"
         else:
@@ -71,10 +67,11 @@ class AssessmentResult:
             # Format date for display
             dt = datetime.fromisoformat(last_update)
             last_update = dt.strftime("%Y-%m-%d")
-        except:
+        except Exception as e:
+            print(e)
             pass
 
-        return
+        return cls(
             assessment_id=assessment_id,
             library_name=library_name,
             org=org,
@@ -90,18 +87,6 @@ class AssessmentResult:
             report_url=assessment.get("report_url", ""),
         )
 
-    def update_with_request_file(self, requests_path):
-        """Finds the relevant request file for the current library and updates info with it"""
-        request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
-
-        try:
-            with open(request_file, "r") as f:
-                request = json.load(f)
-            self.library_type = LibraryType.from_str(request.get("library_type", ""))
-            self.stars = request.get("stars", 0)
-        except Exception:
-            print(f"Could not find request file for {self.library_name} version {self.version}")
-
     def to_dict(self):
         """Converts the Assessment Result to a dict compatible with our dataframe display"""
         # Calculate Trust Score as equal-weight average
@@ -123,22 +108,24 @@ class AssessmentResult:
             weight_sum += weight
 
         trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
+        # Round to 1 decimal place
+        trust_score = round(trust_score, 1)
 
         data_dict = {
             "assessment_id": self.assessment_id,  # not a column, just a save name
-            [13 removed lines not captured in the page snapshot]
+            auto_eval_column_attrs.library_type.name: self.library_type.value.name,
+            auto_eval_column_attrs.library_type_symbol.name: self.library_type.value.symbol,
+            auto_eval_column_attrs.language.name: self.language.value.name,
+            auto_eval_column_attrs.framework.name: self.framework,
+            auto_eval_column_attrs.library.name: make_clickable_library(self.library_name),
+            auto_eval_column_attrs.version.name: self.version,
+            auto_eval_column_attrs.overall_risk.name: trust_score,
+            auto_eval_column_attrs.license_name.name: self.license,
+            auto_eval_column_attrs.stars.name: self.stars,
+            auto_eval_column_attrs.last_update.name: self.last_update,
+            auto_eval_column_attrs.verified.name: self.verified,
+            auto_eval_column_attrs.availability.name: self.availability,
+            auto_eval_column_attrs.report_url.name: make_clickable_report(self.report_url),
         }
 
         # Add task-specific risk scores - map to display column names
@@ -147,11 +134,25 @@ class AssessmentResult:
             benchmark_key = task_enum.benchmark  # e.g., "license_validation"
             col_name = task_enum.col_name  # Use the display name, e.g., "License Risk"
             risk_score = self.results.get(benchmark_key, 10)  # Default to highest risk
-
+            # Round to 1 decimal place
+            data_dict[col_name] = round(risk_score, 1)
 
         return data_dict
 
 
+    def update_with_request_file(self, assessment_filepath):
+        """Finds the relevant request file for the current library and updates info with it"""
+        try:
+            with open(assessment_filepath, "r") as f:
+                request = json.load(f)["assessment"]
+            self.library_type = LibraryType.from_str(request.get("framework", ""))
+            self.stars = request.get("github_stars", 0)
+        except Exception as e:
+            print(e)
+            print(f"Could not find request file for {self.library_name} version {self.version}")
+
+
+
 def get_request_file_for_library(requests_path, library_name, version):
     """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
     # Try multiple naming patterns for flexibility
@@ -203,7 +204,8 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[AssessmentResult]:
         # Sort the files by date if they have date info
         try:
             files.sort(key=lambda x: datetime.fromisoformat(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
-        except:
+        except Exception as e:
+            print(e)
             pass
 
         for file in files:
@@ -213,7 +215,7 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[AssessmentResult]:
     for assessment_filepath in assessment_filepaths:
         # Creation of result
         assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
-        assessment_result.update_with_request_file(
+        assessment_result.update_with_request_file(assessment_filepath)
 
         # Store results of same eval together
        assessment_id = assessment_result.assessment_id
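
Note on the read_evals.py change: init_from_json_file keeps its classmethod-constructor shape, but with AssessmentResult as a BaseModel the final return cls(...) also runs pydantic validation. A minimal sketch of the pattern, with Result and its fields as hypothetical stand-ins rather than the commit's actual model:

import json
from pydantic import BaseModel

class Result(BaseModel):  # hypothetical stand-in for AssessmentResult
    assessment_id: str
    library_name: str
    version: str = ""

    @classmethod
    def init_from_json_file(cls, json_filepath: str) -> "Result":
        # Parse the raw JSON first, then hand the cleaned-up values to cls(...)
        with open(json_filepath) as fp:
            data = json.load(fp)
        assessment = data.get("assessment", {})
        library_name = assessment.get("library_name", "unknown")
        version = assessment.get("version", "")
        return cls(
            assessment_id=f"{library_name}_{version}",
            library_name=library_name,
            version=version,
        )

Because the instance is built through cls(...), malformed values in a result file surface as a pydantic ValidationError at load time instead of propagating into the dataframe later.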
src/populate.py  CHANGED

@@ -2,9 +2,8 @@
 
 import pandas as pd
 
-from src.display.utils import
+from src.display.utils import auto_eval_column_attrs
 from src.leaderboard.read_evals import get_raw_assessment_results
-from src.about import Tasks
 
 
 def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
@@ -35,8 +34,8 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
         all_df[col] = 10.0  # Default to highest risk
 
     # Sort by Trust Score (ascending - lower is better)
-    if
-    all_df = all_df.sort_values(by=[
+    if auto_eval_column_attrs.overall_risk.name in all_df.columns:
+        all_df = all_df.sort_values(by=[auto_eval_column_attrs.overall_risk.name])
 
     return all_df
 
@@ -72,7 +71,7 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
 
     for file_path in request_files:
         try:
-            with open(file_path, "r") as f:
+            with open(file_path, "r", encoding="utf-8") as f:
                 data = json.load(f)
 
             # Extract relevant fields