refactor: update table columns #1
opened by seonglae-holistic

Files changed:
- README.md +1 -3
- app.py +85 -79
- assessment-queue/pytorch_pytorch_eval_request_FINISHED_v2.1.0.json +2 -2
- assessment-queue/pytorch_pytorch_eval_request_timestamp_def456.json +2 -2
- assessment-results/agent_development_kit.json +4 -4
- assessment-results/browser_use.json +4 -4
- assessment-results/composio.json +4 -4
- assessment-results/crewai.json +4 -4
- assessment-results/{huggingface_transformers.json → huggingface_candle.json} +11 -11
- assessment-results/jax.json +5 -5
- assessment-results/langchain.json +3 -3
- assessment-results/langgraph.json +3 -3
- assessment-results/llamaindex.json +5 -5
- assessment-results/metagpt.json +5 -5
- assessment-results/onnx.json +4 -4
- assessment-results/pydantic_ai.json +3 -3
- assessment-results/pytorch.json +4 -4
- assessment-results/sglang.json +3 -3
- assessment-results/smolagents.json +5 -5
- assessment-results/stagehand.json +5 -5
- assessment-results/tensorflow.json +6 -6
- assessment-results/tensorrt.json +4 -4
- assessment-results/text_generation_inference.json +3 -3
- assessment-results/vllm.json +3 -3
- pyproject.toml +1 -25
- src/about.py +42 -37
- src/display/css_html_js.py +0 -8
- src/display/formatting.py +24 -8
- src/display/utils.py +51 -69
- src/envs.py +7 -16
- src/leaderboard/read_evals.py +43 -61
- src/populate.py +13 -11
- src/submission/check_validity.py +41 -5
- src/submission/submit.py +36 -9
- uv.lock +0 -0
README.md (CHANGED)

@@ -7,7 +7,7 @@ sdk: gradio
 app_file: app.py
 pinned: true
 license: mit
-short_description:
+short_description: Duplicate this leaderboard to initialize your own!
 sdk_version: 5.19.0
 ---

@@ -46,5 +46,3 @@ You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
 - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
-
-> **LibVulnWatch** was presented at the **ACL 2025 Student Research Workshop** and accepted to the **ICML 2025 Technical AI Governance workshop**. The system uncovers hidden security, licensing, maintenance, dependency and regulatory risks in popular AI libraries and publishes a public leaderboard for transparent ecosystem monitoring.

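For orientation, here is a minimal sketch of how the modules the README points to typically come together at startup. The function names and signatures are taken from app.py in this PR; the wiring shown is an illustration rather than the PR's exact code:

```python
# Sketch only: turn assessment results and requests into the leaderboard tables.
from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.populate import get_evaluation_queue_df, get_leaderboard_df

# Flatten every result/request JSON into one row per library.
leaderboard_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Split submissions by status for the queue accordions.
finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
```
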
app.py (CHANGED)

@@ -1,10 +1,10 @@
 import gradio as gr
-from gradio.components import Dataframe
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
 import os
-
+import shutil

 from src.about import (
     CITATION_BUTTON_LABEL,

@@ -22,9 +22,12 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-
+    LibraryType,
+    fields,
+    Language,
+    AssessmentStatus
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, REPO_ID, LOCAL_MODE
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval

@@ -48,50 +51,42 @@ def initialize_data_directories():
     os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
     os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

+    if LOCAL_MODE:
+        print("Running in local mode, using local directories only")
+        return
+
+    # Try to download from HF if not in local mode
+    try:
+        print(f"Downloading request data from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
+        snapshot_download(
+            repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download request data: {e}")
+        print("Using local data only")
+
+    try:
+        print(f"Downloading result data from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
+        snapshot_download(
+            repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download result data: {e}")
+        print("Using local data only")
+
 # Initialize data
 initialize_data_directories()

 # Load data for leaderboard
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

-# Extract unique languages for filtering
-def get_unique_languages(df):
-    """Extract all unique individual languages from the Language column"""
-    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
-        return []
-
-    all_languages = set()
-    for value in df[auto_eval_column_attrs.language.name].unique():
-        if isinstance(value, str):
-            if "/" in value:
-                languages = [lang.strip() for lang in value.split("/")]
-                all_languages.update(languages)
-            else:
-                all_languages.add(value.strip())
-
-    return sorted(list(all_languages))
-
-# Create a mapping for language filtering
-UNIQUE_LANGUAGES = get_unique_languages(LEADERBOARD_DF)
-
-# Create a special column for individual language filtering
-if not LEADERBOARD_DF.empty:
-    # Create a column that contains all individual languages as a list
-    LEADERBOARD_DF["_languages_list"] = LEADERBOARD_DF[auto_eval_column_attrs.language.name].apply(
-        lambda x: [lang.strip() for lang in str(x).split("/")] if pd.notna(x) else []
-    )
-
-    # Create a text version of Active Maintenance for checkboxgroup filtering
-    LEADERBOARD_DF["_maintenance_filter"] = LEADERBOARD_DF[auto_eval_column_attrs.availability.name].apply(
-        lambda x: "Active" if x else "Inactive"
-    )
-
 # Load queue data
 (
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
-    rejected_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

 def init_leaderboard(dataframe):

@@ -99,53 +94,40 @@ def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         # Create an empty dataframe with the expected columns
         all_columns = COLS + [task.value.col_name for task in Tasks]
-        empty_df = pd.DataFrame(columns=
+        empty_df = pd.DataFrame(columns=all_columns)
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df

-    # Create filter columns list with proper typing
-    filter_columns = []
-
-    # 1. Library types
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"))
-
-    # 2. Programming Language (checkboxgroup - OR filtering)
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"))
-
-    # 3. GitHub Stars
-    filter_columns.append(ColumnFilter(
-        auto_eval_column_attrs.stars.name,
-        type="slider",
-        min=0,
-        max=50000,
-        label="GitHub Stars",
-    ))
-
-    # 4. Maintenance Status (checkboxgroup - separate from languages)
-    filter_columns.append(ColumnFilter("_maintenance_filter", type="checkboxgroup", label="Maintenance Status"))
-
-    # Hide columns
-    hidden_columns = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden]
-    hidden_columns.extend(["_languages_list", "_maintenance_filter", "_original_language"])  # Hide helper columns
-
     return Leaderboard(
         value=dataframe,
-        datatype=
+        datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
-            default_selection=[
-            cant_deselect=[
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
         ),
-        search_columns=[
-        hide_columns=
-        filter_columns=
+        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
+            ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
+            ColumnFilter(
+                AutoEvalColumn.stars.name,
+                type="slider",
+                min=0,
+                max=50000,
+                label="GitHub Stars",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.availability.name, type="boolean", label="Show only active libraries", default=True
+            ),
+        ],
         bool_checkboxgroup_label="Filter libraries",
         interactive=False,
     )


-demo = gr.Blocks(css=custom_css
-# demo = gr.Blocks(css=custom_css, theme=Soft(font=["sans-serif"], font_mono=["monospace"]))
+demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

@@ -168,7 +150,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        finished_eval_table = Dataframe(
+                        finished_eval_table = gr.components.Dataframe(
                             value=finished_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -179,7 +161,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        running_eval_table = Dataframe(
+                        running_eval_table = gr.components.Dataframe(
                             value=running_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -191,7 +173,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        pending_eval_table = Dataframe(
+                        pending_eval_table = gr.components.Dataframe(
                             value=pending_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -202,7 +184,26 @@ with demo:

             with gr.Row():
                 with gr.Column():
-                    library_name_textbox = gr.Textbox(label="Library name")
+                    library_name_textbox = gr.Textbox(label="Library name (org/repo format)")
+                    library_version_textbox = gr.Textbox(label="Version", placeholder="v1.0.0")
+                    library_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in LibraryType if t != LibraryType.Unknown],
+                        label="Library type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    language = gr.Dropdown(
+                        choices=[i.value.name for i in Language if i != Language.Other],
+                        label="Programming Language",
+                        multiselect=False,
+                        value="Python",
+                        interactive=True,
+                    )
+                    framework = gr.Textbox(label="Framework/Ecosystem (e.g., PyTorch, React)")
+                    repository_url = gr.Textbox(label="Repository URL")

             submit_button = gr.Button("Submit for Assessment")
             submission_result = gr.Markdown()

@@ -210,18 +211,23 @@ with demo:
             add_new_eval,
             [
                 library_name_textbox,
+                library_version_textbox,
+                repository_url,
+                language,
+                framework,
+                library_type,
             ],
             submission_result,
         )

     with gr.Row():
-        with gr.Accordion("📙 Citation", open=
-            citation_button = gr.
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
                 value=CITATION_BUTTON_TEXT,
                 label=CITATION_BUTTON_LABEL,
-                lines=
+                lines=20,
                 elem_id="citation-button",
-
+                show_copy_button=True,
             )

 # Only schedule space restarts if not in local mode

assessment-queue/pytorch_pytorch_eval_request_FINISHED_v2.1.0.json (CHANGED)

@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "
-  "library_type": "
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
   "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",

assessment-queue/pytorch_pytorch_eval_request_timestamp_def456.json (CHANGED)

@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "
-  "library_type": "
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
  "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",

assessment-results/agent_development_kit.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "microsoft/agent-development-kit",
+    "version": "v0.2.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-07T12:00:00Z",
     "last_updated": "2024-06-07T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/agent_development_kit.html",
+    "repository_url": "https://github.com/microsoft/agent-development-kit",
     "github_stars": 3800,
     "license": "MIT",
     "scores": {

assessment-results/browser_use.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "langchain-ai/browser-use",
+    "version": "v0.5.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-09T12:00:00Z",
     "last_updated": "2024-06-09T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/browser_use.html",
+    "repository_url": "https://github.com/langchain-ai/browser-use",
     "github_stars": 3200,
     "license": "MIT",
     "scores": {

assessment-results/composio.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "ComposableAI/composio",
+    "version": "v0.4.2",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-10T12:00:00Z",
     "last_updated": "2024-06-10T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/composio.html",
+    "repository_url": "https://github.com/ComposableAI/composio",
     "github_stars": 1200,
     "license": "MIT",
     "scores": {

assessment-results/crewai.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "joaomdmoura/crewAI",
+    "version": "v0.9.4",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-15T12:00:00Z",
     "last_updated": "2024-06-15T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/crewai.html",
+    "repository_url": "https://github.com/joaomdmoura/crewAI",
     "github_stars": 8200,
     "license": "MIT",
     "scores": {

assessment-results/{huggingface_transformers.json → huggingface_candle.json} (RENAMED)

@@ -1,23 +1,23 @@
 {
   "assessment": {
-    "library_name": "huggingface/
-    "version": "
-    "language": "
-    "framework": "
+    "library_name": "huggingface/candle",
+    "version": "v0.3.2",
+    "language": "Rust",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T12:00:00Z",
     "last_updated": "2024-06-22T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/huggingface/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/huggingface_candle.html",
+    "repository_url": "https://github.com/huggingface/candle",
+    "github_stars": 12500,
     "license": "Apache-2.0",
     "scores": {
-      "license_validation":
-      "security_assessment":
-      "maintenance_health":
+      "license_validation": 4,
+      "security_assessment": 2,
+      "maintenance_health": 2,
       "dependency_management": 1,
-      "regulatory_compliance":
+      "regulatory_compliance": 2
     },
     "details": {
       "license_validation": {

assessment-results/jax.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
+    "library_name": "google/jax",
     "version": "v0.4.23",
     "language": "Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-24T12:00:00Z",
     "last_updated": "2024-06-24T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/jax.html",
+    "repository_url": "https://github.com/google/jax",
+    "github_stars": 36000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,

assessment-results/langchain.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langchain",
-    "version": "v0.
+    "version": "v0.1.0",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-17T12:00:00Z",
     "last_updated": "2024-06-17T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langchain.html",
     "repository_url": "https://github.com/langchain-ai/langchain",
-    "github_stars":
+    "github_stars": 79000,
     "license": "MIT",
     "scores": {
       "license_validation": 5,

assessment-results/langgraph.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langgraph",
-    "version": "
+    "version": "v0.0.20",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-13T12:00:00Z",
     "last_updated": "2024-06-13T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langgraph.html",
     "repository_url": "https://github.com/langchain-ai/langgraph",
-    "github_stars":
+    "github_stars": 4500,
     "license": "Proprietary",
     "scores": {
       "license_validation": 1,

assessment-results/llamaindex.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "jerryjliu/llama_index",
+    "version": "v0.9.14",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-20T12:00:00Z",
     "last_updated": "2024-06-20T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/llamaindex.html",
+    "repository_url": "https://github.com/jerryjliu/llama_index",
+    "github_stars": 27000,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/metagpt.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "geekan/MetaGPT",
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-14T12:00:00Z",
     "last_updated": "2024-06-14T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/metagpt.html",
+    "repository_url": "https://github.com/geekan/MetaGPT",
+    "github_stars": 32500,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/onnx.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "onnx/onnx",
-    "version": "v1.
+    "version": "v1.15.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T11:00:00Z",
     "last_updated": "2024-06-22T11:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/onnx.html",
     "repository_url": "https://github.com/onnx/onnx",
-    "github_stars":
+    "github_stars": 16200,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/pydantic_ai.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pydantic/pydantic-ai",
-    "version": "v0.
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-08T12:00:00Z",
     "last_updated": "2024-06-08T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pydantic_ai.html",
     "repository_url": "https://github.com/pydantic/pydantic-ai",
-    "github_stars":
+    "github_stars": 5800,
     "license": "MIT",
     "scores": {
       "license_validation": 5,

assessment-results/pytorch.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pytorch/pytorch",
-    "version": "v2.
+    "version": "v2.2.1",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-25T12:00:00Z",
     "last_updated": "2024-06-25T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pytorch.html",
     "repository_url": "https://github.com/pytorch/pytorch",
-    "github_stars":
+    "github_stars": 74500,
     "license": "BSD-3-Clause",
     "scores": {
       "license_validation": 5,

assessment-results/sglang.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "sgl-project/sglang",
-    "version": "v0.
+    "version": "v0.1.8",
     "language": "Python/C++",
     "framework": "LLM Inference",
     "completed_time": "2024-06-19T12:00:00Z",
     "last_updated": "2024-06-19T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/sglang.html",
     "repository_url": "https://github.com/sgl-project/sglang",
-    "github_stars":
+    "github_stars": 4800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,

assessment-results/smolagents.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "tinygrad/SmolAgents",
+    "version": "v0.2.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-12T12:00:00Z",
     "last_updated": "2024-06-12T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/smolagents.html",
+    "repository_url": "https://github.com/tinygrad/SmolAgents",
+    "github_stars": 2800,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/stagehand.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "langchain-ai/stagehand",
+    "version": "v0.0.12",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-11T12:00:00Z",
     "last_updated": "2024-06-11T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/stagehand.html",
+    "repository_url": "https://github.com/langchain-ai/stagehand",
+    "github_stars": 1500,
     "license": "Apache-2.0 with Commons Clause",
     "scores": {
       "license_validation": 3,

assessment-results/tensorflow.json (CHANGED)

@@ -1,22 +1,22 @@
 {
   "assessment": {
     "library_name": "tensorflow/tensorflow",
-    "version": "v2.
+    "version": "v2.15.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-23T12:00:00Z",
     "last_updated": "2024-06-23T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorflow.html",
     "repository_url": "https://github.com/tensorflow/tensorflow",
-    "github_stars":
+    "github_stars": 182000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,
       "security_assessment": 1,
-
-
+      "maintenance_health": 3,
+      "dependency_management": 1,
       "regulatory_compliance": 3
     },
     "details": {

assessment-results/tensorrt.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "nvidia/TensorRT",
-    "version": "
+    "version": "v9.1.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning Inference",
     "completed_time": "2024-06-21T12:00:00Z",
     "last_updated": "2024-06-21T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorrt.html",
     "repository_url": "https://github.com/NVIDIA/TensorRT",
-    "github_stars":
+    "github_stars": 8500,
     "license": "Proprietary with Open Components",
     "scores": {
       "license_validation": 3,

assessment-results/text_generation_inference.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "huggingface/text-generation-inference",
-    "version": "
+    "version": "v1.1.0",
     "language": "Rust/Python",
     "framework": "LLM Inference",
     "completed_time": "2024-06-16T12:00:00Z",
     "last_updated": "2024-06-16T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/text_generation_inference.html",
     "repository_url": "https://github.com/huggingface/text-generation-inference",
-    "github_stars":
+    "github_stars": 5600,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 3,

assessment-results/vllm.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "vllm-project/vllm",
-    "version": "v0.
+    "version": "v0.3.0",
     "language": "Python/CUDA",
     "framework": "LLM Inference",
     "completed_time": "2024-06-18T12:00:00Z",
     "last_updated": "2024-06-18T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/vllm.html",
     "repository_url": "https://github.com/vllm-project/vllm",
-    "github_stars":
+    "github_stars": 12800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,

pyproject.toml (CHANGED)

@@ -1,33 +1,9 @@
-[project]
-name = "libvulnwatchleaderboard"
-version = "0.1.0"
-description = "A vulnerability assessment leaderboard for libraries"
-requires-python = ">=3.8"
-dependencies = [
-    "APScheduler",
-    "black",
-    "datasets",
-    "gradio",
-    "gradio[oauth]",
-    "gradio_leaderboard==0.0.13",
-    "gradio_client",
-    "huggingface-hub>=0.18.0",
-    "matplotlib",
-    "numpy",
-    "pandas",
-    "python-dateutil",
-    "tqdm",
-    "transformers",
-    "tokenizers>=0.15.0",
-    "sentencepiece",
-]
-
 [tool.ruff]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = ["E", "F"]
 ignore = ["E501"] # line too long (black is taking care of this)
 line-length = 119
-fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TID", "TRY", "UP", "YTT"]
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]

 [tool.isort]
 profile = "black"

src/about.py (CHANGED)

@@ -12,11 +12,11 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # Risk domains from LibVulnWatch paper
-    license = Task("license_validation", "score", "License
-    security = Task("security_assessment", "score", "Security
-    maintenance = Task("maintenance_health", "score", "Maintenance
-    dependency = Task("dependency_management", "score", "Dependency
-    regulatory = Task("regulatory_compliance", "score", "Regulatory
+    license = Task("license_validation", "score", "License Risk")
+    security = Task("security_assessment", "score", "Security Risk")
+    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
+    dependency = Task("dependency_management", "score", "Dependency Risk")
+    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")

 NUM_FEWSHOT = 0 # Not relevant for vulnerability assessment
 # ---------------------------------------------------

@@ -28,32 +28,44 @@ TITLE = """<h1 align="center" id="space-title">LibVulnWatch: Vulnerability Asses

 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-##
+## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries

-
+This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains:
+- **License Validation**: Legal risks based on license type, compatibility, and requirements
+- **Security Assessment**: Vulnerability severity and patch responsiveness
+- **Maintenance Health**: Sustainability and governance practices
+- **Dependency Management**: Vulnerability inheritance and supply chain security
+- **Regulatory Compliance**: Compliance readiness for various frameworks

-
-• **Security Assessment** – CVEs, patch latency, exploit primitives
-• **Maintenance Health** – bus-factor, release cadence, contributor diversity
-• **Dependency Management** – transitive risk, SBOM completeness
-• **Regulatory Compliance** – privacy/export controls, policy documentation
-
-In the paper we apply the framework to **20 popular libraries**, achieving **88 % coverage of OpenSSF Scorecard checks** and surfacing **up to 19 previously-unreported risks per library**.
-Lower scores indicate lower risk, and the **Trust Score** is the equal-weight average of the five domains.
+Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness.
 """

 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = """
-##
+LLM_BENCHMARKS_TEXT = f"""
+## How LibVulnWatch Works
+
+Our assessment methodology evaluates libraries through:
+1. **Static Analysis**: Code review, license parsing, and documentation examination
+2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing
+3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence
+
+Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk.

-
+## Reproducibility
+To reproduce our assessment for a specific library:
+```python
+from libvulnwatch import VulnerabilityAssessor

-
-
-3️⃣ **Metadata agents** – GitHub mining, release-cadence modelling, community health
-4️⃣ **Policy agents** – mapping evidence to NIST SSDF, EU AI Act, and related frameworks
+# Initialize the assessor
+assessor = VulnerabilityAssessor()

-
+# Run assessment on a library
+results = assessor.assess_library("organization/library_name")
+
+# View detailed results
+print(results.risk_scores)
+print(results.detailed_findings)
+```
 """

 EVALUATION_QUEUE_TEXT = """

@@ -84,18 +96,11 @@ If your library shows as "FAILED" in the assessment queue, check that:
 """

 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""
-
-
-
-
-
+CITATION_BUTTON_TEXT = r"""
+@article{LibVulnWatch2025,
+title={LibVulnWatch: Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries},
+author={First Author and Second Author},
+journal={ICML 2025 Technical AI Governance Workshop},
+year={2025}
 }
-
-@inproceedings{anonymous2025libvulnwatch,
-title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
-author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
-booktitle={ICML Workshop on Technical AI Governance (TAIG)},
-year={2025},
-url={https://openreview.net/forum?id=MHhrr8QHgR}
-}"""
+"""

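To make the Trust Score definition in the new INTRODUCTION_TEXT concrete (an equal-weighted average of the five per-domain risk scores), here is a minimal sketch. The score keys come from the Tasks enum above and the values from the assessment-results/huggingface_candle.json diff earlier in this PR; the snippet itself is an illustration, not code from the repository:

```python
import pandas as pd

# Domain scores for one library (values from assessment-results/huggingface_candle.json).
scores = pd.Series({
    "license_validation": 4,
    "security_assessment": 2,
    "maintenance_health": 2,
    "dependency_management": 1,
    "regulatory_compliance": 2,
})

# Equal-weighted average across the five domains; lower means lower risk.
trust_score = scores.mean()
print(round(trust_score, 2))  # 2.2
```
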
src/display/css_html_js.py (CHANGED)

@@ -38,14 +38,6 @@ custom_css = """
     padding: 0px;
 }

-.gradio-container {
-    max-height: fit-content;
-}
-
-.container {
-    height: fit-content;
-}
-
 /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
 #leaderboard-table td:nth-child(2),
 #leaderboard-table th:nth-child(2) {

src/display/formatting.py (CHANGED)

@@ -1,5 +1,8 @@
 """Helper functions to style our gradio elements"""

+import re
+import os
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'

@@ -10,23 +13,21 @@ def make_clickable_model(model_name):


 def make_clickable_report(report_url):
-    """
-    return
+    """Return the direct URL to the assessment report without any formatting"""
+    # Just return the URL string directly
+    return report_url


 def styled_error(error):
-    "
-    return f'<span style="color: red">❌ Error:</span> {error}'
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"


 def styled_warning(warn):
-    "
-    return f'<span style="color: orange">⚠️ Warning:</span> {warn}'
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"


 def styled_message(message):
-    "
-    return f'<span style="color: green">✅ Success:</span> {message}'
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"


 def has_no_nan_values(df, columns):

@@ -47,6 +48,21 @@ def make_clickable_library(library_name: str) -> str:
     return f'<a href="{github_url}" target="_blank">{library_name}</a>'


+def styled_message(message) -> str:
+    """Format a message with a green header"""
+    return f'<span style="color: green">✅ Success:</span> {message}'
+
+
+def styled_warning(message) -> str:
+    """Format a warning message with an orange header"""
+    return f'<span style="color: orange">⚠️ Warning:</span> {message}'
+
+
+def styled_error(message) -> str:
+    """Format an error message with a red header"""
+    return f'<span style="color: red">❌ Error:</span> {message}'
+
+
 # Risk severity coloring for risk scores
 def colorize_risk_score(score):
     """

src/display/utils.py
CHANGED
@@ -1,14 +1,19 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, make_dataclass
 from enum import Enum
 
+import pandas as pd
+
 from src.about import Tasks
-…
+
+def fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-…
+@dataclass
+class ColumnContent:
     name: str
     type: str
     displayed_by_default: bool
@@ -16,61 +21,38 @@ class ColumnContent(BaseModel):
     never_hidden: bool = False
 
 ## Leaderboard columns
-… (old lines 19-40 not rendered in this view)
-auto_eval_column_attrs = AutoEvalColumn(
-    library_type_symbol=ColumnContent(name="T", type="str", displayed_by_default=True, never_hidden=True),
-    library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
-    overall_risk=ColumnContent(name="Trust Score", type="number", displayed_by_default=True),
-    # Task columns from Tasks enum
-    license=ColumnContent(name="License Rating", type="number", displayed_by_default=True),
-    security=ColumnContent(name="Security Rating", type="number", displayed_by_default=True),
-    maintenance=ColumnContent(name="Maintenance Rating", type="number", displayed_by_default=True),
-    dependency=ColumnContent(name="Dependency Rating", type="number", displayed_by_default=True),
-    regulatory=ColumnContent(name="Regulatory Rating", type="number", displayed_by_default=True),
-    # Library information
-    library_type=ColumnContent(name="Type", type="str", displayed_by_default=False),
-    framework=ColumnContent(name="Framework", type="str", displayed_by_default=False),
-    version=ColumnContent(name="Version", type="str", displayed_by_default=False, hidden=True),
-    language=ColumnContent(name="Language", type="str", displayed_by_default=False),
-    license_name=ColumnContent(name="License", type="str", displayed_by_default=True),
-    stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
-    availability=ColumnContent(name="Active Maintenance", type="bool", displayed_by_default=True),
-    report_url=ColumnContent(name="Report", type="markdown", displayed_by_default=True),
-    last_update=ColumnContent(name="Last Update", type="str", displayed_by_default=False),
-    verified=ColumnContent(name="Verified", type="bool", displayed_by_default=False),
-)
-
+auto_eval_column_dict = []
+# Init
+auto_eval_column_dict.append(["library_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)])
+#Scores
+auto_eval_column_dict.append(["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)])
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Library information
+auto_eval_column_dict.append(["library_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["framework", ColumnContent, ColumnContent("Framework", "str", False)])
+auto_eval_column_dict.append(["version", ColumnContent, ColumnContent("Version", "str", False, False)])
+auto_eval_column_dict.append(["language", ColumnContent, ColumnContent("Language", "str", False)])
+auto_eval_column_dict.append(["license_name", ColumnContent, ColumnContent("License", "str", True)])
+auto_eval_column_dict.append(["stars", ColumnContent, ColumnContent("GitHub ⭐", "number", False)])
+auto_eval_column_dict.append(["last_update", ColumnContent, ColumnContent("Last Updated", "str", False)])
+auto_eval_column_dict.append(["verified", ColumnContent, ColumnContent("Independently Verified", "bool", False)])
+auto_eval_column_dict.append(["availability", ColumnContent, ColumnContent("Active Maintenance", "bool", True)])
+auto_eval_column_dict.append(["report_url", ColumnContent, ColumnContent("Report", "str", True)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
-    library = ColumnContent(
-    version = ColumnContent(
-    language = ColumnContent(
-    framework = ColumnContent(
-    library_type = ColumnContent(
-    status = ColumnContent(
+    library = ColumnContent("library", "markdown", True)
+    version = ColumnContent("version", "str", True)
+    language = ColumnContent("language", "str", True)
+    framework = ColumnContent("framework", "str", True)
+    library_type = ColumnContent("library_type", "str", True)
+    status = ColumnContent("status", "str", True)
 
 ## All the library information that we might need
 @dataclass
@@ -81,27 +63,27 @@ class LibraryDetails:
 
 
 class LibraryType(Enum):
-    ML = LibraryDetails(name="
-    LLM = LibraryDetails(name="
-    AGENT = LibraryDetails(name="
-    VIS = LibraryDetails(name="
-    GENERAL = LibraryDetails(name="
+    ML = LibraryDetails(name="machine learning", symbol="🟢")
+    LLM = LibraryDetails(name="llm framework", symbol="🔶")
+    AGENT = LibraryDetails(name="agent framework", symbol="⭕")
+    VIS = LibraryDetails(name="visualization", symbol="🟦")
+    GENERAL = LibraryDetails(name="general ai", symbol="🟣")
     Unknown = LibraryDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
        return f"{self.value.symbol}{separator}{self.value.name}"
 
    @staticmethod
-    def from_str(type
-        if "
+    def from_str(type):
+        if "machine learning" in type or "🟢" in type:
            return LibraryType.ML
-        if "
+        if "llm framework" in type or "🔶" in type:
            return LibraryType.LLM
-        if "
+        if "agent framework" in type or "⭕" in type:
            return LibraryType.AGENT
-        if "
+        if "visualization" in type or "🟦" in type:
            return LibraryType.VIS
-        if "
+        if "general ai" in type or "🟣" in type:
            return LibraryType.GENERAL
        return LibraryType.Unknown
 
@@ -119,11 +101,11 @@ class AssessmentStatus(Enum):
     Disputed = LibraryDetails("Disputed")
 
 # Column selection
-COLS = [
-fields = AutoEvalColumn.model_fields
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
-EVAL_COLS = [
-EVAL_TYPES = [
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 # Task columns for benchmarking - use the display column names from the Tasks enum
 BENCHMARK_COLS = [task.value.col_name for task in Tasks]
+
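The column set is now built dynamically: each `Tasks` member becomes one risk column, `make_dataclass` turns the accumulated list into `AutoEvalColumn`, and the `fields()` helper enumerates its class attributes for `COLS`/`EVAL_COLS`. A stand-alone sketch of that pattern with two dummy columns (not the leaderboard's real ones); here `ColumnContent` is frozen so the default instances stay hashable, which newer Python versions require for dataclass defaults:

```python
from dataclasses import dataclass, make_dataclass

# Illustrative only: a reduced version of the pattern used in src/display/utils.py.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Non-dunder class attributes are exactly the per-column defaults.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

columns = [
    ["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)],
    ["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", columns, frozen=True)

print(AutoEvalColumn.library.name)                               # -> "Library"
print([c.name for c in fields(AutoEvalColumn) if not c.hidden])  # -> ["Library", "Trust Score ⬇️"]
```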
src/envs.py
CHANGED
@@ -2,30 +2,21 @@ import os
 
 from huggingface_hub import HfApi
 
-#
-…
-…
-        return False
-    return True
-
-LOCAL_MODE = is_local_mode()
+# Run in local mode (no Hugging Face connection required)
+# Set to True when developing locally without HF credentials
+LOCAL_MODE = True
 
 # Info to change for your repository
 # ----------------------------------
 # Get token from environment or use None in local mode
 TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
 
-OWNER = "
+OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/
-QUEUE_REPO =
-RESULTS_REPO =
-
-if not LOCAL_MODE:
-    REPO_ID = str(os.environ.get("SPACE_ID"))
-    QUEUE_REPO = REPO_ID
-    RESULTS_REPO = REPO_ID
+REPO_ID = f"{OWNER}/leaderboard"
+QUEUE_REPO = f"{OWNER}/vulnerability-requests"
+RESULTS_REPO = f"{OWNER}/vulnerability-assessments"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH=os.getenv("HF_HOME", ".")
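`LOCAL_MODE` is now a plain module-level flag, so nothing in `envs.py` decides it at runtime any more. A hedged sketch of how start-up code can use it to skip Hub syncing when no credentials are present (the guard shown here is illustrative; the real wiring lives in `app.py`):

```python
# Illustrative guard, not part of this commit: only sync the queue/result datasets
# from the Hub when the Space is not running in local mode.
from huggingface_hub import snapshot_download

from src.envs import (
    EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, RESULTS_REPO, TOKEN, LOCAL_MODE,
)

if not LOCAL_MODE:
    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
    snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", token=TOKEN)
else:
    print("LOCAL_MODE=True: using the checked-in assessment-queue/ and assessment-results/ folders")
```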
src/leaderboard/read_evals.py
CHANGED
@@ -1,32 +1,28 @@
 import glob
 import json
+import math
 import os
+from dataclasses import dataclass
 from datetime import datetime
-from pydantic import BaseModel
-
-from src.display.formatting import make_clickable_library, make_clickable_report
-from src.display.utils import auto_eval_column_attrs, LibraryType, Tasks, Language
 
+import numpy as np
 
-…
-…
-    if datetime_str.endswith('Z'):
-        datetime_str = datetime_str[:-1] + '+00:00'
-    return datetime.fromisoformat(datetime_str)
+from src.display.formatting import make_clickable_library, make_clickable_report
+from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
 
 
-…
+@dataclass
+class AssessmentResult:
     """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
     """
     assessment_id: str # Unique identifier
-    library_name: str
+    library_name: str # org/repo
     org: str
     repo: str
     version: str
     results: dict # Risk scores
     framework: str = ""
     language: Language = Language.Other
-    language_str: str = "" # Original language string to support multiple languages
     library_type: LibraryType = LibraryType.Unknown
     license: str = "?"
     stars: int = 0
@@ -36,7 +32,7 @@ class AssessmentResult(BaseModel):
     report_url: str = "" # URL to detailed assessment report
 
     @classmethod
-    def init_from_json_file(
+    def init_from_json_file(self, json_filepath):
        """Initializes the assessment result from a JSON file"""
        with open(json_filepath) as fp:
            data = json.load(fp)
@@ -47,7 +43,7 @@ class AssessmentResult(BaseModel):
        org_and_repo = library_name.split("/", 1)
 
        if len(org_and_repo) == 1:
-            org =
+            org = None
            repo = org_and_repo[0]
            assessment_id = f"{repo}_{assessment.get('version', '')}"
        else:
@@ -66,27 +62,19 @@ class AssessmentResult(BaseModel):
        # Library metadata
        framework = assessment.get("framework", "")
        language_str = assessment.get("language", "Other")
-
-        # Handle multiple languages separated by /
-        if "/" in language_str:
-            language_parts = [lang.strip() for lang in language_str.split("/")]
-            # Store the full string but parse the first language for enum
-            language = next((lang for lang in Language if lang.value.name == language_parts[0]), Language.Other)
-        else:
-            language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
+        language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
 
        # Availability and verification
        last_update = assessment.get("last_updated", "")
        if last_update:
            try:
                # Format date for display
-                dt =
+                dt = datetime.fromisoformat(last_update)
                last_update = dt.strftime("%Y-%m-%d")
-            except
-                print(e)
+            except:
                pass
 
-        return
+        return self(
            assessment_id=assessment_id,
            library_name=library_name,
            org=org,
@@ -95,7 +83,6 @@ class AssessmentResult(BaseModel):
            results=risk_scores,
            framework=framework,
            language=language,
-            language_str=language_str,
            license=assessment.get("license", "?"),
            availability=assessment.get("active_maintenance", True),
            verified=assessment.get("independently_verified", False),
@@ -103,6 +90,18 @@ class AssessmentResult(BaseModel):
            report_url=assessment.get("report_url", ""),
        )
 
+    def update_with_request_file(self, requests_path):
+        """Finds the relevant request file for the current library and updates info with it"""
+        request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
+
+        try:
+            with open(request_file, "r") as f:
+                request = json.load(f)
+            self.library_type = LibraryType.from_str(request.get("library_type", ""))
+            self.stars = request.get("stars", 0)
+        except Exception:
+            print(f"Could not find request file for {self.library_name} version {self.version}")
+
    def to_dict(self):
        """Converts the Assessment Result to a dict compatible with our dataframe display"""
        # Calculate Trust Score as equal-weight average
@@ -124,24 +123,22 @@ class AssessmentResult(BaseModel):
            weight_sum += weight
 
        trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
-        # Round to 1 decimal place
-        trust_score = round(trust_score, 1)
 
        data_dict = {
            "assessment_id": self.assessment_id, # not a column, just a save name
-… (13 lines not rendered in this view)
+            AutoEvalColumn.library_type.name: self.library_type.value.name,
+            AutoEvalColumn.library_type_symbol.name: self.library_type.value.symbol,
+            AutoEvalColumn.language.name: self.language.value.name,
+            AutoEvalColumn.framework.name: self.framework,
+            AutoEvalColumn.library.name: make_clickable_library(self.library_name),
+            AutoEvalColumn.version.name: self.version,
+            AutoEvalColumn.overall_risk.name: trust_score,
+            AutoEvalColumn.license_name.name: self.license,
+            AutoEvalColumn.stars.name: self.stars,
+            AutoEvalColumn.last_update.name: self.last_update,
+            AutoEvalColumn.verified.name: self.verified,
+            AutoEvalColumn.availability.name: self.availability,
+            AutoEvalColumn.report_url.name: make_clickable_report(self.report_url),
        }
 
        # Add task-specific risk scores - map to display column names
@@ -150,25 +147,11 @@ class AssessmentResult(BaseModel):
            benchmark_key = task_enum.benchmark # e.g., "license_validation"
            col_name = task_enum.col_name # Use the display name, e.g., "License Risk"
            risk_score = self.results.get(benchmark_key, 10) # Default to highest risk
-
-            data_dict[col_name] = round(risk_score, 1)
+            data_dict[col_name] = risk_score
 
        return data_dict
 
 
-    def update_with_request_file(self, assessment_filepath):
-        """Finds the relevant request file for the current library and updates info with it"""
-        try:
-            with open(assessment_filepath, "r") as f:
-                request = json.load(f)["assessment"]
-            self.library_type = LibraryType.from_str(request.get("framework", ""))
-            self.stars = request.get("github_stars", 0)
-        except Exception as e:
-            print(e)
-            print(f"Could not find request file for {self.library_name} version {self.version}")
-
-
-
 def get_request_file_for_library(requests_path, library_name, version):
    """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
    # Try multiple naming patterns for flexibility
@@ -219,9 +202,8 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
 
        # Sort the files by date if they have date info
        try:
-            files.sort(key=lambda x:
-        except
-            print(e)
+            files.sort(key=lambda x: datetime.fromisoformat(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
+        except:
            pass
 
    for file in files:
@@ -231,7 +213,7 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
    for assessment_filepath in assessment_filepaths:
        # Creation of result
        assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
-        assessment_result.update_with_request_file(
+        assessment_result.update_with_request_file(requests_path)
 
        # Store results of same eval together
        assessment_id = assessment_result.assessment_id
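`to_dict()` computes the Trust Score as an equal-weight average of the per-category risk scores, falling back to 10 (highest risk) for anything missing. A reduced sketch of that aggregation; the benchmark keys come from the `Tasks` enum in `src/about.py`, and only "license_validation" is spelled out in this diff, so the other keys below are placeholders:

```python
# Reduced sketch of the Trust Score aggregation in AssessmentResult.to_dict().
def trust_score(results: dict, benchmark_keys: list) -> float:
    # Equal weights; a category missing from `results` counts as 10 (highest risk).
    if not benchmark_keys:
        return 10.0
    return sum(results.get(key, 10) for key in benchmark_keys) / len(benchmark_keys)

# "license_validation" is confirmed by this diff; the remaining keys are placeholders.
keys = ["license_validation", "security_assessment", "maintenance_health"]
print(trust_score({"license_validation": 2.5, "security_assessment": 4.0}, keys))  # (2.5 + 4.0 + 10) / 3
```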
src/populate.py
CHANGED
@@ -2,8 +2,9 @@
 
 import pandas as pd
 
-from src.display.utils import
+from src.display.utils import AutoEvalColumn
 from src.leaderboard.read_evals import get_raw_assessment_results
+from src.about import Tasks
 
 
 def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
@@ -33,9 +34,9 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
            print(f"Warning: Column '{col}' missing, adding empty column")
            all_df[col] = 10.0 # Default to highest risk
 
-    # Sort by Trust Score (ascending -
-    if
-    all_df = all_df.sort_values(by=[
+    # Sort by Trust Score (ascending - lower is better)
+    if AutoEvalColumn.overall_risk.name in all_df.columns:
+        all_df = all_df.sort_values(by=[AutoEvalColumn.overall_risk.name])
 
    return all_df
 
@@ -68,16 +69,19 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
        finished_data = []
        running_data = []
        pending_data = []
-…
+
        for file_path in request_files:
            try:
-                with open(file_path, "r"
+                with open(file_path, "r") as f:
                    data = json.load(f)
 
                # Extract relevant fields
                row = {
                    "library": data.get("library", ""),
+                    "version": data.get("version", ""),
                    "language": data.get("language", ""),
+                    "framework": data.get("framework", ""),
+                    "library_type": data.get("library_type", ""),
                    "status": data.get("status", "UNKNOWN")
                }
 
@@ -88,8 +92,6 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
                    running_data.append(row)
                elif row["status"] == "PENDING":
                    pending_data.append(row)
-                elif row["status"] == "REJECTED":
-                    rejected_data.append(row)
            except Exception as e:
                print(f"Error reading request file {file_path}: {e}")
                continue
@@ -98,11 +100,11 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
        finished_df = pd.DataFrame(finished_data, columns=eval_cols)
        running_df = pd.DataFrame(running_data, columns=eval_cols)
        pending_df = pd.DataFrame(pending_data, columns=eval_cols)
-…
-        return finished_df, running_df, pending_df
+
+        return finished_df, running_df, pending_df
 
    except Exception as e:
        print(f"Error reading evaluation queue: {e}")
        # Return empty dataframes
        empty_df = pd.DataFrame(columns=eval_cols)
-        return empty_df.copy(), empty_df.copy(), empty_df.copy()
+        return empty_df.copy(), empty_df.copy(), empty_df.copy()
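For reference, the two populate helpers are consumed together with the column constants from `src/display/utils.py`. A sketch of the call sites as the signatures in this diff imply them (the exact wiring in `app.py` may differ slightly):

```python
# Sketch of the call sites implied by the signatures above (app.py wiring may differ).
from src.display.utils import COLS, BENCHMARK_COLS, EVAL_COLS
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.populate import get_leaderboard_df, get_evaluation_queue_df

# Full leaderboard table, sorted ascending by Trust Score (lower risk first).
leaderboard_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Submission queue split by status.
finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
```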
src/submission/check_validity.py
CHANGED
@@ -1,22 +1,58 @@
 import json
 import os
+import re
+import requests
 from collections import defaultdict
+from datetime import datetime, timedelta, timezone
 from typing import Dict, Tuple, Any, List, Set
 
-def is_repository_valid(repo_name: str) -> Tuple[bool, str, Dict[str, Any]]:
+def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.
 
    Args:
-        repo_name: The name of the repository
+        repo_name: The name of the repository (org/repo format)
+        repo_url: URL to the repository
 
    Returns:
        Tuple of (is_valid, error_message, library_info)
    """
    # Basic format validation
-    if not repo_name:
-        return False, "Repository name
-…
+    if not repo_name or "/" not in repo_name:
+        return False, "Repository name must be in the format 'organization/repository'", {}
+
+    # Check if GitHub URL
+    if repo_url and "github.com" in repo_url:
+        # Extract org and repo from URL if provided
+        try:
+            parts = repo_url.split("github.com/")[1].split("/")
+            org = parts[0]
+            repo = parts[1].split(".")[0] if "." in parts[1] else parts[1]
+            url_repo_name = f"{org}/{repo}"
+
+            # Check if URL matches repo_name
+            if url_repo_name != repo_name:
+                return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}
+        except:
+            pass # Fall back to using repo_name
+
+    # Get repository information from GitHub API
+    org, repo = repo_name.split("/")
+    api_url = f"https://api.github.com/repos/{org}/{repo}"
+
+    try:
+        response = requests.get(api_url)
+        if response.status_code != 200:
+            return False, f"Repository not found or not accessible: {response.json().get('message', 'Unknown error')}", {}
+
+        # Parse repository data
+        repo_data = response.json()
+        library_info = get_library_info(repo_data)
+
+        return True, "", library_info
+
+    except Exception as e:
+        return False, f"Error accessing repository: {str(e)}", {}
 
 def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
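One operational note on the new `is_repository_valid`: it calls the GitHub REST API without authentication, which is limited to roughly 60 requests per hour per IP. A hedged sketch of adding an optional token header (the `GITHUB_TOKEN` variable is an assumption, not something this commit introduces):

```python
import os
import requests

# Sketch only: same endpoint the validator uses, with an optional token to raise
# the rate limit. GITHUB_TOKEN is an assumed environment variable, not part of this PR.
def github_get(api_url: str) -> requests.Response:
    headers = {"Accept": "application/vnd.github+json"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return requests.get(api_url, headers=headers, timeout=10)

response = github_get("https://api.github.com/repos/pytorch/pytorch")
if response.status_code == 200:
    print(response.json().get("stargazers_count"))
```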
src/submission/submit.py
CHANGED
@@ -1,35 +1,57 @@
 import json
 import os
+import re
+import time
 import uuid
 from datetime import datetime
 from pathlib import Path
 
+import huggingface_hub
+import requests
+from huggingface_hub import HfApi
+
+from src.display.utils import LibraryType, Language, AssessmentStatus
 from src.display.formatting import styled_error, styled_warning, styled_message
-from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, LOCAL_MODE
-from src.submission.check_validity import is_repository_valid
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN, LOCAL_MODE
+from src.submission.check_validity import is_repository_valid, get_library_info
 
 
 def add_new_eval(
    library_name,
+    library_version,
+    repository_url,
+    language,
+    framework,
+    library_type_str,
 ) -> str:
    """
    Adds a new library to the assessment queue.
 
    Args:
-        library_name: Name of the library
+        library_name: Name of the library (org/repo format)
+        library_version: Version of the library
+        repository_url: URL to the repository
+        language: Programming language
+        framework: Related framework/ecosystem
+        library_type_str: Type of AI library
 
    Returns:
        A message indicating the status of the submission
    """
    # Check if valid repository
-    is_valid, validity_message, library_info = is_repository_valid(library_name)
+    is_valid, validity_message, library_info = is_repository_valid(library_name, repository_url)
 
    if not is_valid:
        return styled_error(f"Invalid submission: {validity_message}")
 
+    # Parse library type
+    library_type = LibraryType.from_str(library_type_str)
+    if library_type == LibraryType.Unknown:
+        return styled_error("Please select a valid library type.")
+
    # Create a unique identifier for the submission
    uid = uuid.uuid4().hex[:6]
-    timestamp = datetime.now().
+    timestamp = datetime.now().isoformat()
    request_filename = f"{library_name.replace('/', '_')}_eval_request_{timestamp}_{uid}.json"
 
    # Stars count and license info from library_info if available
@@ -39,6 +61,11 @@ def add_new_eval(
    # Create the assessment request JSON
    assessment_request = {
        "library": library_name,
+        "version": library_version,
+        "repository_url": repository_url,
+        "language": language,
+        "framework": framework,
+        "library_type": library_type.value.name,
        "license": license_name,
        "stars": stars,
        "status": "PENDING",
@@ -57,7 +84,7 @@ def add_new_eval(
 
    # If in local mode, don't try to upload to HF
    if LOCAL_MODE:
-        return styled_message(f"Library '{library_name}' has been added to the local assessment queue! Assessment ID: {uid}")
+        return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the local assessment queue! Assessment ID: {uid}")
 
    # Try to upload to HF if not in local mode
    try:
@@ -65,12 +92,12 @@ def add_new_eval(
        path = Path(request_file_path)
        API.upload_file(
            path_or_fileobj=path,
-            path_in_repo=
+            path_in_repo=request_filename,
            repo_id=QUEUE_REPO,
-            repo_type="
+            repo_type="dataset",
        )
 
-        return styled_message(f"Library '{library_name}' has been added to the assessment queue! Assessment ID: {uid}")
+        return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the assessment queue! Assessment ID: {uid}")
 
    except Exception as e:
        return styled_warning(f"Saved locally but failed to upload to Hugging Face: {str(e)}")
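A usage example for the extended `add_new_eval` signature; the argument values below are hypothetical (candle does appear in this PR's results files, but the version, language and framework strings are made up for illustration), and in the app they would come from the submission form's components:

```python
from src.submission.submit import add_new_eval

# Hypothetical values for illustration; in app.py these come from the Gradio form.
message = add_new_eval(
    library_name="huggingface/candle",
    library_version="0.8.0",
    repository_url="https://github.com/huggingface/candle",
    language="Rust",
    framework="candle",
    library_type_str="🟢 machine learning",
)
print(message)  # HTML fragment from styled_message / styled_warning / styled_error
```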
uv.lock
DELETED
The diff for this file is too large to render.
See raw diff