refactor: update table columns

#1
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
 app_file: app.py
 pinned: true
 license: mit
-short_description: Vulnerability scores for AI libraries (ACL '25, ICML '25)
+short_description: Duplicate this leaderboard to initialize your own!
 sdk_version: 5.19.0
 ---
 
@@ -46,5 +46,3 @@ You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
 - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
-
-> **LibVulnWatch** was presented at the **ACL 2025 Student Research Workshop** and accepted to the **ICML 2025 Technical AI Governance workshop**. The system uncovers hidden security, licensing, maintenance, dependency and regulatory risks in popular AI libraries and publishes a public leaderboard for transparent ecosystem monitoring.
 
app.py CHANGED
@@ -1,10 +1,10 @@
 import gradio as gr
-from gradio.components import Dataframe
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
 import os
-from gradio.themes import Soft
+import shutil
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -22,9 +22,12 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-    auto_eval_column_attrs
+    LibraryType,
+    fields,
+    Language,
+    AssessmentStatus
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, REPO_ID, LOCAL_MODE
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
@@ -48,50 +51,42 @@ def initialize_data_directories():
     os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
     os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
 
+    if LOCAL_MODE:
+        print("Running in local mode, using local directories only")
+        return
+
+    # Try to download from HF if not in local mode
+    try:
+        print(f"Downloading request data from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
+        snapshot_download(
+            repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download request data: {e}")
+        print("Using local data only")
+
+    try:
+        print(f"Downloading result data from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
+        snapshot_download(
+            repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download result data: {e}")
+        print("Using local data only")
+
 # Initialize data
 initialize_data_directories()
 
 # Load data for leaderboard
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
-# Extract unique languages for filtering
-def get_unique_languages(df):
-    """Extract all unique individual languages from the Language column"""
-    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
-        return []
-
-    all_languages = set()
-    for value in df[auto_eval_column_attrs.language.name].unique():
-        if isinstance(value, str):
-            if "/" in value:
-                languages = [lang.strip() for lang in value.split("/")]
-                all_languages.update(languages)
-            else:
-                all_languages.add(value.strip())
-
-    return sorted(list(all_languages))
-
-# Create a mapping for language filtering
-UNIQUE_LANGUAGES = get_unique_languages(LEADERBOARD_DF)
-
-# Create a special column for individual language filtering
-if not LEADERBOARD_DF.empty:
-    # Create a column that contains all individual languages as a list
-    LEADERBOARD_DF["_languages_list"] = LEADERBOARD_DF[auto_eval_column_attrs.language.name].apply(
-        lambda x: [lang.strip() for lang in str(x).split("/")] if pd.notna(x) else []
-    )
-
-    # Create a text version of Active Maintenance for checkboxgroup filtering
-    LEADERBOARD_DF["_maintenance_filter"] = LEADERBOARD_DF[auto_eval_column_attrs.availability.name].apply(
-        lambda x: "Active" if x else "Inactive"
-    )
-
 # Load queue data
 (
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
-    rejected_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def init_leaderboard(dataframe):
@@ -99,53 +94,40 @@ def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         # Create an empty dataframe with the expected columns
        all_columns = COLS + [task.value.col_name for task in Tasks]
-        empty_df = pd.DataFrame(columns=pd.Index(all_columns))
+        empty_df = pd.DataFrame(columns=all_columns)
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df
 
-    # Create filter columns list with proper typing
-    filter_columns = []
-
-    # 1. Library types
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"))
-
-    # 2. Programming Language (checkboxgroup - OR filtering)
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"))
-
-    # 3. GitHub Stars
-    filter_columns.append(ColumnFilter(
-        auto_eval_column_attrs.stars.name,
-        type="slider",
-        min=0,
-        max=50000,
-        label="GitHub Stars",
-    ))
-
-    # 4. Maintenance Status (checkboxgroup - separate from languages)
-    filter_columns.append(ColumnFilter("_maintenance_filter", type="checkboxgroup", label="Maintenance Status"))
-
-    # Hide columns
-    hidden_columns = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden]
-    hidden_columns.extend(["_languages_list", "_maintenance_filter", "_original_language"])  # Hide helper columns
-
     return Leaderboard(
         value=dataframe,
-        datatype="markdown",
+        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
-            default_selection=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).displayed_by_default],
-            cant_deselect=[getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).never_hidden],
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[auto_eval_column_attrs.library.name, auto_eval_column_attrs.license_name.name],
-        hide_columns=hidden_columns,
-        filter_columns=filter_columns,  # type: ignore
+        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
+            ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
+            ColumnFilter(
+                AutoEvalColumn.stars.name,
+                type="slider",
+                min=0,
+                max=50000,
+                label="GitHub Stars",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.availability.name, type="boolean", label="Show only active libraries", default=True
+            ),
+        ],
         bool_checkboxgroup_label="Filter libraries",
         interactive=False,
     )
 
 
-demo = gr.Blocks(css=custom_css, theme=Soft())
-# demo = gr.Blocks(css=custom_css, theme=Soft(font=["sans-serif"], font_mono=["monospace"]))
+demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
@@ -168,7 +150,7 @@ with demo:
                 open=False,
             ):
                 with gr.Row():
-                    finished_eval_table = Dataframe(
+                    finished_eval_table = gr.components.Dataframe(
                         value=finished_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
@@ -179,7 +161,7 @@
                 open=False,
            ):
                 with gr.Row():
-                    running_eval_table = Dataframe(
+                    running_eval_table = gr.components.Dataframe(
                        value=running_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
@@ -191,7 +173,7 @@
                 open=False,
             ):
                 with gr.Row():
-                    pending_eval_table = Dataframe(
+                    pending_eval_table = gr.components.Dataframe(
                        value=pending_eval_queue_df,
                         headers=EVAL_COLS,
                         datatype=EVAL_TYPES,
@@ -202,7 +184,26 @@
 
             with gr.Row():
                 with gr.Column():
-                    library_name_textbox = gr.Textbox(label="Library name")
+                    library_name_textbox = gr.Textbox(label="Library name (org/repo format)")
+                    library_version_textbox = gr.Textbox(label="Version", placeholder="v1.0.0")
+                    library_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in LibraryType if t != LibraryType.Unknown],
+                        label="Library type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    language = gr.Dropdown(
+                        choices=[i.value.name for i in Language if i != Language.Other],
+                        label="Programming Language",
+                        multiselect=False,
+                        value="Python",
+                        interactive=True,
+                    )
+                    framework = gr.Textbox(label="Framework/Ecosystem (e.g., PyTorch, React)")
+                    repository_url = gr.Textbox(label="Repository URL")
 
             submit_button = gr.Button("Submit for Assessment")
             submission_result = gr.Markdown()
@@ -210,18 +211,23 @@
                 add_new_eval,
                 [
                     library_name_textbox,
+                    library_version_textbox,
+                    repository_url,
+                    language,
+                    framework,
+                    library_type,
                 ],
                 submission_result,
             )
 
     with gr.Row():
-        with gr.Accordion("📙 Citation", open=True):
-            citation_button = gr.Code(
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
                 value=CITATION_BUTTON_TEXT,
                 label=CITATION_BUTTON_LABEL,
-                lines=14,
+                lines=20,
                 elem_id="citation-button",
-                language="yaml",
+                show_copy_button=True,
            )
 
 # Only schedule space restarts if not in local mode
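For readers tracing the submission flow: the click handler above passes the six Gradio components to `add_new_eval` positionally, so the function's parameters must line up with the list order (library name, version, repository URL, language, framework, library type). A minimal sketch of a signature compatible with that wiring; the parameter names are assumptions, not taken from `src/submission/submit.py`:

```python
# Sketch only: names are hypothetical; the positional order is taken from
# the component list wired to submit_button.click(...) above.
def add_new_eval(library_name, library_version, repository_url, language, framework, library_type):
    if "/" not in library_name:
        return "Error: expected an org/repo library name"
    # The real implementation validates the request and writes a pending
    # entry under EVAL_REQUESTS_PATH; here we only echo the inputs.
    return f"Queued {library_name} {library_version} ({library_type}, {language}) for assessment"
```
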
assessment-queue/pytorch_pytorch_eval_request_FINISHED_v2.1.0.json CHANGED
@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "ML Framework",
-  "library_type": "ML Framework",
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
   "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",
assessment-queue/pytorch_pytorch_eval_request_timestamp_def456.json CHANGED
@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "ML Framework",
-  "library_type": "ML Framework",
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
   "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",
assessment-results/agent_development_kit.json CHANGED
@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "google/adk-python",
-    "version": "v1.4.2",
+    "library_name": "microsoft/agent-development-kit",
+    "version": "v0.2.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-07T12:00:00Z",
     "last_updated": "2024-06-07T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/google_adk-python_v1.4.2.html",
-    "repository_url": "https://github.com/google/adk-python",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/agent_development_kit.html",
+    "repository_url": "https://github.com/microsoft/agent-development-kit",
     "github_stars": 3800,
     "license": "MIT",
     "scores": {
assessment-results/browser_use.json CHANGED
@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "browser-use/browser-use",
-    "version": "v0.3.2",
+    "library_name": "langchain-ai/browser-use",
+    "version": "v0.5.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-09T12:00:00Z",
     "last_updated": "2024-06-09T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/browser_use_browser-use_v0.3.2.html",
-    "repository_url": "https://github.com/browser-use/browser-use",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/browser_use.html",
+    "repository_url": "https://github.com/langchain-ai/browser-use",
     "github_stars": 3200,
     "license": "MIT",
     "scores": {
assessment-results/composio.json CHANGED
@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "ComposableHQ/composio",
-    "version": "v0.7.19",
+    "library_name": "ComposableAI/composio",
+    "version": "v0.4.2",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-10T12:00:00Z",
     "last_updated": "2024-06-10T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/ComposableHQ_composio_v0.7.19.html",
-    "repository_url": "https://github.com/ComposableHQ/composio",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/composio.html",
+    "repository_url": "https://github.com/ComposableAI/composio",
     "github_stars": 1200,
     "license": "MIT",
     "scores": {
assessment-results/crewai.json CHANGED
@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "crewAIInc/crewAI",
-    "version": "v0.130.0",
+    "library_name": "joaomdmoura/crewAI",
+    "version": "v0.9.4",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-15T12:00:00Z",
     "last_updated": "2024-06-15T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/crewaiinc_crewai_v0.130.0.html",
-    "repository_url": "https://github.com/crewAIInc/crewAI",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/crewai.html",
+    "repository_url": "https://github.com/joaomdmoura/crewAI",
     "github_stars": 8200,
     "license": "MIT",
     "scores": {
assessment-results/{huggingface_transformers.json → huggingface_candle.json} RENAMED
@@ -1,23 +1,23 @@
 {
   "assessment": {
-    "library_name": "huggingface/transformers",
-    "version": "v4.52.4",
-    "language": "Python",
-    "framework": "ML Framework",
+    "library_name": "huggingface/candle",
+    "version": "v0.3.2",
+    "language": "Rust",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T12:00:00Z",
     "last_updated": "2024-06-22T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/huggingface_transformers_v4.52.4.html",
-    "repository_url": "https://github.com/huggingface/transformers",
-    "github_stars": 146000,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/huggingface_candle.html",
+    "repository_url": "https://github.com/huggingface/candle",
+    "github_stars": 12500,
     "license": "Apache-2.0",
     "scores": {
-      "license_validation": 5,
-      "security_assessment": 1,
-      "maintenance_health": 4,
+      "license_validation": 4,
+      "security_assessment": 2,
+      "maintenance_health": 2,
       "dependency_management": 1,
-      "regulatory_compliance": 3
+      "regulatory_compliance": 2
     },
     "details": {
       "license_validation": {
assessment-results/jax.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "jax-ml/jax",
+    "library_name": "google/jax",
     "version": "v0.4.23",
     "language": "Python",
-    "framework": "ML Framework",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-24T12:00:00Z",
     "last_updated": "2024-06-24T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/jax-ml_jax_v0.4.23.html",
-    "repository_url": "https://github.com/jax-ml/jax",
-    "github_stars": 32604,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/jax.html",
+    "repository_url": "https://github.com/google/jax",
+    "github_stars": 36000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,
assessment-results/langchain.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langchain",
-    "version": "v0.3.66",
+    "version": "v0.1.0",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-17T12:00:00Z",
     "last_updated": "2024-06-17T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/langchain-ai_langchain_v0.3.66.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langchain.html",
     "repository_url": "https://github.com/langchain-ai/langchain",
-    "github_stars": 111000,
+    "github_stars": 79000,
     "license": "MIT",
     "scores": {
       "license_validation": 5,
assessment-results/langgraph.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langgraph",
-    "version": "v2.1.0",
+    "version": "v0.0.20",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-13T12:00:00Z",
     "last_updated": "2024-06-13T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/langchain-ai_langgraph_v2.1.0.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langgraph.html",
     "repository_url": "https://github.com/langchain-ai/langgraph",
-    "github_stars": 14700,
+    "github_stars": 4500,
     "license": "Proprietary",
     "scores": {
       "license_validation": 1,
assessment-results/llamaindex.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "run-llama/llama_index",
-    "version": "v0.12.43",
+    "library_name": "jerryjliu/llama_index",
+    "version": "v0.9.14",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-20T12:00:00Z",
     "last_updated": "2024-06-20T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/run-llama_llama_index_v0.12.43.html",
-    "repository_url": "https://github.com/run-llama/llama_index",
-    "github_stars": 42500,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/llamaindex.html",
+    "repository_url": "https://github.com/jerryjliu/llama_index",
+    "github_stars": 27000,
     "license": "MIT",
     "scores": {
       "license_validation": 4,
assessment-results/metagpt.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "FoundationAgents/MetaGPT",
-    "version": "v0.8.1",
+    "library_name": "geekan/MetaGPT",
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-14T12:00:00Z",
     "last_updated": "2024-06-14T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/foundationagents_metagpt_v0.8.1.html",
-    "repository_url": "https://github.com/FoundationAgents/MetaGPT",
-    "github_stars": 56700,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/metagpt.html",
+    "repository_url": "https://github.com/geekan/MetaGPT",
+    "github_stars": 32500,
     "license": "MIT",
     "scores": {
       "license_validation": 4,
assessment-results/onnx.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "onnx/onnx",
-    "version": "v1.18.0",
+    "version": "v1.15.0",
     "language": "C++/Python",
-    "framework": "ML Framework",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T11:00:00Z",
     "last_updated": "2024-06-22T11:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/onnx_onnx_v1.18.0.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/onnx.html",
     "repository_url": "https://github.com/onnx/onnx",
-    "github_stars": 19100,
+    "github_stars": 16200,
     "license": "MIT",
     "scores": {
       "license_validation": 4,
assessment-results/pydantic_ai.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pydantic/pydantic-ai",
-    "version": "v0.3.2",
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-08T12:00:00Z",
     "last_updated": "2024-06-08T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/pydantic_pydantic-ai_v0.3.2.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pydantic_ai.html",
     "repository_url": "https://github.com/pydantic/pydantic-ai",
-    "github_stars": 10400,
+    "github_stars": 5800,
     "license": "MIT",
     "scores": {
       "license_validation": 5,
assessment-results/pytorch.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pytorch/pytorch",
-    "version": "v2.7.1",
+    "version": "v2.2.1",
     "language": "C++/Python",
-    "framework": "ML Framework",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-25T12:00:00Z",
     "last_updated": "2024-06-25T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/pytorch_pytorch_v2.7.1.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pytorch.html",
     "repository_url": "https://github.com/pytorch/pytorch",
-    "github_stars": 91000,
+    "github_stars": 74500,
     "license": "BSD-3-Clause",
     "scores": {
       "license_validation": 5,
assessment-results/sglang.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "sgl-project/sglang",
-    "version": "v0.4.7",
+    "version": "v0.1.8",
     "language": "Python/C++",
     "framework": "LLM Inference",
     "completed_time": "2024-06-19T12:00:00Z",
     "last_updated": "2024-06-19T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/sgl-project_sglang_v0.4.7.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/sglang.html",
     "repository_url": "https://github.com/sgl-project/sglang",
-    "github_stars": 15400,
+    "github_stars": 4800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,
assessment-results/smolagents.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "huggingface/smolagents",
-    "version": "v1.19.0",
+    "library_name": "tinygrad/SmolAgents",
+    "version": "v0.2.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-12T12:00:00Z",
     "last_updated": "2024-06-12T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/huggingface_smolagents_v1.19.0.html",
-    "repository_url": "https://github.com/huggingface/smolagents",
-    "github_stars": 20500,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/smolagents.html",
+    "repository_url": "https://github.com/tinygrad/SmolAgents",
+    "github_stars": 2800,
     "license": "MIT",
     "scores": {
       "license_validation": 4,
assessment-results/stagehand.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "browserbase/stagehand",
-    "version": "v2.3.1",
+    "library_name": "langchain-ai/stagehand",
+    "version": "v0.0.12",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-11T12:00:00Z",
     "last_updated": "2024-06-11T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/browserbase_stagehand_v2.3.1.html",
-    "repository_url": "https://github.com/browserbase/stagehand",
-    "github_stars": 12800,
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/stagehand.html",
+    "repository_url": "https://github.com/langchain-ai/stagehand",
+    "github_stars": 1500,
     "license": "Apache-2.0 with Commons Clause",
     "scores": {
       "license_validation": 3,
assessment-results/tensorflow.json CHANGED
@@ -1,22 +1,22 @@
 {
   "assessment": {
     "library_name": "tensorflow/tensorflow",
-    "version": "v2.19.0",
+    "version": "v2.15.0",
     "language": "C++/Python",
-    "framework": "ML Framework",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-23T12:00:00Z",
     "last_updated": "2024-06-23T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/tensorflow_tensorflow_v2.19.0.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorflow.html",
     "repository_url": "https://github.com/tensorflow/tensorflow",
-    "github_stars": 190000,
+    "github_stars": 182000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,
       "security_assessment": 1,
-      "maintenance_health": 3,
-      "dependency_management": 1,
+      "maintenance_health": 3,
+      "dependency_management": 1,
       "regulatory_compliance": 3
     },
     "details": {
assessment-results/tensorrt.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "nvidia/TensorRT",
-    "version": "v10.12.0",
+    "version": "v9.1.0",
     "language": "C++/Python",
-    "framework": "ML Framework Inference",
+    "framework": "Machine Learning Inference",
     "completed_time": "2024-06-21T12:00:00Z",
     "last_updated": "2024-06-21T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/nvidia_tensorrt_v10.12.0.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorrt.html",
     "repository_url": "https://github.com/NVIDIA/TensorRT",
-    "github_stars": 11700,
+    "github_stars": 8500,
     "license": "Proprietary with Open Components",
     "scores": {
       "license_validation": 3,
assessment-results/text_generation_inference.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "huggingface/text-generation-inference",
-    "version": "v3.3.4",
+    "version": "v1.1.0",
     "language": "Rust/Python",
     "framework": "LLM Inference",
     "completed_time": "2024-06-16T12:00:00Z",
     "last_updated": "2024-06-16T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/huggingface_text-generation-inference_v3.3.4.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/text_generation_inference.html",
     "repository_url": "https://github.com/huggingface/text-generation-inference",
-    "github_stars": 10200,
+    "github_stars": 5600,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 3,
assessment-results/vllm.json CHANGED
@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "vllm-project/vllm",
-    "version": "v0.9.1",
+    "version": "v0.3.0",
     "language": "Python/CUDA",
     "framework": "LLM Inference",
     "completed_time": "2024-06-18T12:00:00Z",
     "last_updated": "2024-06-18T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://981526092.github.io/LibVulnWatch/vllm-project_vllm_v0.9.1.html",
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/vllm.html",
     "repository_url": "https://github.com/vllm-project/vllm",
-    "github_stars": 50600,
+    "github_stars": 12800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,
pyproject.toml CHANGED
@@ -1,33 +1,9 @@
-[project]
-name = "libvulnwatchleaderboard"
-version = "0.1.0"
-description = "A vulnerability assessment leaderboard for libraries"
-requires-python = ">=3.8"
-dependencies = [
-    "APScheduler",
-    "black",
-    "datasets",
-    "gradio",
-    "gradio[oauth]",
-    "gradio_leaderboard==0.0.13",
-    "gradio_client",
-    "huggingface-hub>=0.18.0",
-    "matplotlib",
-    "numpy",
-    "pandas",
-    "python-dateutil",
-    "tqdm",
-    "transformers",
-    "tokenizers>=0.15.0",
-    "sentencepiece",
-]
-
 [tool.ruff]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = ["E", "F"]
 ignore = ["E501"] # line too long (black is taking care of this)
 line-length = 119
-fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TID", "TRY", "UP", "YTT"]
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
 
 [tool.isort]
 profile = "black"
src/about.py CHANGED
@@ -12,11 +12,11 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # Risk domains from LibVulnWatch paper
-    license = Task("license_validation", "score", "License Rating")
-    security = Task("security_assessment", "score", "Security Rating")
-    maintenance = Task("maintenance_health", "score", "Maintenance Rating")
-    dependency = Task("dependency_management", "score", "Dependency Rating")
-    regulatory = Task("regulatory_compliance", "score", "Regulatory Rating")
+    license = Task("license_validation", "score", "License Risk")
+    security = Task("security_assessment", "score", "Security Risk")
+    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
+    dependency = Task("dependency_management", "score", "Dependency Risk")
+    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")
 
 NUM_FEWSHOT = 0 # Not relevant for vulnerability assessment
 # ---------------------------------------------------
@@ -28,32 +28,44 @@ TITLE = """<h1 align="center" id="space-title">LibVulnWatch: Vulnerability Asses
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-## LibVulnWatch – Continuous, Multi-Domain Risk Scoring for AI Libraries
-
-_As presented at the **ACL 2025 Student Research Workshop** and the **ICML 2025 Technical AI Governance (TAIG) workshop**_, LibVulnWatch provides an evidence-based, end-to-end pipeline that uncovers **hidden vulnerabilities** in open-source AI libraries across five governance-aligned domains:
-
-• **License Validation** – compatibility, provenance, obligations
-• **Security Assessment** – CVEs, patch latency, exploit primitives
-• **Maintenance Health** – bus-factor, release cadence, contributor diversity
-• **Dependency Management** – transitive risk, SBOM completeness
-• **Regulatory Compliance** – privacy/export controls, policy documentation
-
-In the paper we apply the framework to **20 popular libraries**, achieving **88 % coverage of OpenSSF Scorecard checks** and surfacing **up to 19 previously-unreported risks per library**.
-Lower scores indicate lower risk, and the **Trust Score** is the equal-weight average of the five domains.
+## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries
+
+This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains:
+- **License Validation**: Legal risks based on license type, compatibility, and requirements
+- **Security Assessment**: Vulnerability severity and patch responsiveness
+- **Maintenance Health**: Sustainability and governance practices
+- **Dependency Management**: Vulnerability inheritance and supply chain security
+- **Regulatory Compliance**: Compliance readiness for various frameworks
+
+Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = """
-## Methodology at a Glance
-
-LibVulnWatch orchestrates a **graph of specialised agents** powered by large language models. Each agent contributes one evidence layer and writes structured findings to a shared memory:
-
-1️⃣ **Static agents** – licence parsing, secret scanning, call-graph reachability
-2️⃣ **Dynamic agents** – fuzzing harnesses, dependency-confusion probes, CVE replay
-3️⃣ **Metadata agents** – GitHub mining, release-cadence modelling, community health
-4️⃣ **Policy agents** – mapping evidence to NIST SSDF, EU AI Act, and related frameworks
-
-The aggregator agent converts raw findings into 0–10 scores per domain, producing a reproducible JSON result that is **88 % compatible with OpenSSF Scorecard checks**. All artefacts (SBOMs, logs, annotated evidence) are archived and linked in the public report.
+LLM_BENCHMARKS_TEXT = f"""
+## How LibVulnWatch Works
+
+Our assessment methodology evaluates libraries through:
+1. **Static Analysis**: Code review, license parsing, and documentation examination
+2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing
+3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence
+
+Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk.
+
+## Reproducibility
+To reproduce our assessment for a specific library:
+```python
+from libvulnwatch import VulnerabilityAssessor
+
+# Initialize the assessor
+assessor = VulnerabilityAssessor()
+
+# Run assessment on a library
+results = assessor.assess_library("organization/library_name")
+
+# View detailed results
+print(results.risk_scores)
+print(results.detailed_findings)
+```
 """
 
 EVALUATION_QUEUE_TEXT = """
@@ -84,18 +96,11 @@ If your library shows as "FAILED" in the assessment queue, check that:
 """
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@inproceedings{wu2025libvulnwatch,
-  title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
-  author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
-  booktitle={ACL 2025 Student Research Workshop},
-  year={2025},
-  url={https://openreview.net/forum?id=yQzYEAL0BT}
+CITATION_BUTTON_TEXT = r"""
+@article{LibVulnWatch2025,
+  title={LibVulnWatch: Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries},
+  author={First Author and Second Author},
+  journal={ICML 2025 Technical AI Governance Workshop},
+  year={2025}
 }
-
-@inproceedings{anonymous2025libvulnwatch,
-  title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
-  author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
-  booktitle={ICML Workshop on Technical AI Governance (TAIG)},
-  year={2025},
-  url={https://openreview.net/forum?id=MHhrr8QHgR}
-}"""
+"""
 
 
 
 
 
 
 
src/display/css_html_js.py CHANGED
@@ -38,14 +38,6 @@ custom_css = """
     padding: 0px;
 }
 
-.gradio-container {
-    max-height: fit-content;
-}
-
-.container {
-    height: fit-content;
-}
-
 /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
 #leaderboard-table td:nth-child(2),
 #leaderboard-table th:nth-child(2) {
src/display/formatting.py CHANGED
@@ -1,5 +1,8 @@
 """Helper functions to style our gradio elements"""
 
+import re
+import os
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
@@ -10,23 +13,21 @@ def make_clickable_model(model_name):
 
 
 def make_clickable_report(report_url):
-    """Create a clickable HTML link for assessment reports"""
-    return f'<a href="{report_url}" target="_blank">View Report</a>'
+    """Return the direct URL to the assessment report without any formatting"""
+    # Just return the URL string directly
+    return report_url
 
 
 def styled_error(error):
-    """Format an error message with a red header"""
-    return f'<span style="color: red">❌ Error:</span> {error}'
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
 
 
 def styled_warning(warn):
-    """Format a warning message with an orange header"""
-    return f'<span style="color: orange">⚠️ Warning:</span> {warn}'
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
 
 
 def styled_message(message):
-    """Format a message with a green header"""
-    return f'<span style="color: green">✅ Success:</span> {message}'
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
 
 
 def has_no_nan_values(df, columns):
@@ -47,6 +48,21 @@ def make_clickable_library(library_name: str) -> str:
     return f'<a href="{github_url}" target="_blank">{library_name}</a>'
 
 
+def styled_message(message) -> str:
+    """Format a message with a green header"""
+    return f'<span style="color: green">✅ Success:</span> {message}'
+
+
+def styled_warning(message) -> str:
+    """Format a warning message with an orange header"""
+    return f'<span style="color: orange">⚠️ Warning:</span> {message}'
+
+
+def styled_error(message) -> str:
+    """Format an error message with a red header"""
+    return f'<span style="color: red">❌ Error:</span> {message}'
+
+
 # Risk severity coloring for risk scores
 def colorize_risk_score(score):
     """
src/display/utils.py CHANGED
@@ -1,14 +1,19 @@
1
- from dataclasses import dataclass
2
  from enum import Enum
3
 
 
 
4
  from src.about import Tasks
5
- from pydantic import BaseModel
 
 
6
 
7
 
8
  # These classes are for user facing column names,
9
  # to avoid having to change them all around the code
10
  # when a modif is needed
11
- class ColumnContent(BaseModel):
 
12
  name: str
13
  type: str
14
  displayed_by_default: bool
@@ -16,61 +21,38 @@ class ColumnContent(BaseModel):
16
  never_hidden: bool = False
17
 
18
  ## Leaderboard columns
19
- class AutoEvalColumn(BaseModel):
20
- library_type_symbol: ColumnContent
21
- library: ColumnContent
22
- overall_risk: ColumnContent
23
- # Task columns
24
- license: ColumnContent
25
- security: ColumnContent
26
- maintenance: ColumnContent
27
- dependency: ColumnContent
28
- regulatory: ColumnContent
29
- # Library information
30
- library_type: ColumnContent
31
- framework: ColumnContent
32
- version: ColumnContent
33
- language: ColumnContent
34
- license_name: ColumnContent
35
- stars: ColumnContent
36
- availability: ColumnContent
37
- report_url: ColumnContent
38
- last_update: ColumnContent
39
- verified: ColumnContent
40
-
41
- auto_eval_column_attrs = AutoEvalColumn(
42
- library_type_symbol=ColumnContent(name="T", type="str", displayed_by_default=True, never_hidden=True),
43
- library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
44
- overall_risk=ColumnContent(name="Trust Score", type="number", displayed_by_default=True),
45
- # Task columns from Tasks enum
46
- license=ColumnContent(name="License Rating", type="number", displayed_by_default=True),
47
- security=ColumnContent(name="Security Rating", type="number", displayed_by_default=True),
48
- maintenance=ColumnContent(name="Maintenance Rating", type="number", displayed_by_default=True),
49
- dependency=ColumnContent(name="Dependency Rating", type="number", displayed_by_default=True),
50
- regulatory=ColumnContent(name="Regulatory Rating", type="number", displayed_by_default=True),
51
- # Library information
52
- library_type=ColumnContent(name="Type", type="str", displayed_by_default=False),
53
- framework=ColumnContent(name="Framework", type="str", displayed_by_default=False),
54
- version=ColumnContent(name="Version", type="str", displayed_by_default=False, hidden=True),
55
- language=ColumnContent(name="Language", type="str", displayed_by_default=False),
56
- license_name=ColumnContent(name="License", type="str", displayed_by_default=True),
57
- stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
58
- availability=ColumnContent(name="Active Maintenance", type="bool", displayed_by_default=True),
59
- report_url=ColumnContent(name="Report", type="markdown", displayed_by_default=True),
60
- last_update=ColumnContent(name="Last Update", type="str", displayed_by_default=False),
61
- verified=ColumnContent(name="Verified", type="bool", displayed_by_default=False),
62
- )
63
-
64
 
65
  ## For the queue columns in the submission tab
66
  @dataclass(frozen=True)
67
  class EvalQueueColumn: # Queue column
68
- library = ColumnContent(name="library", type="markdown", displayed_by_default=True)
69
- version = ColumnContent(name="version", type="str", displayed_by_default=True)
70
- language = ColumnContent(name="language", type="str", displayed_by_default=True)
71
- framework = ColumnContent(name="framework", type="str", displayed_by_default=True)
72
- library_type = ColumnContent(name="library_type", type="str", displayed_by_default=True)
73
- status = ColumnContent(name="status", type="str", displayed_by_default=True)
74
 
75
  ## All the library information that we might need
76
  @dataclass
@@ -81,27 +63,27 @@ class LibraryDetails:
81
 
82
 
83
  class LibraryType(Enum):
84
- ML = LibraryDetails(name="ML Framework", symbol="🟢")
85
- LLM = LibraryDetails(name="LLM Framework", symbol="🔶")
86
- AGENT = LibraryDetails(name="Agent Framework", symbol="⭕")
87
- VIS = LibraryDetails(name="LLM Inference", symbol="🟦")
88
- GENERAL = LibraryDetails(name="LLM Orchestration", symbol="🟣")
89
  Unknown = LibraryDetails(name="", symbol="?")
90
 
91
  def to_str(self, separator=" "):
92
  return f"{self.value.symbol}{separator}{self.value.name}"
93
 
94
  @staticmethod
95
- def from_str(type: str) -> "LibraryType":
96
- if "ML Framework" in type or "🟢" in type:
97
  return LibraryType.ML
98
- if "LLM Framework" in type or "🔶" in type:
99
  return LibraryType.LLM
100
- if "Agent Framework" in type or "⭕" in type:
101
  return LibraryType.AGENT
102
- if "LLM Inference" in type or "🟦" in type:
103
  return LibraryType.VIS
104
- if "LLM Orchestration" in type or "🟣" in type:
105
  return LibraryType.GENERAL
106
  return LibraryType.Unknown
107
 
@@ -119,11 +101,11 @@ class AssessmentStatus(Enum):
119
  Disputed = LibraryDetails("Disputed")
120
 
121
  # Column selection
122
- COLS = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if not getattr(auto_eval_column_attrs, field).hidden]
123
- fields = AutoEvalColumn.model_fields
124
 
125
- EVAL_COLS = [getattr(EvalQueueColumn, field).name for field in vars(EvalQueueColumn) if not field.startswith('_')]
126
- EVAL_TYPES = [getattr(EvalQueueColumn, field).type for field in vars(EvalQueueColumn) if not field.startswith('_')]
127
 
128
  # Task columns for benchmarking - use the display column names from the Tasks enum
129
  BENCHMARK_COLS = [task.value.col_name for task in Tasks]
 
 
1
+ from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
4
+ import pandas as pd
5
+
6
  from src.about import Tasks
7
+
8
+ def fields(raw_class):
9
+ return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
10
 
11
 
12
  # These classes are for user facing column names,
13
  # to avoid having to change them all around the code
14
  # when a modif is needed
15
+ @dataclass
16
+ class ColumnContent:
17
  name: str
18
  type: str
19
  displayed_by_default: bool
20
  hidden: bool = False
21
  never_hidden: bool = False
22
 
23
  ## Leaderboard columns
24
+ auto_eval_column_dict = []
25
+ # Init
26
+ auto_eval_column_dict.append(["library_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
27
+ auto_eval_column_dict.append(["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)])
28
+ #Scores
29
+ auto_eval_column_dict.append(["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)])
30
+ for task in Tasks:
31
+ auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
32
+ # Library information
33
+ auto_eval_column_dict.append(["library_type", ColumnContent, ColumnContent("Type", "str", False)])
34
+ auto_eval_column_dict.append(["framework", ColumnContent, ColumnContent("Framework", "str", False)])
35
+ auto_eval_column_dict.append(["version", ColumnContent, ColumnContent("Version", "str", False, False)])
36
+ auto_eval_column_dict.append(["language", ColumnContent, ColumnContent("Language", "str", False)])
37
+ auto_eval_column_dict.append(["license_name", ColumnContent, ColumnContent("License", "str", True)])
38
+ auto_eval_column_dict.append(["stars", ColumnContent, ColumnContent("GitHub ⭐", "number", False)])
39
+ auto_eval_column_dict.append(["last_update", ColumnContent, ColumnContent("Last Updated", "str", False)])
40
+ auto_eval_column_dict.append(["verified", ColumnContent, ColumnContent("Independently Verified", "bool", False)])
41
+ auto_eval_column_dict.append(["availability", ColumnContent, ColumnContent("Active Maintenance", "bool", True)])
42
+ auto_eval_column_dict.append(["report_url", ColumnContent, ColumnContent("Report", "str", True)])
43
+
44
+ # We use make dataclass to dynamically fill the scores from Tasks
45
+ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
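The block above replaces the hand-written pydantic `AutoEvalColumn` model with a column registry built at import time via `make_dataclass`. A minimal sketch of the pattern follows; `DemoTasks` is a hypothetical two-member stand-in for `src.about.Tasks`, which in the real module defines one member per risk category.

```python
# Minimal sketch of the make_dataclass pattern used above.
# DemoTasks is a hypothetical stand-in for src.about.Tasks.
from dataclasses import dataclass, make_dataclass
from enum import Enum

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

@dataclass
class Task:
    benchmark: str
    col_name: str

class DemoTasks(Enum):
    license = Task("license_validation", "License Risk")
    security = Task("security_assessment", "Security Risk")

columns = [
    ["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)],
    ["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)],
]
for task in DemoTasks:
    # One numeric column per risk category, keyed by the enum member name.
    columns.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

AutoEvalColumn = make_dataclass("AutoEvalColumn", columns, frozen=True)

# The defaults double as class-level attributes, so display names resolve as:
print(AutoEvalColumn.license.name)       # "License Risk"
print(AutoEvalColumn.overall_risk.name)  # "Trust Score ⬇️"
```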
 
46
 
47
  ## For the queue columns in the submission tab
48
  @dataclass(frozen=True)
49
  class EvalQueueColumn: # Queue column
50
+ library = ColumnContent("library", "markdown", True)
51
+ version = ColumnContent("version", "str", True)
52
+ language = ColumnContent("language", "str", True)
53
+ framework = ColumnContent("framework", "str", True)
54
+ library_type = ColumnContent("library_type", "str", True)
55
+ status = ColumnContent("status", "str", True)
56
 
57
  ## All the library information that we might need
58
  @dataclass
 
63
 
64
 
65
  class LibraryType(Enum):
66
+ ML = LibraryDetails(name="machine learning", symbol="🟢")
67
+ LLM = LibraryDetails(name="llm framework", symbol="🔶")
68
+ AGENT = LibraryDetails(name="agent framework", symbol="⭕")
69
+ VIS = LibraryDetails(name="visualization", symbol="🟦")
70
+ GENERAL = LibraryDetails(name="general ai", symbol="🟣")
71
  Unknown = LibraryDetails(name="", symbol="?")
72
 
73
  def to_str(self, separator=" "):
74
  return f"{self.value.symbol}{separator}{self.value.name}"
75
 
76
  @staticmethod
77
+ def from_str(type):
78
+ if "machine learning" in type or "🟢" in type:
79
  return LibraryType.ML
80
+ if "llm framework" in type or "🔶" in type:
81
  return LibraryType.LLM
82
+ if "agent framework" in type or "⭕" in type:
83
  return LibraryType.AGENT
84
+ if "visualization" in type or "🟦" in type:
85
  return LibraryType.VIS
86
+ if "general ai" in type or "🟣" in type:
87
  return LibraryType.GENERAL
88
  return LibraryType.Unknown
89
 
 
101
  Disputed = LibraryDetails("Disputed")
102
 
103
  # Column selection
104
+ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
105
 
106
+ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
107
+ EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
108
 
109
  # Task columns for benchmarking - use the display column names from the Tasks enum
110
  BENCHMARK_COLS = [task.value.col_name for task in Tasks]
111
+
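With the pydantic model gone, the module now discovers columns by scanning class attributes. The small `fields()` helper at the top of the new file is what makes `COLS`, `EVAL_COLS` and `EVAL_TYPES` one-liners; a self-contained sketch of how that resolves, assuming the queue columns shown above:

```python
# Sketch of the fields() helper driving column selection.
# ColumnContent / EvalQueueColumn mirror the definitions in the diff above.
from dataclasses import dataclass

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Every class attribute that is not a dunder is treated as a column spec.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

@dataclass(frozen=True)
class EvalQueueColumn:
    library = ColumnContent("library", "markdown", True)
    version = ColumnContent("version", "str", True)
    status = ColumnContent("status", "str", True)

EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]   # ['library', 'version', 'status']
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]  # ['markdown', 'str', 'str']
```

Note that the `COLS` comprehension filters on `c.hidden`, so it relies on the `hidden` flag defaulting to `False` on `ColumnContent`.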
src/envs.py CHANGED
@@ -2,30 +2,21 @@ import os
2
 
3
  from huggingface_hub import HfApi
4
 
5
- # Dynamically determine if we're running in local mode
6
- def is_local_mode():
7
- if os.environ.get("SPACE_AUTHOR_NAME") and os.environ.get("SPACE_REPO_NAME") and os.environ.get("HF_TOKEN") and os.environ.get("SPACE_ID"):
8
- return False
9
- return True
10
-
11
- LOCAL_MODE = is_local_mode()
12
 
13
  # Info to change for your repository
14
  # ----------------------------------
15
  # Get token from environment or use None in local mode
16
  TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
17
 
18
- OWNER = "holistic-ai" # Change to your org - don't forget to create a results and request dataset, with the correct format!
19
  # ----------------------------------
20
 
21
- REPO_ID = f"{OWNER}/LibVulnWatch"
22
- QUEUE_REPO = REPO_ID # Use the same repository
23
- RESULTS_REPO = REPO_ID # Use the same repository
24
-
25
- if not LOCAL_MODE:
26
- REPO_ID = str(os.environ.get("SPACE_ID"))
27
- QUEUE_REPO = REPO_ID
28
- RESULTS_REPO = REPO_ID
29
 
30
  # If you setup a cache later, just change HF_HOME
31
  CACHE_PATH=os.getenv("HF_HOME", ".")
 
2
 
3
  from huggingface_hub import HfApi
4
 
5
+ # Run in local mode (no Hugging Face connection required)
6
+ # Set to True when developing locally without HF credentials
7
+ LOCAL_MODE = True
8
 
9
  # Info to change for your repository
10
  # ----------------------------------
11
  # Get token from environment or use None in local mode
12
  TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
13
 
14
+ OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
15
  # ----------------------------------
16
 
17
+ REPO_ID = f"{OWNER}/leaderboard"
18
+ QUEUE_REPO = f"{OWNER}/vulnerability-requests"
19
+ RESULTS_REPO = f"{OWNER}/vulnerability-assessments"
20
 
21
  # If you setup a cache later, just change HF_HOME
22
  CACHE_PATH=os.getenv("HF_HOME", ".")
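The rewritten `envs.py` pins `LOCAL_MODE = True` instead of inferring it from Space environment variables, and points the queue and results at two separate dataset repos under the `libvulnwatch` org. A hedged sketch of how these constants are typically consumed downstream; `upload_request()` is an illustrative helper, not a function from the repo:

```python
# Illustrative consumer of the constants defined in src/envs.py above.
# upload_request() is a hypothetical helper, not part of the codebase.
import os
from huggingface_hub import HfApi

LOCAL_MODE = True
TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None

OWNER = "libvulnwatch"
QUEUE_REPO = f"{OWNER}/vulnerability-requests"

API = HfApi(token=TOKEN)

def upload_request(local_path: str, filename: str) -> str:
    """Push a request file to the queue dataset unless running locally."""
    if LOCAL_MODE:
        return f"{filename} kept on disk (LOCAL_MODE is True)"
    API.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=filename,
        repo_id=QUEUE_REPO,
        repo_type="dataset",
    )
    return f"{filename} uploaded to {QUEUE_REPO}"
```

Because the flag is now hard-coded, a deployed Space will skip all Hub syncing until `LOCAL_MODE` is flipped back (or the previous environment-based detection is restored).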
src/leaderboard/read_evals.py CHANGED
@@ -1,32 +1,28 @@
1
  import glob
2
  import json
 
3
  import os
 
4
  from datetime import datetime
5
- from pydantic import BaseModel
6
-
7
- from src.display.formatting import make_clickable_library, make_clickable_report
8
- from src.display.utils import auto_eval_column_attrs, LibraryType, Tasks, Language
9
 
 
10
 
11
- def parse_iso_datetime(datetime_str: str) -> datetime:
12
- """Parse ISO format datetime string, handling 'Z' UTC timezone indicator"""
13
- if datetime_str.endswith('Z'):
14
- datetime_str = datetime_str[:-1] + '+00:00'
15
- return datetime.fromisoformat(datetime_str)
16
 
17
 
18
- class AssessmentResult(BaseModel):
 
19
  """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
20
  """
21
  assessment_id: str # Unique identifier
22
- library_name: str
23
  org: str
24
  repo: str
25
  version: str
26
  results: dict # Risk scores
27
  framework: str = ""
28
  language: Language = Language.Other
29
- language_str: str = "" # Original language string to support multiple languages
30
  library_type: LibraryType = LibraryType.Unknown
31
  license: str = "?"
32
  stars: int = 0
@@ -36,7 +32,7 @@ class AssessmentResult(BaseModel):
36
  report_url: str = "" # URL to detailed assessment report
37
 
38
  @classmethod
39
- def init_from_json_file(cls, json_filepath):
40
  """Initializes the assessment result from a JSON file"""
41
  with open(json_filepath) as fp:
42
  data = json.load(fp)
@@ -47,7 +43,7 @@ class AssessmentResult(BaseModel):
47
  org_and_repo = library_name.split("/", 1)
48
 
49
  if len(org_and_repo) == 1:
50
- org = ""
51
  repo = org_and_repo[0]
52
  assessment_id = f"{repo}_{assessment.get('version', '')}"
53
  else:
@@ -66,27 +62,19 @@ class AssessmentResult(BaseModel):
66
  # Library metadata
67
  framework = assessment.get("framework", "")
68
  language_str = assessment.get("language", "Other")
69
-
70
- # Handle multiple languages separated by /
71
- if "/" in language_str:
72
- language_parts = [lang.strip() for lang in language_str.split("/")]
73
- # Store the full string but parse the first language for enum
74
- language = next((lang for lang in Language if lang.value.name == language_parts[0]), Language.Other)
75
- else:
76
- language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
77
 
78
  # Availability and verification
79
  last_update = assessment.get("last_updated", "")
80
  if last_update:
81
  try:
82
  # Format date for display
83
- dt = parse_iso_datetime(last_update)
84
  last_update = dt.strftime("%Y-%m-%d")
85
- except Exception as e:
86
- print(e)
87
  pass
88
 
89
- return cls(
90
  assessment_id=assessment_id,
91
  library_name=library_name,
92
  org=org,
@@ -95,7 +83,6 @@ class AssessmentResult(BaseModel):
95
  results=risk_scores,
96
  framework=framework,
97
  language=language,
98
- language_str=language_str,
99
  license=assessment.get("license", "?"),
100
  availability=assessment.get("active_maintenance", True),
101
  verified=assessment.get("independently_verified", False),
@@ -103,6 +90,18 @@ class AssessmentResult(BaseModel):
103
  report_url=assessment.get("report_url", ""),
104
  )
105
 
106
  def to_dict(self):
107
  """Converts the Assessment Result to a dict compatible with our dataframe display"""
108
  # Calculate Trust Score as equal-weight average
@@ -124,24 +123,22 @@ class AssessmentResult(BaseModel):
124
  weight_sum += weight
125
 
126
  trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
127
- # Round to 1 decimal place
128
- trust_score = round(trust_score, 1)
129
 
130
  data_dict = {
131
  "assessment_id": self.assessment_id, # not a column, just a save name
132
- auto_eval_column_attrs.library_type.name: self.library_type.value.name,
133
- auto_eval_column_attrs.library_type_symbol.name: self.library_type.value.symbol,
134
- auto_eval_column_attrs.language.name: self.language_str if self.language_str else self.language.value.name,
135
- auto_eval_column_attrs.framework.name: self.framework,
136
- auto_eval_column_attrs.library.name: make_clickable_library(self.library_name),
137
- auto_eval_column_attrs.version.name: self.version,
138
- auto_eval_column_attrs.overall_risk.name: trust_score,
139
- auto_eval_column_attrs.license_name.name: self.license,
140
- auto_eval_column_attrs.stars.name: self.stars,
141
- auto_eval_column_attrs.last_update.name: self.last_update,
142
- auto_eval_column_attrs.verified.name: self.verified,
143
- auto_eval_column_attrs.availability.name: self.availability,
144
- auto_eval_column_attrs.report_url.name: make_clickable_report(self.report_url),
145
  }
146
 
147
  # Add task-specific risk scores - map to display column names
@@ -150,25 +147,11 @@ class AssessmentResult(BaseModel):
150
  benchmark_key = task_enum.benchmark # e.g., "license_validation"
151
  col_name = task_enum.col_name # Use the display name, e.g., "License Risk"
152
  risk_score = self.results.get(benchmark_key, 10) # Default to highest risk
153
- # Round to 1 decimal place
154
- data_dict[col_name] = round(risk_score, 1)
155
 
156
  return data_dict
157
 
158
 
159
- def update_with_request_file(self, assessment_filepath):
160
- """Finds the relevant request file for the current library and updates info with it"""
161
- try:
162
- with open(assessment_filepath, "r") as f:
163
- request = json.load(f)["assessment"]
164
- self.library_type = LibraryType.from_str(request.get("framework", ""))
165
- self.stars = request.get("github_stars", 0)
166
- except Exception as e:
167
- print(e)
168
- print(f"Could not find request file for {self.library_name} version {self.version}")
169
-
170
-
171
-
172
  def get_request_file_for_library(requests_path, library_name, version):
173
  """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
174
  # Try multiple naming patterns for flexibility
@@ -219,9 +202,8 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
219
 
220
  # Sort the files by date if they have date info
221
  try:
222
- files.sort(key=lambda x: parse_iso_datetime(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
223
- except Exception as e:
224
- print(e)
225
  pass
226
 
227
  for file in files:
@@ -231,7 +213,7 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
231
  for assessment_filepath in assessment_filepaths:
232
  # Creation of result
233
  assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
234
- assessment_result.update_with_request_file(assessment_filepath)
235
 
236
  # Store results of same eval together
237
  assessment_id = assessment_result.assessment_id
 
1
  import glob
2
  import json
3
+ import math
4
  import os
5
+ from dataclasses import dataclass
6
  from datetime import datetime
7
 
8
+ import numpy as np
9
 
10
+ from src.display.formatting import make_clickable_library, make_clickable_report
11
+ from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
12
 
13
 
14
+ @dataclass
15
+ class AssessmentResult:
16
  """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
17
  """
18
  assessment_id: str # Unique identifier
19
+ library_name: str # org/repo
20
  org: str
21
  repo: str
22
  version: str
23
  results: dict # Risk scores
24
  framework: str = ""
25
  language: Language = Language.Other
 
26
  library_type: LibraryType = LibraryType.Unknown
27
  license: str = "?"
28
  stars: int = 0
 
32
  report_url: str = "" # URL to detailed assessment report
33
 
34
  @classmethod
35
+ def init_from_json_file(self, json_filepath):
36
  """Initializes the assessment result from a JSON file"""
37
  with open(json_filepath) as fp:
38
  data = json.load(fp)
 
43
  org_and_repo = library_name.split("/", 1)
44
 
45
  if len(org_and_repo) == 1:
46
+ org = None
47
  repo = org_and_repo[0]
48
  assessment_id = f"{repo}_{assessment.get('version', '')}"
49
  else:
 
62
  # Library metadata
63
  framework = assessment.get("framework", "")
64
  language_str = assessment.get("language", "Other")
65
+ language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
66
 
67
  # Availability and verification
68
  last_update = assessment.get("last_updated", "")
69
  if last_update:
70
  try:
71
  # Format date for display
72
+ dt = datetime.fromisoformat(last_update)
73
  last_update = dt.strftime("%Y-%m-%d")
74
+ except:
 
75
  pass
76
 
77
+ return self(
78
  assessment_id=assessment_id,
79
  library_name=library_name,
80
  org=org,
 
83
  results=risk_scores,
84
  framework=framework,
85
  language=language,
 
86
  license=assessment.get("license", "?"),
87
  availability=assessment.get("active_maintenance", True),
88
  verified=assessment.get("independently_verified", False),
 
90
  report_url=assessment.get("report_url", ""),
91
  )
92
 
93
+ def update_with_request_file(self, requests_path):
94
+ """Finds the relevant request file for the current library and updates info with it"""
95
+ request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
96
+
97
+ try:
98
+ with open(request_file, "r") as f:
99
+ request = json.load(f)
100
+ self.library_type = LibraryType.from_str(request.get("library_type", ""))
101
+ self.stars = request.get("stars", 0)
102
+ except Exception:
103
+ print(f"Could not find request file for {self.library_name} version {self.version}")
104
+
105
  def to_dict(self):
106
  """Converts the Assessment Result to a dict compatible with our dataframe display"""
107
  # Calculate Trust Score as equal-weight average
 
123
  weight_sum += weight
124
 
125
  trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
 
 
126
 
127
  data_dict = {
128
  "assessment_id": self.assessment_id, # not a column, just a save name
129
+ AutoEvalColumn.library_type.name: self.library_type.value.name,
130
+ AutoEvalColumn.library_type_symbol.name: self.library_type.value.symbol,
131
+ AutoEvalColumn.language.name: self.language.value.name,
132
+ AutoEvalColumn.framework.name: self.framework,
133
+ AutoEvalColumn.library.name: make_clickable_library(self.library_name),
134
+ AutoEvalColumn.version.name: self.version,
135
+ AutoEvalColumn.overall_risk.name: trust_score,
136
+ AutoEvalColumn.license_name.name: self.license,
137
+ AutoEvalColumn.stars.name: self.stars,
138
+ AutoEvalColumn.last_update.name: self.last_update,
139
+ AutoEvalColumn.verified.name: self.verified,
140
+ AutoEvalColumn.availability.name: self.availability,
141
+ AutoEvalColumn.report_url.name: make_clickable_report(self.report_url),
142
  }
143
 
144
  # Add task-specific risk scores - map to display column names
 
147
  benchmark_key = task_enum.benchmark # e.g., "license_validation"
148
  col_name = task_enum.col_name # Use the display name, e.g., "License Risk"
149
  risk_score = self.results.get(benchmark_key, 10) # Default to highest risk
150
+ data_dict[col_name] = risk_score
 
151
 
152
  return data_dict
153
 
154
 
155
  def get_request_file_for_library(requests_path, library_name, version):
156
  """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
157
  # Try multiple naming patterns for flexibility
 
202
 
203
  # Sort the files by date if they have date info
204
  try:
205
+ files.sort(key=lambda x: datetime.fromisoformat(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
206
+ except:
 
207
  pass
208
 
209
  for file in files:
 
213
  for assessment_filepath in assessment_filepaths:
214
  # Creation of result
215
  assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
216
+ assessment_result.update_with_request_file(requests_path)
217
 
218
  # Store results of same eval together
219
  assessment_id = assessment_result.assessment_id
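Two behavioural details of the rewritten `read_evals.py` are worth spelling out. First, dropping the `parse_iso_datetime` helper means `datetime.fromisoformat()` is now called on the raw timestamp; on Python versions before 3.11 that call rejects a trailing `Z` UTC suffix, so such records fall into the bare `except` and keep their unformatted date string. Second, the Trust Score in `to_dict()` remains an equal-weight average over the risk categories, with missing categories counted at the maximum risk of 10 (and the previous rounding to one decimal place is gone). A small sketch of that arithmetic, using hypothetical category keys and scores:

```python
# Sketch of the Trust Score arithmetic in AssessmentResult.to_dict().
# Category keys and scores below are made up for illustration.
def trust_score(results: dict, categories: list) -> float:
    risk_sum, weight_sum = 0.0, 0.0
    for category in categories:
        weight = 1.0                                    # equal-weight average
        risk_sum += results.get(category, 10) * weight  # missing -> highest risk
        weight_sum += weight
    return risk_sum / weight_sum if weight_sum > 0 else 10

scores = {"license_validation": 2.5, "security_assessment": 4.0}
categories = ["license_validation", "security_assessment", "maintenance_health"]
print(trust_score(scores, categories))  # (2.5 + 4.0 + 10) / 3 = 5.5
```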
src/populate.py CHANGED
@@ -2,8 +2,9 @@
2
 
3
  import pandas as pd
4
 
5
- from src.display.utils import auto_eval_column_attrs
6
  from src.leaderboard.read_evals import get_raw_assessment_results
 
7
 
8
 
9
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
@@ -33,9 +34,9 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
33
  print(f"Warning: Column '{col}' missing, adding empty column")
34
  all_df[col] = 10.0 # Default to highest risk
35
 
36
- # Sort by Trust Score (ascending - higher is better)
37
- if auto_eval_column_attrs.overall_risk.name in all_df.columns:
38
- all_df = all_df.sort_values(by=[auto_eval_column_attrs.overall_risk.name], ascending=False)
39
 
40
  return all_df
41
 
@@ -68,16 +69,19 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
68
  finished_data = []
69
  running_data = []
70
  pending_data = []
71
- rejected_data = []
72
  for file_path in request_files:
73
  try:
74
- with open(file_path, "r", encoding="utf-8") as f:
75
  data = json.load(f)
76
 
77
  # Extract relevant fields
78
  row = {
79
  "library": data.get("library", ""),
 
80
  "language": data.get("language", ""),
 
 
81
  "status": data.get("status", "UNKNOWN")
82
  }
83
 
@@ -88,8 +92,6 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
88
  running_data.append(row)
89
  elif row["status"] == "PENDING":
90
  pending_data.append(row)
91
- elif row["status"] == "REJECTED":
92
- rejected_data.append(row)
93
  except Exception as e:
94
  print(f"Error reading request file {file_path}: {e}")
95
  continue
@@ -98,11 +100,11 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
98
  finished_df = pd.DataFrame(finished_data, columns=eval_cols)
99
  running_df = pd.DataFrame(running_data, columns=eval_cols)
100
  pending_df = pd.DataFrame(pending_data, columns=eval_cols)
101
- rejected_df = pd.DataFrame(rejected_data, columns=eval_cols)
102
- return finished_df, running_df, pending_df, rejected_df
103
 
104
  except Exception as e:
105
  print(f"Error reading evaluation queue: {e}")
106
  # Return empty dataframes
107
  empty_df = pd.DataFrame(columns=eval_cols)
108
- return empty_df.copy(), empty_df.copy(), empty_df.copy(), empty_df.copy()
 
2
 
3
  import pandas as pd
4
 
5
+ from src.display.utils import AutoEvalColumn
6
  from src.leaderboard.read_evals import get_raw_assessment_results
7
+ from src.about import Tasks
8
 
9
 
10
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
 
34
  print(f"Warning: Column '{col}' missing, adding empty column")
35
  all_df[col] = 10.0 # Default to highest risk
36
 
37
+ # Sort by Trust Score (ascending - lower is better)
38
+ if AutoEvalColumn.overall_risk.name in all_df.columns:
39
+ all_df = all_df.sort_values(by=[AutoEvalColumn.overall_risk.name])
40
 
41
  return all_df
42
 
 
69
  finished_data = []
70
  running_data = []
71
  pending_data = []
72
+
73
  for file_path in request_files:
74
  try:
75
+ with open(file_path, "r") as f:
76
  data = json.load(f)
77
 
78
  # Extract relevant fields
79
  row = {
80
  "library": data.get("library", ""),
81
+ "version": data.get("version", ""),
82
  "language": data.get("language", ""),
83
+ "framework": data.get("framework", ""),
84
+ "library_type": data.get("library_type", ""),
85
  "status": data.get("status", "UNKNOWN")
86
  }
87
 
 
92
  running_data.append(row)
93
  elif row["status"] == "PENDING":
94
  pending_data.append(row)
 
 
95
  except Exception as e:
96
  print(f"Error reading request file {file_path}: {e}")
97
  continue
 
100
  finished_df = pd.DataFrame(finished_data, columns=eval_cols)
101
  running_df = pd.DataFrame(running_data, columns=eval_cols)
102
  pending_df = pd.DataFrame(pending_data, columns=eval_cols)
103
+
104
+ return finished_df, running_df, pending_df
105
 
106
  except Exception as e:
107
  print(f"Error reading evaluation queue: {e}")
108
  # Return empty dataframes
109
  empty_df = pd.DataFrame(columns=eval_cols)
110
+ return empty_df.copy(), empty_df.copy(), empty_df.copy()
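`get_evaluation_queue_df` now returns three dataframes (finished, running, pending); the REJECTED bucket and its fourth return value are gone, and each queue row carries the full submission metadata. A compact sketch of the bucketing, assuming request files shaped like the payload written by `add_new_eval`:

```python
# Sketch of the status bucketing in get_evaluation_queue_df after this change.
import json
import pandas as pd

EVAL_COLS = ["library", "version", "language", "framework", "library_type", "status"]

def bucket_requests(request_files):
    finished, running, pending = [], [], []
    for path in request_files:
        with open(path) as f:
            data = json.load(f)
        row = {col: data.get(col, "") for col in EVAL_COLS}
        row["status"] = data.get("status", "UNKNOWN")
        if row["status"] == "FINISHED":
            finished.append(row)
        elif row["status"] == "RUNNING":
            running.append(row)
        elif row["status"] == "PENDING":
            pending.append(row)
        # Any other status (e.g. REJECTED) is silently dropped now.
    return (
        pd.DataFrame(finished, columns=EVAL_COLS),
        pd.DataFrame(running, columns=EVAL_COLS),
        pd.DataFrame(pending, columns=EVAL_COLS),
    )
```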
src/submission/check_validity.py CHANGED
@@ -1,22 +1,58 @@
1
  import json
2
  import os
 
 
3
  from collections import defaultdict
 
4
  from typing import Dict, Tuple, Any, List, Set
5
 
6
- def is_repository_valid(repo_name: str) -> Tuple[bool, str, Dict[str, Any]]:
7
  """
8
  Checks if a GitHub repository is valid and accessible.
9
 
10
  Args:
11
- repo_name: The name of the repository
 
12
 
13
  Returns:
14
  Tuple of (is_valid, error_message, library_info)
15
  """
16
  # Basic format validation
17
- if not repo_name:
18
- return False, "Repository name is required", {}
19
- return True, "", {}
 
20
 
21
  def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
22
  """
 
1
  import json
2
  import os
3
+ import re
4
+ import requests
5
  from collections import defaultdict
6
+ from datetime import datetime, timedelta, timezone
7
  from typing import Dict, Tuple, Any, List, Set
8
 
9
+ def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
10
  """
11
  Checks if a GitHub repository is valid and accessible.
12
 
13
  Args:
14
+ repo_name: The name of the repository (org/repo format)
15
+ repo_url: URL to the repository
16
 
17
  Returns:
18
  Tuple of (is_valid, error_message, library_info)
19
  """
20
  # Basic format validation
21
+ if not repo_name or "/" not in repo_name:
22
+ return False, "Repository name must be in the format 'organization/repository'", {}
23
+
24
+ # Check if GitHub URL
25
+ if repo_url and "github.com" in repo_url:
26
+ # Extract org and repo from URL if provided
27
+ try:
28
+ parts = repo_url.split("github.com/")[1].split("/")
29
+ org = parts[0]
30
+ repo = parts[1].split(".")[0] if "." in parts[1] else parts[1]
31
+ url_repo_name = f"{org}/{repo}"
32
+
33
+ # Check if URL matches repo_name
34
+ if url_repo_name != repo_name:
35
+ return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}
36
+ except:
37
+ pass # Fall back to using repo_name
38
+
39
+ # Get repository information from GitHub API
40
+ org, repo = repo_name.split("/")
41
+ api_url = f"https://api.github.com/repos/{org}/{repo}"
42
+
43
+ try:
44
+ response = requests.get(api_url)
45
+ if response.status_code != 200:
46
+ return False, f"Repository not found or not accessible: {response.json().get('message', 'Unknown error')}", {}
47
+
48
+ # Parse repository data
49
+ repo_data = response.json()
50
+ library_info = get_library_info(repo_data)
51
+
52
+ return True, "", library_info
53
+
54
+ except Exception as e:
55
+ return False, f"Error accessing repository: {str(e)}", {}
56
 
57
  def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
58
  """
src/submission/submit.py CHANGED
@@ -1,35 +1,57 @@
1
  import json
2
  import os
 
 
3
  import uuid
4
  from datetime import datetime
5
  from pathlib import Path
6
7
  from src.display.formatting import styled_error, styled_warning, styled_message
8
- from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, LOCAL_MODE
9
- from src.submission.check_validity import is_repository_valid
10
 
11
 
12
  def add_new_eval(
13
  library_name,
14
  ) -> str:
15
  """
16
  Adds a new library to the assessment queue.
17
 
18
  Args:
19
- library_name: Name of the library
20
 
21
  Returns:
22
  A message indicating the status of the submission
23
  """
24
  # Check if valid repository
25
- is_valid, validity_message, library_info = is_repository_valid(library_name)
26
 
27
  if not is_valid:
28
  return styled_error(f"Invalid submission: {validity_message}")
29
 
 
30
  # Create a unique identifier for the submission
31
  uid = uuid.uuid4().hex[:6]
32
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
33
  request_filename = f"{library_name.replace('/', '_')}_eval_request_{timestamp}_{uid}.json"
34
 
35
  # Stars count and license info from library_info if available
@@ -39,6 +61,11 @@ def add_new_eval(
39
  # Create the assessment request JSON
40
  assessment_request = {
41
  "library": library_name,
42
  "license": license_name,
43
  "stars": stars,
44
  "status": "PENDING",
@@ -57,7 +84,7 @@ def add_new_eval(
57
 
58
  # If in local mode, don't try to upload to HF
59
  if LOCAL_MODE:
60
- return styled_message(f"Library '{library_name}' has been added to the local assessment queue! Assessment ID: {uid}")
61
 
62
  # Try to upload to HF if not in local mode
63
  try:
@@ -65,12 +92,12 @@ def add_new_eval(
65
  path = Path(request_file_path)
66
  API.upload_file(
67
  path_or_fileobj=path,
68
- path_in_repo=f"assessment-queue/{request_filename}",
69
  repo_id=QUEUE_REPO,
70
- repo_type="space",
71
  )
72
 
73
- return styled_message(f"Library '{library_name}' has been added to the assessment queue! Assessment ID: {uid}")
74
 
75
  except Exception as e:
76
  return styled_warning(f"Saved locally but failed to upload to Hugging Face: {str(e)}")
 
1
  import json
2
  import os
3
+ import re
4
+ import time
5
  import uuid
6
  from datetime import datetime
7
  from pathlib import Path
8
 
9
+ import huggingface_hub
10
+ import requests
11
+ from huggingface_hub import HfApi
12
+
13
+ from src.display.utils import LibraryType, Language, AssessmentStatus
14
  from src.display.formatting import styled_error, styled_warning, styled_message
15
+ from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN, LOCAL_MODE
16
+ from src.submission.check_validity import is_repository_valid, get_library_info
17
 
18
 
19
  def add_new_eval(
20
  library_name,
21
+ library_version,
22
+ repository_url,
23
+ language,
24
+ framework,
25
+ library_type_str,
26
  ) -> str:
27
  """
28
  Adds a new library to the assessment queue.
29
 
30
  Args:
31
+ library_name: Name of the library (org/repo format)
32
+ library_version: Version of the library
33
+ repository_url: URL to the repository
34
+ language: Programming language
35
+ framework: Related framework/ecosystem
36
+ library_type_str: Type of AI library
37
 
38
  Returns:
39
  A message indicating the status of the submission
40
  """
41
  # Check if valid repository
42
+ is_valid, validity_message, library_info = is_repository_valid(library_name, repository_url)
43
 
44
  if not is_valid:
45
  return styled_error(f"Invalid submission: {validity_message}")
46
 
47
+ # Parse library type
48
+ library_type = LibraryType.from_str(library_type_str)
49
+ if library_type == LibraryType.Unknown:
50
+ return styled_error("Please select a valid library type.")
51
+
52
  # Create a unique identifier for the submission
53
  uid = uuid.uuid4().hex[:6]
54
+ timestamp = datetime.now().isoformat()
55
  request_filename = f"{library_name.replace('/', '_')}_eval_request_{timestamp}_{uid}.json"
56
 
57
  # Stars count and license info from library_info if available
 
61
  # Create the assessment request JSON
62
  assessment_request = {
63
  "library": library_name,
64
+ "version": library_version,
65
+ "repository_url": repository_url,
66
+ "language": language,
67
+ "framework": framework,
68
+ "library_type": library_type.value.name,
69
  "license": license_name,
70
  "stars": stars,
71
  "status": "PENDING",
 
84
 
85
  # If in local mode, don't try to upload to HF
86
  if LOCAL_MODE:
87
+ return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the local assessment queue! Assessment ID: {uid}")
88
 
89
  # Try to upload to HF if not in local mode
90
  try:
 
92
  path = Path(request_file_path)
93
  API.upload_file(
94
  path_or_fileobj=path,
95
+ path_in_repo=request_filename,
96
  repo_id=QUEUE_REPO,
97
+ repo_type="dataset",
98
  )
99
 
100
+ return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the assessment queue! Assessment ID: {uid}")
101
 
102
  except Exception as e:
103
  return styled_warning(f"Saved locally but failed to upload to Hugging Face: {str(e)}")
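`add_new_eval` now takes the full submission metadata (version, repository URL, language, framework, library type), validates the library type, and writes a PENDING request JSON before optionally uploading it to the queue dataset. A sketch of the payload it produces; every value below is a hypothetical example:

```python
# Shape of the request file written by add_new_eval after this change.
# All values below are hypothetical examples.
import json
import uuid
from datetime import datetime

assessment_request = {
    "library": "example-org/example-lib",       # org/repo format
    "version": "1.2.3",
    "repository_url": "https://github.com/example-org/example-lib",
    "language": "Python",
    "framework": "PyTorch",
    "library_type": "llm framework",            # must map to a LibraryType member
    "license": "MIT",
    "stars": 1234,
    "status": "PENDING",
}

uid = uuid.uuid4().hex[:6]
timestamp = datetime.now().isoformat()
filename = f"example-org_example-lib_eval_request_{timestamp}_{uid}.json"
with open(filename, "w") as f:
    json.dump(assessment_request, f, indent=2)
print(filename)
```

One side effect worth flagging: switching the timestamp from `strftime("%Y%m%d_%H%M%S")` to `isoformat()` puts `:` characters into the request filename, which some filesystems (notably Windows) do not accept.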
uv.lock DELETED
The diff for this file is too large to render. See raw diff