refactor: update table columns #1
opened by seonglae-holistic

Files changed:
- README.md +1 -3
- app.py +85 -79
- assessment-queue/pytorch_pytorch_eval_request_FINISHED_v2.1.0.json +2 -2
- assessment-queue/pytorch_pytorch_eval_request_timestamp_def456.json +2 -2
- assessment-results/agent_development_kit.json +4 -4
- assessment-results/browser_use.json +4 -4
- assessment-results/composio.json +4 -4
- assessment-results/crewai.json +4 -4
- assessment-results/{huggingface_transformers.json → huggingface_candle.json} +11 -11
- assessment-results/jax.json +5 -5
- assessment-results/langchain.json +3 -3
- assessment-results/langgraph.json +3 -3
- assessment-results/llamaindex.json +5 -5
- assessment-results/metagpt.json +5 -5
- assessment-results/onnx.json +4 -4
- assessment-results/pydantic_ai.json +3 -3
- assessment-results/pytorch.json +4 -4
- assessment-results/sglang.json +3 -3
- assessment-results/smolagents.json +5 -5
- assessment-results/stagehand.json +5 -5
- assessment-results/tensorflow.json +6 -6
- assessment-results/tensorrt.json +4 -4
- assessment-results/text_generation_inference.json +3 -3
- assessment-results/vllm.json +3 -3
- pyproject.toml +1 -25
- src/about.py +42 -37
- src/display/css_html_js.py +0 -8
- src/display/formatting.py +24 -8
- src/display/utils.py +51 -69
- src/envs.py +7 -16
- src/leaderboard/read_evals.py +43 -61
- src/populate.py +13 -11
- src/submission/check_validity.py +41 -5
- src/submission/submit.py +36 -9
- uv.lock +0 -0
README.md (CHANGED)

@@ -7,7 +7,7 @@ sdk: gradio
 app_file: app.py
 pinned: true
 license: mit
-short_description:
+short_description: Duplicate this leaderboard to initialize your own!
 sdk_version: 5.19.0
 ---

@@ -46,5 +46,3 @@ You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
 - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
-
-> **LibVulnWatch** was presented at the **ACL 2025 Student Research Workshop** and accepted to the **ICML 2025 Technical AI Governance workshop**. The system uncovers hidden security, licensing, maintenance, dependency and regulatory risks in popular AI libraries and publishes a public leaderboard for transparent ecosystem monitoring.

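For orientation, here is a minimal sketch of how the modules the README points to typically come together at startup. The function names and signatures are taken from app.py in this PR; the wiring shown is an illustration rather than the PR's exact code:

```python
# Sketch only: turn assessment results and requests into the leaderboard tables.
from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.populate import get_evaluation_queue_df, get_leaderboard_df

# Flatten every result/request JSON into one row per library.
leaderboard_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Split submissions by status for the queue accordions.
finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
```
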
app.py (CHANGED)

@@ -1,10 +1,10 @@
 import gradio as gr
-from gradio.components import Dataframe
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
 import os
-
+import shutil

 from src.about import (
     CITATION_BUTTON_LABEL,

@@ -22,9 +22,12 @@ from src.display.utils import (
     EVAL_COLS,
     EVAL_TYPES,
     AutoEvalColumn,
-
+    LibraryType,
+    fields,
+    Language,
+    AssessmentStatus
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, REPO_ID, LOCAL_MODE
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, LOCAL_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval

@@ -48,50 +51,42 @@ def initialize_data_directories():
     os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
     os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

+    if LOCAL_MODE:
+        print("Running in local mode, using local directories only")
+        return
+
+    # Try to download from HF if not in local mode
+    try:
+        print(f"Downloading request data from {QUEUE_REPO} to {EVAL_REQUESTS_PATH}")
+        snapshot_download(
+            repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download request data: {e}")
+        print("Using local data only")
+
+    try:
+        print(f"Downloading result data from {RESULTS_REPO} to {EVAL_RESULTS_PATH}")
+        snapshot_download(
+            repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
+            tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception as e:
+        print(f"Failed to download result data: {e}")
+        print("Using local data only")
+
 # Initialize data
 initialize_data_directories()

 # Load data for leaderboard
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

-# Extract unique languages for filtering
-def get_unique_languages(df):
-    """Extract all unique individual languages from the Language column"""
-    if df.empty or auto_eval_column_attrs.language.name not in df.columns:
-        return []
-
-    all_languages = set()
-    for value in df[auto_eval_column_attrs.language.name].unique():
-        if isinstance(value, str):
-            if "/" in value:
-                languages = [lang.strip() for lang in value.split("/")]
-                all_languages.update(languages)
-            else:
-                all_languages.add(value.strip())
-
-    return sorted(list(all_languages))
-
-# Create a mapping for language filtering
-UNIQUE_LANGUAGES = get_unique_languages(LEADERBOARD_DF)
-
-# Create a special column for individual language filtering
-if not LEADERBOARD_DF.empty:
-    # Create a column that contains all individual languages as a list
-    LEADERBOARD_DF["_languages_list"] = LEADERBOARD_DF[auto_eval_column_attrs.language.name].apply(
-        lambda x: [lang.strip() for lang in str(x).split("/")] if pd.notna(x) else []
-    )
-
-    # Create a text version of Active Maintenance for checkboxgroup filtering
-    LEADERBOARD_DF["_maintenance_filter"] = LEADERBOARD_DF[auto_eval_column_attrs.availability.name].apply(
-        lambda x: "Active" if x else "Inactive"
-    )
-
 # Load queue data
 (
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
-    rejected_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

 def init_leaderboard(dataframe):

@@ -99,53 +94,40 @@ def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         # Create an empty dataframe with the expected columns
         all_columns = COLS + [task.value.col_name for task in Tasks]
-        empty_df = pd.DataFrame(columns=
+        empty_df = pd.DataFrame(columns=all_columns)
         print("Warning: Leaderboard DataFrame is empty. Using empty dataframe.")
         dataframe = empty_df

-    # Create filter columns list with proper typing
-    filter_columns = []
-
-    # 1. Library types
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.library_type.name, type="checkboxgroup", label="Library types"))
-
-    # 2. Programming Language (checkboxgroup - OR filtering)
-    filter_columns.append(ColumnFilter(auto_eval_column_attrs.language.name, type="checkboxgroup", label="Programming Language"))
-
-    # 3. GitHub Stars
-    filter_columns.append(ColumnFilter(
-        auto_eval_column_attrs.stars.name,
-        type="slider",
-        min=0,
-        max=50000,
-        label="GitHub Stars",
-    ))
-
-    # 4. Maintenance Status (checkboxgroup - separate from languages)
-    filter_columns.append(ColumnFilter("_maintenance_filter", type="checkboxgroup", label="Maintenance Status"))
-
-    # Hide columns
-    hidden_columns = [getattr(auto_eval_column_attrs, field).name for field in AutoEvalColumn.model_fields if getattr(auto_eval_column_attrs, field).hidden]
-    hidden_columns.extend(["_languages_list", "_maintenance_filter", "_original_language"])  # Hide helper columns
-
     return Leaderboard(
         value=dataframe,
-        datatype=
+        datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
-            default_selection=[
-            cant_deselect=[
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
         ),
-        search_columns=[
-        hide_columns=
-        filter_columns=
+        search_columns=[AutoEvalColumn.library.name, AutoEvalColumn.license_name.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.library_type.name, type="checkboxgroup", label="Library types"),
+            ColumnFilter(AutoEvalColumn.language.name, type="checkboxgroup", label="Programming Language"),
+            ColumnFilter(
+                AutoEvalColumn.stars.name,
+                type="slider",
+                min=0,
+                max=50000,
+                label="GitHub Stars",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.availability.name, type="boolean", label="Show only active libraries", default=True
+            ),
+        ],
         bool_checkboxgroup_label="Filter libraries",
         interactive=False,
     )


-demo = gr.Blocks(css=custom_css
-# demo = gr.Blocks(css=custom_css, theme=Soft(font=["sans-serif"], font_mono=["monospace"]))
+demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

@@ -168,7 +150,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        finished_eval_table = Dataframe(
+                        finished_eval_table = gr.components.Dataframe(
                             value=finished_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -179,7 +161,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        running_eval_table = Dataframe(
+                        running_eval_table = gr.components.Dataframe(
                             value=running_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -191,7 +173,7 @@ with demo:
                     open=False,
                 ):
                     with gr.Row():
-                        pending_eval_table = Dataframe(
+                        pending_eval_table = gr.components.Dataframe(
                             value=pending_eval_queue_df,
                             headers=EVAL_COLS,
                             datatype=EVAL_TYPES,

@@ -202,7 +184,26 @@ with demo:

             with gr.Row():
                 with gr.Column():
-                    library_name_textbox = gr.Textbox(label="Library name")
+                    library_name_textbox = gr.Textbox(label="Library name (org/repo format)")
+                    library_version_textbox = gr.Textbox(label="Version", placeholder="v1.0.0")
+                    library_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in LibraryType if t != LibraryType.Unknown],
+                        label="Library type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    language = gr.Dropdown(
+                        choices=[i.value.name for i in Language if i != Language.Other],
+                        label="Programming Language",
+                        multiselect=False,
+                        value="Python",
+                        interactive=True,
+                    )
+                    framework = gr.Textbox(label="Framework/Ecosystem (e.g., PyTorch, React)")
+                    repository_url = gr.Textbox(label="Repository URL")

             submit_button = gr.Button("Submit for Assessment")
             submission_result = gr.Markdown()

@@ -210,18 +211,23 @@ with demo:
             add_new_eval,
             [
                 library_name_textbox,
+                library_version_textbox,
+                repository_url,
+                language,
+                framework,
+                library_type,
             ],
             submission_result,
         )

     with gr.Row():
-        with gr.Accordion("📙 Citation", open=
-            citation_button = gr.
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
                 value=CITATION_BUTTON_TEXT,
                 label=CITATION_BUTTON_LABEL,
-                lines=
+                lines=20,
                 elem_id="citation-button",
-
+                show_copy_button=True,
             )

 # Only schedule space restarts if not in local mode

assessment-queue/pytorch_pytorch_eval_request_FINISHED_v2.1.0.json (CHANGED)

@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "
-  "library_type": "
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
   "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",

assessment-queue/pytorch_pytorch_eval_request_timestamp_def456.json (CHANGED)

@@ -3,8 +3,8 @@
   "version": "v2.1.0",
   "repository_url": "https://github.com/pytorch/pytorch",
   "language": "Python",
-  "framework": "
-  "library_type": "
+  "framework": "Machine Learning",
+  "library_type": "machine learning",
  "license": "BSD-3",
   "stars": 72300,
   "status": "FINISHED",

assessment-results/agent_development_kit.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "microsoft/agent-development-kit",
+    "version": "v0.2.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-07T12:00:00Z",
     "last_updated": "2024-06-07T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/agent_development_kit.html",
+    "repository_url": "https://github.com/microsoft/agent-development-kit",
     "github_stars": 3800,
     "license": "MIT",
     "scores": {

assessment-results/browser_use.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "langchain-ai/browser-use",
+    "version": "v0.5.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-09T12:00:00Z",
     "last_updated": "2024-06-09T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/browser_use.html",
+    "repository_url": "https://github.com/langchain-ai/browser-use",
     "github_stars": 3200,
     "license": "MIT",
     "scores": {

assessment-results/composio.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "ComposableAI/composio",
+    "version": "v0.4.2",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-10T12:00:00Z",
     "last_updated": "2024-06-10T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/composio.html",
+    "repository_url": "https://github.com/ComposableAI/composio",
     "github_stars": 1200,
     "license": "MIT",
     "scores": {

assessment-results/crewai.json (CHANGED)

@@ -1,15 +1,15 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "joaomdmoura/crewAI",
+    "version": "v0.9.4",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-15T12:00:00Z",
     "last_updated": "2024-06-15T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/crewai.html",
+    "repository_url": "https://github.com/joaomdmoura/crewAI",
     "github_stars": 8200,
     "license": "MIT",
     "scores": {

assessment-results/{huggingface_transformers.json → huggingface_candle.json} (RENAMED)

@@ -1,23 +1,23 @@
 {
   "assessment": {
-    "library_name": "huggingface/
-    "version": "
-    "language": "
-    "framework": "
+    "library_name": "huggingface/candle",
+    "version": "v0.3.2",
+    "language": "Rust",
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T12:00:00Z",
     "last_updated": "2024-06-22T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/huggingface/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/huggingface_candle.html",
+    "repository_url": "https://github.com/huggingface/candle",
+    "github_stars": 12500,
     "license": "Apache-2.0",
     "scores": {
-      "license_validation":
-      "security_assessment":
-      "maintenance_health":
+      "license_validation": 4,
+      "security_assessment": 2,
+      "maintenance_health": 2,
       "dependency_management": 1,
-      "regulatory_compliance":
+      "regulatory_compliance": 2
     },
     "details": {
       "license_validation": {

assessment-results/jax.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
+    "library_name": "google/jax",
     "version": "v0.4.23",
     "language": "Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-24T12:00:00Z",
     "last_updated": "2024-06-24T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/jax.html",
+    "repository_url": "https://github.com/google/jax",
+    "github_stars": 36000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,

assessment-results/langchain.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langchain",
-    "version": "v0.
+    "version": "v0.1.0",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-17T12:00:00Z",
     "last_updated": "2024-06-17T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langchain.html",
     "repository_url": "https://github.com/langchain-ai/langchain",
-    "github_stars":
+    "github_stars": 79000,
     "license": "MIT",
     "scores": {
       "license_validation": 5,

assessment-results/langgraph.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "langchain-ai/langgraph",
-    "version": "
+    "version": "v0.0.20",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-13T12:00:00Z",
     "last_updated": "2024-06-13T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/langgraph.html",
     "repository_url": "https://github.com/langchain-ai/langgraph",
-    "github_stars":
+    "github_stars": 4500,
     "license": "Proprietary",
     "scores": {
       "license_validation": 1,

assessment-results/llamaindex.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "jerryjliu/llama_index",
+    "version": "v0.9.14",
     "language": "Python",
     "framework": "LLM Orchestration",
     "completed_time": "2024-06-20T12:00:00Z",
     "last_updated": "2024-06-20T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/llamaindex.html",
+    "repository_url": "https://github.com/jerryjliu/llama_index",
+    "github_stars": 27000,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/metagpt.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "v0.
+    "library_name": "geekan/MetaGPT",
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-14T12:00:00Z",
     "last_updated": "2024-06-14T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/metagpt.html",
+    "repository_url": "https://github.com/geekan/MetaGPT",
+    "github_stars": 32500,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/onnx.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "onnx/onnx",
-    "version": "v1.
+    "version": "v1.15.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-22T11:00:00Z",
     "last_updated": "2024-06-22T11:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/onnx.html",
     "repository_url": "https://github.com/onnx/onnx",
-    "github_stars":
+    "github_stars": 16200,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/pydantic_ai.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pydantic/pydantic-ai",
-    "version": "v0.
+    "version": "v0.7.0",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-08T12:00:00Z",
     "last_updated": "2024-06-08T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pydantic_ai.html",
     "repository_url": "https://github.com/pydantic/pydantic-ai",
-    "github_stars":
+    "github_stars": 5800,
     "license": "MIT",
     "scores": {
       "license_validation": 5,

assessment-results/pytorch.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "pytorch/pytorch",
-    "version": "v2.
+    "version": "v2.2.1",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-25T12:00:00Z",
     "last_updated": "2024-06-25T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/pytorch.html",
     "repository_url": "https://github.com/pytorch/pytorch",
-    "github_stars":
+    "github_stars": 74500,
     "license": "BSD-3-Clause",
     "scores": {
       "license_validation": 5,

assessment-results/sglang.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "sgl-project/sglang",
-    "version": "v0.
+    "version": "v0.1.8",
     "language": "Python/C++",
     "framework": "LLM Inference",
     "completed_time": "2024-06-19T12:00:00Z",
     "last_updated": "2024-06-19T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/sglang.html",
     "repository_url": "https://github.com/sgl-project/sglang",
-    "github_stars":
+    "github_stars": 4800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,

assessment-results/smolagents.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "tinygrad/SmolAgents",
+    "version": "v0.2.1",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-12T12:00:00Z",
     "last_updated": "2024-06-12T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/smolagents.html",
+    "repository_url": "https://github.com/tinygrad/SmolAgents",
+    "github_stars": 2800,
     "license": "MIT",
     "scores": {
       "license_validation": 4,

assessment-results/stagehand.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
-    "library_name": "
-    "version": "
+    "library_name": "langchain-ai/stagehand",
+    "version": "v0.0.12",
     "language": "Python",
     "framework": "Agent Framework",
     "completed_time": "2024-06-11T12:00:00Z",
     "last_updated": "2024-06-11T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
-    "repository_url": "https://github.com/
-    "github_stars":
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/stagehand.html",
+    "repository_url": "https://github.com/langchain-ai/stagehand",
+    "github_stars": 1500,
     "license": "Apache-2.0 with Commons Clause",
     "scores": {
       "license_validation": 3,

assessment-results/tensorflow.json (CHANGED)

@@ -1,22 +1,22 @@
 {
   "assessment": {
     "library_name": "tensorflow/tensorflow",
-    "version": "v2.
+    "version": "v2.15.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning",
     "completed_time": "2024-06-23T12:00:00Z",
     "last_updated": "2024-06-23T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorflow.html",
     "repository_url": "https://github.com/tensorflow/tensorflow",
-    "github_stars":
+    "github_stars": 182000,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 5,
       "security_assessment": 1,
-
-
+      "maintenance_health": 3,
+      "dependency_management": 1,
       "regulatory_compliance": 3
     },
     "details": {

assessment-results/tensorrt.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "nvidia/TensorRT",
-    "version": "
+    "version": "v9.1.0",
     "language": "C++/Python",
-    "framework": "
+    "framework": "Machine Learning Inference",
     "completed_time": "2024-06-21T12:00:00Z",
     "last_updated": "2024-06-21T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/tensorrt.html",
     "repository_url": "https://github.com/NVIDIA/TensorRT",
-    "github_stars":
+    "github_stars": 8500,
     "license": "Proprietary with Open Components",
     "scores": {
       "license_validation": 3,

assessment-results/text_generation_inference.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "huggingface/text-generation-inference",
-    "version": "
+    "version": "v1.1.0",
     "language": "Rust/Python",
     "framework": "LLM Inference",
     "completed_time": "2024-06-16T12:00:00Z",
     "last_updated": "2024-06-16T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/text_generation_inference.html",
     "repository_url": "https://github.com/huggingface/text-generation-inference",
-    "github_stars":
+    "github_stars": 5600,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 3,

assessment-results/vllm.json (CHANGED)

@@ -1,16 +1,16 @@
 {
   "assessment": {
     "library_name": "vllm-project/vllm",
-    "version": "v0.
+    "version": "v0.3.0",
     "language": "Python/CUDA",
     "framework": "LLM Inference",
     "completed_time": "2024-06-18T12:00:00Z",
     "last_updated": "2024-06-18T12:00:00Z",
     "active_maintenance": true,
     "independently_verified": true,
-    "report_url": "https://
+    "report_url": "https://github.com/LibVulnWatch/reports/raw/main/vllm.html",
     "repository_url": "https://github.com/vllm-project/vllm",
-    "github_stars":
+    "github_stars": 12800,
     "license": "Apache-2.0",
     "scores": {
       "license_validation": 4,

pyproject.toml (CHANGED)

@@ -1,33 +1,9 @@
-[project]
-name = "libvulnwatchleaderboard"
-version = "0.1.0"
-description = "A vulnerability assessment leaderboard for libraries"
-requires-python = ">=3.8"
-dependencies = [
-    "APScheduler",
-    "black",
-    "datasets",
-    "gradio",
-    "gradio[oauth]",
-    "gradio_leaderboard==0.0.13",
-    "gradio_client",
-    "huggingface-hub>=0.18.0",
-    "matplotlib",
-    "numpy",
-    "pandas",
-    "python-dateutil",
-    "tqdm",
-    "transformers",
-    "tokenizers>=0.15.0",
-    "sentencepiece",
-]
-
 [tool.ruff]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = ["E", "F"]
 ignore = ["E501"] # line too long (black is taking care of this)
 line-length = 119
-fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TID", "TRY", "UP", "YTT"]
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]

 [tool.isort]
 profile = "black"

src/about.py (CHANGED)

@@ -12,11 +12,11 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # Risk domains from LibVulnWatch paper
-    license = Task("license_validation", "score", "License
-    security = Task("security_assessment", "score", "Security
-    maintenance = Task("maintenance_health", "score", "Maintenance
-    dependency = Task("dependency_management", "score", "Dependency
-    regulatory = Task("regulatory_compliance", "score", "Regulatory
+    license = Task("license_validation", "score", "License Risk")
+    security = Task("security_assessment", "score", "Security Risk")
+    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
+    dependency = Task("dependency_management", "score", "Dependency Risk")
+    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")

 NUM_FEWSHOT = 0 # Not relevant for vulnerability assessment
 # ---------------------------------------------------

@@ -28,32 +28,44 @@ TITLE = """<h1 align="center" id="space-title">LibVulnWatch: Vulnerability Asses

 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-##
+## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries

-
+This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains:
+- **License Validation**: Legal risks based on license type, compatibility, and requirements
+- **Security Assessment**: Vulnerability severity and patch responsiveness
+- **Maintenance Health**: Sustainability and governance practices
+- **Dependency Management**: Vulnerability inheritance and supply chain security
+- **Regulatory Compliance**: Compliance readiness for various frameworks

-
-• **Security Assessment** – CVEs, patch latency, exploit primitives
-• **Maintenance Health** – bus-factor, release cadence, contributor diversity
-• **Dependency Management** – transitive risk, SBOM completeness
-• **Regulatory Compliance** – privacy/export controls, policy documentation
-
-In the paper we apply the framework to **20 popular libraries**, achieving **88 % coverage of OpenSSF Scorecard checks** and surfacing **up to 19 previously-unreported risks per library**.
-Lower scores indicate lower risk, and the **Trust Score** is the equal-weight average of the five domains.
+Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness.
 """

 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = """
-##
+LLM_BENCHMARKS_TEXT = f"""
+## How LibVulnWatch Works
+
+Our assessment methodology evaluates libraries through:
+1. **Static Analysis**: Code review, license parsing, and documentation examination
+2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing
+3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence
+
+Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk.

-
+## Reproducibility
+To reproduce our assessment for a specific library:
+```python
+from libvulnwatch import VulnerabilityAssessor

-
-
-3️⃣ **Metadata agents** – GitHub mining, release-cadence modelling, community health
-4️⃣ **Policy agents** – mapping evidence to NIST SSDF, EU AI Act, and related frameworks
+# Initialize the assessor
+assessor = VulnerabilityAssessor()

-
+# Run assessment on a library
+results = assessor.assess_library("organization/library_name")
+
+# View detailed results
+print(results.risk_scores)
+print(results.detailed_findings)
+```
 """

 EVALUATION_QUEUE_TEXT = """

@@ -84,18 +96,11 @@ If your library shows as "FAILED" in the assessment queue, check that:
 """

 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""
-
-
-
-
-
+CITATION_BUTTON_TEXT = r"""
+@article{LibVulnWatch2025,
+title={LibVulnWatch: Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries},
+author={First Author and Second Author},
+journal={ICML 2025 Technical AI Governance Workshop},
+year={2025}
 }
-
-@inproceedings{anonymous2025libvulnwatch,
-title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
-author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
-booktitle={ICML Workshop on Technical AI Governance (TAIG)},
-year={2025},
-url={https://openreview.net/forum?id=MHhrr8QHgR}
-}"""
+"""

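To make the Trust Score definition in the new INTRODUCTION_TEXT concrete (an equal-weighted average of the five per-domain risk scores), here is a minimal sketch. The score keys come from the Tasks enum above and the values from the assessment-results/huggingface_candle.json diff earlier in this PR; the snippet itself is an illustration, not code from the repository:

```python
import pandas as pd

# Domain scores for one library (values from assessment-results/huggingface_candle.json).
scores = pd.Series({
    "license_validation": 4,
    "security_assessment": 2,
    "maintenance_health": 2,
    "dependency_management": 1,
    "regulatory_compliance": 2,
})

# Equal-weighted average across the five domains; lower means lower risk.
trust_score = scores.mean()
print(round(trust_score, 2))  # 2.2
```
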
src/display/css_html_js.py (CHANGED)

@@ -38,14 +38,6 @@ custom_css = """
     padding: 0px;
 }

-.gradio-container {
-    max-height: fit-content;
-}
-
-.container {
-    height: fit-content;
-}
-
 /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
 #leaderboard-table td:nth-child(2),
 #leaderboard-table th:nth-child(2) {

src/display/formatting.py (CHANGED)

@@ -1,5 +1,8 @@
 """Helper functions to style our gradio elements"""

+import re
+import os
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'

@@ -10,23 +13,21 @@ def make_clickable_model(model_name):


 def make_clickable_report(report_url):
-    """
-    return
+    """Return the direct URL to the assessment report without any formatting"""
+    # Just return the URL string directly
+    return report_url


 def styled_error(error):
-    "
-    return f'<span style="color: red">❌ Error:</span> {error}'
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"


 def styled_warning(warn):
-    "
-    return f'<span style="color: orange">⚠️ Warning:</span> {warn}'
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"


 def styled_message(message):
-    "
-    return f'<span style="color: green">✅ Success:</span> {message}'
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"


 def has_no_nan_values(df, columns):

@@ -47,6 +48,21 @@ def make_clickable_library(library_name: str) -> str:
     return f'<a href="{github_url}" target="_blank">{library_name}</a>'


+def styled_message(message) -> str:
+    """Format a message with a green header"""
+    return f'<span style="color: green">✅ Success:</span> {message}'
+
+
+def styled_warning(message) -> str:
+    """Format a warning message with an orange header"""
+    return f'<span style="color: orange">⚠️ Warning:</span> {message}'
+
+
+def styled_error(message) -> str:
+    """Format an error message with a red header"""
+    return f'<span style="color: red">❌ Error:</span> {message}'
+
+
 # Risk severity coloring for risk scores
 def colorize_risk_score(score):
     """

src/display/utils.py
CHANGED
@@ -1,14 +1,19 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, make_dataclass
 from enum import Enum
 
+import pandas as pd
+
 from src.about import Tasks
-…
+
+def fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-…
+@dataclass
+class ColumnContent:
     name: str
     type: str
     displayed_by_default: bool
@@ -16,61 +21,38 @@ class ColumnContent(BaseModel):
     never_hidden: bool = False
 
 ## Leaderboard columns
-… (old lines 19-40 not rendered in this view)
-auto_eval_column_attrs = AutoEvalColumn(
-    library_type_symbol=ColumnContent(name="T", type="str", displayed_by_default=True, never_hidden=True),
-    library=ColumnContent(name="Library", type="markdown", displayed_by_default=True, never_hidden=True),
-    overall_risk=ColumnContent(name="Trust Score", type="number", displayed_by_default=True),
-    # Task columns from Tasks enum
-    license=ColumnContent(name="License Rating", type="number", displayed_by_default=True),
-    security=ColumnContent(name="Security Rating", type="number", displayed_by_default=True),
-    maintenance=ColumnContent(name="Maintenance Rating", type="number", displayed_by_default=True),
-    dependency=ColumnContent(name="Dependency Rating", type="number", displayed_by_default=True),
-    regulatory=ColumnContent(name="Regulatory Rating", type="number", displayed_by_default=True),
-    # Library information
-    library_type=ColumnContent(name="Type", type="str", displayed_by_default=False),
-    framework=ColumnContent(name="Framework", type="str", displayed_by_default=False),
-    version=ColumnContent(name="Version", type="str", displayed_by_default=False, hidden=True),
-    language=ColumnContent(name="Language", type="str", displayed_by_default=False),
-    license_name=ColumnContent(name="License", type="str", displayed_by_default=True),
-    stars=ColumnContent(name="GitHub ⭐", type="number", displayed_by_default=False),
-    availability=ColumnContent(name="Active Maintenance", type="bool", displayed_by_default=True),
-    report_url=ColumnContent(name="Report", type="markdown", displayed_by_default=True),
-    last_update=ColumnContent(name="Last Update", type="str", displayed_by_default=False),
-    verified=ColumnContent(name="Verified", type="bool", displayed_by_default=False),
-)
-
+auto_eval_column_dict = []
+# Init
+auto_eval_column_dict.append(["library_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)])
+#Scores
+auto_eval_column_dict.append(["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)])
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Library information
+auto_eval_column_dict.append(["library_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["framework", ColumnContent, ColumnContent("Framework", "str", False)])
+auto_eval_column_dict.append(["version", ColumnContent, ColumnContent("Version", "str", False, False)])
+auto_eval_column_dict.append(["language", ColumnContent, ColumnContent("Language", "str", False)])
+auto_eval_column_dict.append(["license_name", ColumnContent, ColumnContent("License", "str", True)])
+auto_eval_column_dict.append(["stars", ColumnContent, ColumnContent("GitHub ⭐", "number", False)])
+auto_eval_column_dict.append(["last_update", ColumnContent, ColumnContent("Last Updated", "str", False)])
+auto_eval_column_dict.append(["verified", ColumnContent, ColumnContent("Independently Verified", "bool", False)])
+auto_eval_column_dict.append(["availability", ColumnContent, ColumnContent("Active Maintenance", "bool", True)])
+auto_eval_column_dict.append(["report_url", ColumnContent, ColumnContent("Report", "str", True)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
-    library = ColumnContent(
-    version = ColumnContent(
-    language = ColumnContent(
-    framework = ColumnContent(
-    library_type = ColumnContent(
-    status = ColumnContent(
+    library = ColumnContent("library", "markdown", True)
+    version = ColumnContent("version", "str", True)
+    language = ColumnContent("language", "str", True)
+    framework = ColumnContent("framework", "str", True)
+    library_type = ColumnContent("library_type", "str", True)
+    status = ColumnContent("status", "str", True)
 
 ## All the library information that we might need
 @dataclass
@@ -81,27 +63,27 @@ class LibraryDetails:
 
 
 class LibraryType(Enum):
-    ML = LibraryDetails(name="
-    LLM = LibraryDetails(name="
-    AGENT = LibraryDetails(name="
-    VIS = LibraryDetails(name="
-    GENERAL = LibraryDetails(name="
+    ML = LibraryDetails(name="machine learning", symbol="🟢")
+    LLM = LibraryDetails(name="llm framework", symbol="🔶")
+    AGENT = LibraryDetails(name="agent framework", symbol="⭕")
+    VIS = LibraryDetails(name="visualization", symbol="🟦")
+    GENERAL = LibraryDetails(name="general ai", symbol="🟣")
     Unknown = LibraryDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
        return f"{self.value.symbol}{separator}{self.value.name}"
 
    @staticmethod
-    def from_str(type
-        if "
+    def from_str(type):
+        if "machine learning" in type or "🟢" in type:
            return LibraryType.ML
-        if "
+        if "llm framework" in type or "🔶" in type:
            return LibraryType.LLM
-        if "
+        if "agent framework" in type or "⭕" in type:
            return LibraryType.AGENT
-        if "
+        if "visualization" in type or "🟦" in type:
            return LibraryType.VIS
-        if "
+        if "general ai" in type or "🟣" in type:
            return LibraryType.GENERAL
        return LibraryType.Unknown
 
@@ -119,11 +101,11 @@ class AssessmentStatus(Enum):
     Disputed = LibraryDetails("Disputed")
 
 # Column selection
-COLS = [
-fields = AutoEvalColumn.model_fields
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
-EVAL_COLS = [
-EVAL_TYPES = [
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 # Task columns for benchmarking - use the display column names from the Tasks enum
 BENCHMARK_COLS = [task.value.col_name for task in Tasks]
+
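The column set is now built dynamically: each `Tasks` member becomes one risk column, `make_dataclass` turns the accumulated list into `AutoEvalColumn`, and the `fields()` helper enumerates its class attributes for `COLS`/`EVAL_COLS`. A stand-alone sketch of that pattern with two dummy columns (not the leaderboard's real ones); here `ColumnContent` is frozen so the default instances stay hashable, which newer Python versions require for dataclass defaults:

```python
from dataclasses import dataclass, make_dataclass

# Illustrative only: a reduced version of the pattern used in src/display/utils.py.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Non-dunder class attributes are exactly the per-column defaults.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

columns = [
    ["library", ColumnContent, ColumnContent("Library", "markdown", True, never_hidden=True)],
    ["overall_risk", ColumnContent, ColumnContent("Trust Score ⬇️", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", columns, frozen=True)

print(AutoEvalColumn.library.name)                               # -> "Library"
print([c.name for c in fields(AutoEvalColumn) if not c.hidden])  # -> ["Library", "Trust Score ⬇️"]
```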
src/envs.py
CHANGED
@@ -2,30 +2,21 @@ import os
 
 from huggingface_hub import HfApi
 
-#
-…
-…
-        return False
-    return True
-
-LOCAL_MODE = is_local_mode()
+# Run in local mode (no Hugging Face connection required)
+# Set to True when developing locally without HF credentials
+LOCAL_MODE = True
 
 # Info to change for your repository
 # ----------------------------------
 # Get token from environment or use None in local mode
 TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
 
-OWNER = "
+OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/
-QUEUE_REPO =
-RESULTS_REPO =
-
-if not LOCAL_MODE:
-    REPO_ID = str(os.environ.get("SPACE_ID"))
-    QUEUE_REPO = REPO_ID
-    RESULTS_REPO = REPO_ID
+REPO_ID = f"{OWNER}/leaderboard"
+QUEUE_REPO = f"{OWNER}/vulnerability-requests"
+RESULTS_REPO = f"{OWNER}/vulnerability-assessments"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH=os.getenv("HF_HOME", ".")
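`LOCAL_MODE` is now a plain module-level flag, so nothing in `envs.py` decides it at runtime any more. A hedged sketch of how start-up code can use it to skip Hub syncing when no credentials are present (the guard shown here is illustrative; the real wiring lives in `app.py`):

```python
# Illustrative guard, not part of this commit: only sync the queue/result datasets
# from the Hub when the Space is not running in local mode.
from huggingface_hub import snapshot_download

from src.envs import (
    EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, RESULTS_REPO, TOKEN, LOCAL_MODE,
)

if not LOCAL_MODE:
    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
    snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", token=TOKEN)
else:
    print("LOCAL_MODE=True: using the checked-in assessment-queue/ and assessment-results/ folders")
```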
src/leaderboard/read_evals.py
CHANGED
@@ -1,32 +1,28 @@
 import glob
 import json
+import math
 import os
+from dataclasses import dataclass
 from datetime import datetime
-from pydantic import BaseModel
-
-from src.display.formatting import make_clickable_library, make_clickable_report
-from src.display.utils import auto_eval_column_attrs, LibraryType, Tasks, Language
 
+import numpy as np
 
-…
-…
-    if datetime_str.endswith('Z'):
-        datetime_str = datetime_str[:-1] + '+00:00'
-    return datetime.fromisoformat(datetime_str)
+from src.display.formatting import make_clickable_library, make_clickable_report
+from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
 
 
-…
+@dataclass
+class AssessmentResult:
     """Represents one full vulnerability assessment. Built from a combination of the result and request file for a given library.
     """
     assessment_id: str # Unique identifier
-    library_name: str
+    library_name: str # org/repo
     org: str
     repo: str
     version: str
     results: dict # Risk scores
     framework: str = ""
     language: Language = Language.Other
-    language_str: str = "" # Original language string to support multiple languages
     library_type: LibraryType = LibraryType.Unknown
     license: str = "?"
     stars: int = 0
@@ -36,7 +32,7 @@ class AssessmentResult(BaseModel):
     report_url: str = "" # URL to detailed assessment report
 
     @classmethod
-    def init_from_json_file(
+    def init_from_json_file(self, json_filepath):
        """Initializes the assessment result from a JSON file"""
        with open(json_filepath) as fp:
            data = json.load(fp)
@@ -47,7 +43,7 @@ class AssessmentResult(BaseModel):
        org_and_repo = library_name.split("/", 1)
 
        if len(org_and_repo) == 1:
-            org =
+            org = None
            repo = org_and_repo[0]
            assessment_id = f"{repo}_{assessment.get('version', '')}"
        else:
@@ -66,27 +62,19 @@ class AssessmentResult(BaseModel):
        # Library metadata
        framework = assessment.get("framework", "")
        language_str = assessment.get("language", "Other")
-
-        # Handle multiple languages separated by /
-        if "/" in language_str:
-            language_parts = [lang.strip() for lang in language_str.split("/")]
-            # Store the full string but parse the first language for enum
-            language = next((lang for lang in Language if lang.value.name == language_parts[0]), Language.Other)
-        else:
-            language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
+        language = next((lang for lang in Language if lang.value.name == language_str), Language.Other)
 
        # Availability and verification
        last_update = assessment.get("last_updated", "")
        if last_update:
            try:
                # Format date for display
-                dt =
+                dt = datetime.fromisoformat(last_update)
                last_update = dt.strftime("%Y-%m-%d")
-            except
-                print(e)
+            except:
                pass
 
-        return
+        return self(
            assessment_id=assessment_id,
            library_name=library_name,
            org=org,
@@ -95,7 +83,6 @@ class AssessmentResult(BaseModel):
            results=risk_scores,
            framework=framework,
            language=language,
-            language_str=language_str,
            license=assessment.get("license", "?"),
            availability=assessment.get("active_maintenance", True),
            verified=assessment.get("independently_verified", False),
@@ -103,6 +90,18 @@ class AssessmentResult(BaseModel):
            report_url=assessment.get("report_url", ""),
        )
 
+    def update_with_request_file(self, requests_path):
+        """Finds the relevant request file for the current library and updates info with it"""
+        request_file = get_request_file_for_library(requests_path, self.library_name, self.version)
+
+        try:
+            with open(request_file, "r") as f:
+                request = json.load(f)
+            self.library_type = LibraryType.from_str(request.get("library_type", ""))
+            self.stars = request.get("stars", 0)
+        except Exception:
+            print(f"Could not find request file for {self.library_name} version {self.version}")
+
    def to_dict(self):
        """Converts the Assessment Result to a dict compatible with our dataframe display"""
        # Calculate Trust Score as equal-weight average
@@ -124,24 +123,22 @@ class AssessmentResult(BaseModel):
            weight_sum += weight
 
        trust_score = risk_sum / weight_sum if weight_sum > 0 else 10
-        # Round to 1 decimal place
-        trust_score = round(trust_score, 1)
 
        data_dict = {
            "assessment_id": self.assessment_id, # not a column, just a save name
-… (13 lines not rendered in this view)
+            AutoEvalColumn.library_type.name: self.library_type.value.name,
+            AutoEvalColumn.library_type_symbol.name: self.library_type.value.symbol,
+            AutoEvalColumn.language.name: self.language.value.name,
+            AutoEvalColumn.framework.name: self.framework,
+            AutoEvalColumn.library.name: make_clickable_library(self.library_name),
+            AutoEvalColumn.version.name: self.version,
+            AutoEvalColumn.overall_risk.name: trust_score,
+            AutoEvalColumn.license_name.name: self.license,
+            AutoEvalColumn.stars.name: self.stars,
+            AutoEvalColumn.last_update.name: self.last_update,
+            AutoEvalColumn.verified.name: self.verified,
+            AutoEvalColumn.availability.name: self.availability,
+            AutoEvalColumn.report_url.name: make_clickable_report(self.report_url),
        }
 
        # Add task-specific risk scores - map to display column names
@@ -150,25 +147,11 @@ class AssessmentResult(BaseModel):
            benchmark_key = task_enum.benchmark # e.g., "license_validation"
            col_name = task_enum.col_name # Use the display name, e.g., "License Risk"
            risk_score = self.results.get(benchmark_key, 10) # Default to highest risk
-
-            data_dict[col_name] = round(risk_score, 1)
+            data_dict[col_name] = risk_score
 
        return data_dict
 
 
-    def update_with_request_file(self, assessment_filepath):
-        """Finds the relevant request file for the current library and updates info with it"""
-        try:
-            with open(assessment_filepath, "r") as f:
-                request = json.load(f)["assessment"]
-            self.library_type = LibraryType.from_str(request.get("framework", ""))
-            self.stars = request.get("github_stars", 0)
-        except Exception as e:
-            print(e)
-            print(f"Could not find request file for {self.library_name} version {self.version}")
-
-
-
 def get_request_file_for_library(requests_path, library_name, version):
    """Selects the correct request file for a given library. Only keeps runs tagged as FINISHED"""
    # Try multiple naming patterns for flexibility
@@ -219,9 +202,8 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
 
        # Sort the files by date if they have date info
        try:
-            files.sort(key=lambda x:
-        except
-            print(e)
+            files.sort(key=lambda x: datetime.fromisoformat(json.loads(open(os.path.join(root, x)).read())["assessment"]["completed_time"]), reverse=True)
+        except:
            pass
 
    for file in files:
@@ -231,7 +213,7 @@ def get_raw_assessment_results(results_path: str, requests_path: str) -> list[As
    for assessment_filepath in assessment_filepaths:
        # Creation of result
        assessment_result = AssessmentResult.init_from_json_file(assessment_filepath)
-        assessment_result.update_with_request_file(
+        assessment_result.update_with_request_file(requests_path)
 
        # Store results of same eval together
        assessment_id = assessment_result.assessment_id
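`to_dict()` computes the Trust Score as an equal-weight average of the per-category risk scores, falling back to 10 (highest risk) for anything missing. A reduced sketch of that aggregation; the benchmark keys come from the `Tasks` enum in `src/about.py`, and only "license_validation" is spelled out in this diff, so the other keys below are placeholders:

```python
# Reduced sketch of the Trust Score aggregation in AssessmentResult.to_dict().
def trust_score(results: dict, benchmark_keys: list) -> float:
    # Equal weights; a category missing from `results` counts as 10 (highest risk).
    if not benchmark_keys:
        return 10.0
    return sum(results.get(key, 10) for key in benchmark_keys) / len(benchmark_keys)

# "license_validation" is confirmed by this diff; the remaining keys are placeholders.
keys = ["license_validation", "security_assessment", "maintenance_health"]
print(trust_score({"license_validation": 2.5, "security_assessment": 4.0}, keys))  # (2.5 + 4.0 + 10) / 3
```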
src/populate.py
CHANGED
@@ -2,8 +2,9 @@
 
 import pandas as pd
 
-from src.display.utils import
+from src.display.utils import AutoEvalColumn
 from src.leaderboard.read_evals import get_raw_assessment_results
+from src.about import Tasks
 
 
 def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
@@ -33,9 +34,9 @@ def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_co
            print(f"Warning: Column '{col}' missing, adding empty column")
            all_df[col] = 10.0 # Default to highest risk
 
-    # Sort by Trust Score (ascending -
-    if
-    all_df = all_df.sort_values(by=[
+    # Sort by Trust Score (ascending - lower is better)
+    if AutoEvalColumn.overall_risk.name in all_df.columns:
+        all_df = all_df.sort_values(by=[AutoEvalColumn.overall_risk.name])
 
    return all_df
 
@@ -68,16 +69,19 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
        finished_data = []
        running_data = []
        pending_data = []
-…
+
        for file_path in request_files:
            try:
-                with open(file_path, "r"
+                with open(file_path, "r") as f:
                    data = json.load(f)
 
                # Extract relevant fields
                row = {
                    "library": data.get("library", ""),
+                    "version": data.get("version", ""),
                    "language": data.get("language", ""),
+                    "framework": data.get("framework", ""),
+                    "library_type": data.get("library_type", ""),
                    "status": data.get("status", "UNKNOWN")
                }
 
@@ -88,8 +92,6 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
                    running_data.append(row)
                elif row["status"] == "PENDING":
                    pending_data.append(row)
-                elif row["status"] == "REJECTED":
-                    rejected_data.append(row)
            except Exception as e:
                print(f"Error reading request file {file_path}: {e}")
                continue
@@ -98,11 +100,11 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
        finished_df = pd.DataFrame(finished_data, columns=eval_cols)
        running_df = pd.DataFrame(running_data, columns=eval_cols)
        pending_df = pd.DataFrame(pending_data, columns=eval_cols)
-…
-        return finished_df, running_df, pending_df
+
+        return finished_df, running_df, pending_df
 
    except Exception as e:
        print(f"Error reading evaluation queue: {e}")
        # Return empty dataframes
        empty_df = pd.DataFrame(columns=eval_cols)
-        return empty_df.copy(), empty_df.copy(), empty_df.copy()
+        return empty_df.copy(), empty_df.copy(), empty_df.copy()
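For reference, the two populate helpers are consumed together with the column constants from `src/display/utils.py`. A sketch of the call sites as the signatures in this diff imply them (the exact wiring in `app.py` may differ slightly):

```python
# Sketch of the call sites implied by the signatures above (app.py wiring may differ).
from src.display.utils import COLS, BENCHMARK_COLS, EVAL_COLS
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.populate import get_leaderboard_df, get_evaluation_queue_df

# Full leaderboard table, sorted ascending by Trust Score (lower risk first).
leaderboard_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Submission queue split by status.
finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
```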
src/submission/check_validity.py
CHANGED
@@ -1,22 +1,58 @@
 import json
 import os
+import re
+import requests
 from collections import defaultdict
+from datetime import datetime, timedelta, timezone
 from typing import Dict, Tuple, Any, List, Set
 
-def is_repository_valid(repo_name: str) -> Tuple[bool, str, Dict[str, Any]]:
+def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.
 
    Args:
-        repo_name: The name of the repository
+        repo_name: The name of the repository (org/repo format)
+        repo_url: URL to the repository
 
    Returns:
        Tuple of (is_valid, error_message, library_info)
    """
    # Basic format validation
-    if not repo_name:
-        return False, "Repository name
-…
+    if not repo_name or "/" not in repo_name:
+        return False, "Repository name must be in the format 'organization/repository'", {}
+
+    # Check if GitHub URL
+    if repo_url and "github.com" in repo_url:
+        # Extract org and repo from URL if provided
+        try:
+            parts = repo_url.split("github.com/")[1].split("/")
+            org = parts[0]
+            repo = parts[1].split(".")[0] if "." in parts[1] else parts[1]
+            url_repo_name = f"{org}/{repo}"
+
+            # Check if URL matches repo_name
+            if url_repo_name != repo_name:
+                return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}
+        except:
+            pass # Fall back to using repo_name
+
+    # Get repository information from GitHub API
+    org, repo = repo_name.split("/")
+    api_url = f"https://api.github.com/repos/{org}/{repo}"
+
+    try:
+        response = requests.get(api_url)
+        if response.status_code != 200:
+            return False, f"Repository not found or not accessible: {response.json().get('message', 'Unknown error')}", {}
+
+        # Parse repository data
+        repo_data = response.json()
+        library_info = get_library_info(repo_data)
+
+        return True, "", library_info
+
+    except Exception as e:
+        return False, f"Error accessing repository: {str(e)}", {}
 
 def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
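One operational note on the new `is_repository_valid`: it calls the GitHub REST API without authentication, which is limited to roughly 60 requests per hour per IP. A hedged sketch of adding an optional token header (the `GITHUB_TOKEN` variable is an assumption, not something this commit introduces):

```python
import os
import requests

# Sketch only: same endpoint the validator uses, with an optional token to raise
# the rate limit. GITHUB_TOKEN is an assumed environment variable, not part of this PR.
def github_get(api_url: str) -> requests.Response:
    headers = {"Accept": "application/vnd.github+json"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return requests.get(api_url, headers=headers, timeout=10)

response = github_get("https://api.github.com/repos/pytorch/pytorch")
if response.status_code == 200:
    print(response.json().get("stargazers_count"))
```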
src/submission/submit.py
CHANGED
@@ -1,35 +1,57 @@
 import json
 import os
+import re
+import time
 import uuid
 from datetime import datetime
 from pathlib import Path
 
+import huggingface_hub
+import requests
+from huggingface_hub import HfApi
+
+from src.display.utils import LibraryType, Language, AssessmentStatus
 from src.display.formatting import styled_error, styled_warning, styled_message
-from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, LOCAL_MODE
-from src.submission.check_validity import is_repository_valid
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN, LOCAL_MODE
+from src.submission.check_validity import is_repository_valid, get_library_info
 
 
 def add_new_eval(
    library_name,
+    library_version,
+    repository_url,
+    language,
+    framework,
+    library_type_str,
 ) -> str:
    """
    Adds a new library to the assessment queue.
 
    Args:
-        library_name: Name of the library
+        library_name: Name of the library (org/repo format)
+        library_version: Version of the library
+        repository_url: URL to the repository
+        language: Programming language
+        framework: Related framework/ecosystem
+        library_type_str: Type of AI library
 
    Returns:
        A message indicating the status of the submission
    """
    # Check if valid repository
-    is_valid, validity_message, library_info = is_repository_valid(library_name)
+    is_valid, validity_message, library_info = is_repository_valid(library_name, repository_url)
 
    if not is_valid:
        return styled_error(f"Invalid submission: {validity_message}")
 
+    # Parse library type
+    library_type = LibraryType.from_str(library_type_str)
+    if library_type == LibraryType.Unknown:
+        return styled_error("Please select a valid library type.")
+
    # Create a unique identifier for the submission
    uid = uuid.uuid4().hex[:6]
-    timestamp = datetime.now().
+    timestamp = datetime.now().isoformat()
    request_filename = f"{library_name.replace('/', '_')}_eval_request_{timestamp}_{uid}.json"
 
    # Stars count and license info from library_info if available
@@ -39,6 +61,11 @@ def add_new_eval(
    # Create the assessment request JSON
    assessment_request = {
        "library": library_name,
+        "version": library_version,
+        "repository_url": repository_url,
+        "language": language,
+        "framework": framework,
+        "library_type": library_type.value.name,
        "license": license_name,
        "stars": stars,
        "status": "PENDING",
@@ -57,7 +84,7 @@ def add_new_eval(
 
    # If in local mode, don't try to upload to HF
    if LOCAL_MODE:
-        return styled_message(f"Library '{library_name}' has been added to the local assessment queue! Assessment ID: {uid}")
+        return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the local assessment queue! Assessment ID: {uid}")
 
    # Try to upload to HF if not in local mode
    try:
@@ -65,12 +92,12 @@ def add_new_eval(
        path = Path(request_file_path)
        API.upload_file(
            path_or_fileobj=path,
-            path_in_repo=
+            path_in_repo=request_filename,
            repo_id=QUEUE_REPO,
-            repo_type="
+            repo_type="dataset",
        )
 
-        return styled_message(f"Library '{library_name}' has been added to the assessment queue! Assessment ID: {uid}")
+        return styled_message(f"Library '{library_name}' (version {library_version}) has been added to the assessment queue! Assessment ID: {uid}")
 
    except Exception as e:
        return styled_warning(f"Saved locally but failed to upload to Hugging Face: {str(e)}")
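A usage example for the extended `add_new_eval` signature; the argument values below are hypothetical (candle does appear in this PR's results files, but the version, language and framework strings are made up for illustration), and in the app they would come from the submission form's components:

```python
from src.submission.submit import add_new_eval

# Hypothetical values for illustration; in app.py these come from the Gradio form.
message = add_new_eval(
    library_name="huggingface/candle",
    library_version="0.8.0",
    repository_url="https://github.com/huggingface/candle",
    language="Rust",
    framework="candle",
    library_type_str="🟢 machine learning",
)
print(message)  # HTML fragment from styled_message / styled_warning / styled_error
```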
uv.lock
DELETED
The diff for this file is too large to render.
See raw diff