from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # internal key of the assessment domain
    metric: str     # metric reported for the domain (a 0-10 risk score)
    col_name: str   # display name of the leaderboard column


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # Risk domains from the LibVulnWatch paper
    license = Task("license_validation", "score", "License Risk")
    security = Task("security_assessment", "score", "Security Risk")
    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
    dependency = Task("dependency_management", "score", "Dependency Risk")
    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")


NUM_FEWSHOT = 0  # Not relevant for vulnerability assessment
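

# Usage sketch: how downstream leaderboard code is expected to read the enum.
# (`risk_columns` is an illustrative helper, not part of the original template.)
def risk_columns() -> list[str]:
    """Display names of the five risk-domain columns, in declaration order."""
    return [task.value.col_name for task in Tasks]
# e.g. risk_columns() -> ["License Risk", "Security Risk", "Maintenance Risk",
#                         "Dependency Risk", "Regulatory Risk"]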
# ---------------------------------------------------

# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">LibVulnWatch: Vulnerability Assessment Leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
## LibVulnWatch: Continuous, Multi-Domain Risk Scoring for AI Libraries

_As presented at the **ACL 2025 Student Research Workshop** and the **ICML 2025 Technical AI Governance (TAIG) Workshop**_, LibVulnWatch provides an evidence-based, end-to-end pipeline that uncovers **hidden vulnerabilities** in open-source AI libraries across five governance-aligned domains:

- **License Validation**: compatibility, provenance, obligations
- **Security Assessment**: CVEs, patch latency, exploit primitives
- **Maintenance Health**: bus factor, release cadence, contributor diversity
- **Dependency Management**: transitive risk, SBOM completeness
- **Regulatory Compliance**: privacy/export controls, policy documentation

In the paper we apply the framework to **20 popular libraries**, achieving **88% coverage of OpenSSF Scorecard checks** and surfacing **up to 19 previously unreported risks per library**.

Lower scores indicate lower risk, and the **Trust Score** is the equal-weight average of the five domains.
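
For reference, a minimal sketch of how the Trust Score follows from the five domain scores (the numbers below are illustrative, not real assessment results):

```python
# Equal-weight Trust Score over the five risk domains (illustrative values).
domain_scores = {
    "License Risk": 2.0,
    "Security Risk": 4.5,
    "Maintenance Risk": 1.0,
    "Dependency Risk": 3.5,
    "Regulatory Risk": 2.5,
}
trust_score = sum(domain_scores.values()) / len(domain_scores)  # 2.7
```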
""" | |
# Which evaluations are you running? how can people reproduce what you have? | |
LLM_BENCHMARKS_TEXT = """ | |
## Methodology at a Glance | |
LibVulnWatch orchestrates a **graph of specialised agents** powered by large language models. Each agent contributes one evidence layer and writes structured findings to a shared memory: | |
1οΈβ£ **Static agents** β licence parsing, secret scanning, call-graph reachability | |
2οΈβ£ **Dynamic agents** β fuzzing harnesses, dependency-confusion probes, CVE replay | |
3οΈβ£ **Metadata agents** β GitHub mining, release-cadence modelling, community health | |
4οΈβ£ **Policy agents** β mapping evidence to NIST SSDF, EU AI Act, and related frameworks | |
The aggregator agent converts raw findings into 0β10 scores per domain, producing a reproducible JSON result that is **88 % compatible with OpenSSF Scorecard checks**. All artefacts (SBOMs, logs, annotated evidence) are archived and linked in the public report. | |
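
As a hedged illustration of that last step (the finding fields and the averaging rule below are assumptions for readability, not the published implementation):

```python
# Toy sketch of the aggregation step: structured findings in, 0-10 domain scores out.
def aggregate(findings: dict[str, list[dict]]) -> dict[str, float]:
    # Average per-finding severities (0-10) into one score per risk domain.
    return {
        domain: round(sum(f["severity"] for f in items) / max(len(items), 1), 1)
        for domain, items in findings.items()
    }

example = {
    "license_validation": [{"id": "weak-copyleft-conflict", "severity": 4.0}],
    "security_assessment": [
        {"id": "cve-replay-hit", "severity": 7.5},
        {"id": "stale-patch-window", "severity": 5.0},
    ],
}
print(aggregate(example))  # {'license_validation': 4.0, 'security_assessment': 6.2}
```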
""" | |
EVALUATION_QUEUE_TEXT = """ | |
## Before submitting a library for assessment | |
### 1) Ensure your library is publicly accessible | |
LibVulnWatch can only assess libraries that are publicly available on GitHub or another accessible repository. | |
### 2) Verify complete metadata is available | |
Our assessment relies on metadata including: | |
- License information | |
- Dependency specifications | |
- Maintenance history and contributor information | |
- Security policies and vulnerability handling processes | |
### 3) Make sure your repository has an open license | |
This leaderboard is designed for open-source AI libraries, which should have clear licensing terms. | |
### 4) Add security documentation | |
Libraries with comprehensive security documentation tend to receive better assessments. | |
## If your assessment fails | |
If your library shows as "FAILED" in the assessment queue, check that: | |
- The repository is publicly accessible | |
- All required metadata files are present | |
- Dependencies can be resolved | |
- The repository doesn't employ obfuscation techniques that interfere with analysis | |
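
A quick self-check you can run before submitting (a hedged sketch using the public GitHub REST API; the helper name and file list are illustrative, not a LibVulnWatch requirement):

```python
import requests

# Hypothetical pre-submission check: the repo is public and key metadata files exist.
# Adapt the file names to your project's layout.
def precheck(owner: str, repo: str) -> dict[str, bool]:
    base = f"https://api.github.com/repos/{owner}/{repo}"
    checks = {"public": requests.get(base, timeout=10).status_code == 200}
    for path in ("LICENSE", "SECURITY.md", "pyproject.toml"):
        checks[path] = requests.get(f"{base}/contents/{path}", timeout=10).status_code == 200
    return checks

# Example: precheck("your-org", "your-library")
```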
""" | |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" | |
CITATION_BUTTON_TEXT = r"""@inproceedings{wu2025libvulnwatch, | |
title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries}, | |
author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama}, | |
booktitle={ACL 2025 Student Research Workshop}, | |
year={2025}, | |
url={https://openreview.net/forum?id=yQzYEAL0BT} | |
} | |
@inproceedings{anonymous2025libvulnwatch, | |
title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries}, | |
author={Zekun Wu and Seonglae Cho and Umar Mohammed and CRISTIAN ENRIQUE MUNOZ VILLALOBOS and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama}, | |
booktitle={ICML Workshop on Technical AI Governance (TAIG)}, | |
year={2025}, | |
url={https://openreview.net/forum?id=MHhrr8QHgR} | |
}""" | |