from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # Risk domains from the LibVulnWatch paper
    license = Task("license_validation", "score", "License Risk")
    security = Task("security_assessment", "score", "Security Risk")
    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
    dependency = Task("dependency_management", "score", "Dependency Risk")
    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")


NUM_FEWSHOT = 0  # Not relevant for vulnerability assessment
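
# Illustrative only: a minimal sketch of how the Tasks enum can drive the
# leaderboard schema. `build_columns` is a hypothetical helper, not part of
# this Space's code.
def build_columns() -> list[str]:
    """Collect the display column name for every assessed risk domain."""
    return [task.value.col_name for task in Tasks]

# build_columns() -> ["License Risk", "Security Risk", "Maintenance Risk",
#                     "Dependency Risk", "Regulatory Risk"]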
# ---------------------------------------------------
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">LibVulnWatch: Vulnerability Assessment Leaderboard</h1>"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries

This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains:

- **License Validation**: Legal risks based on license type, compatibility, and requirements
- **Security Assessment**: Vulnerability severity and patch responsiveness
- **Maintenance Health**: Sustainability and governance practices
- **Dependency Management**: Vulnerability inheritance and supply chain security
- **Regulatory Compliance**: Compliance readiness for various frameworks

Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domain scores, providing a balanced assessment of overall library trustworthiness.
"""
# Which evaluations are you running? How can people reproduce what you have?
LLM_BENCHMARKS_TEXT = """
## How LibVulnWatch Works

Our assessment methodology evaluates libraries through:

1. **Static Analysis**: Code review, license parsing, and documentation examination
2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing
3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence

Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk.
"""
EVALUATION_QUEUE_TEXT = """
## Before submitting a library for assessment

### 1) Ensure your library is publicly accessible
LibVulnWatch can only assess libraries that are publicly available on GitHub or another accessible repository.

### 2) Verify complete metadata is available
Our assessment relies on metadata including:
- License information
- Dependency specifications
- Maintenance history and contributor information
- Security policies and vulnerability handling processes

### 3) Make sure your repository has an open license
This leaderboard is designed for open-source AI libraries, which should have clear licensing terms.

### 4) Add security documentation
Libraries with comprehensive security documentation tend to receive lower (better) risk scores.

## If your assessment fails
If your library shows as "FAILED" in the assessment queue, check that:
- The repository is publicly accessible
- All required metadata files are present
- Dependencies can be resolved
- The repository doesn't employ obfuscation techniques that interfere with analysis
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{wu2025libvulnwatch,
  title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
  author={Zekun Wu and Seonglae Cho and Umar Mohammed and Cristian Enrique Munoz Villalobos and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
  booktitle={ACL 2025 Student Research Workshop},
  year={2025},
  url={https://openreview.net/forum?id=yQzYEAL0BT}
}

@inproceedings{wu2025libvulnwatchtaig,
  title={LibVulnWatch: A Deep Assessment Agent System and Leaderboard for Uncovering Hidden Vulnerabilities in Open-Source {AI} Libraries},
  author={Zekun Wu and Seonglae Cho and Umar Mohammed and Cristian Enrique Munoz Villalobos and Kleyton Da Costa and Xin Guan and Theo King and Ze Wang and Emre Kazim and Adriano Koshiyama},
  booktitle={ICML Workshop on Technical AI Governance (TAIG)},
  year={2025},
  url={https://openreview.net/forum?id=MHhrr8QHgR}
}"""