from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # Risk domains from the LibVulnWatch paper
    license = Task("license_validation", "score", "License Risk")
    security = Task("security_assessment", "score", "Security Risk")
    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
    dependency = Task("dependency_management", "score", "Dependency Risk")
    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")
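    # Note (illustrative only, not required by this file): downstream leaderboard
    # code typically builds its display columns by iterating this enum, e.g.
    #   risk_columns = [task.value.col_name for task in Tasks]
    #   # -> ["License Risk", "Security Risk", "Maintenance Risk",
    #   #     "Dependency Risk", "Regulatory Risk"]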
NUM_FEWSHOT = 0 # Not relevant for vulnerability assessment
# ---------------------------------------------------
# Your leaderboard name
TITLE = """
LibVulnWatch: Vulnerability Assessment Leaderboard
"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries
This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains:
- **License Validation**: Legal risks based on license type, compatibility, and requirements
- **Security Assessment**: Vulnerability severity and patch responsiveness
- **Maintenance Health**: Sustainability and governance practices
- **Dependency Management**: Vulnerability inheritance and supply chain security
- **Regulatory Compliance**: Compliance readiness for various frameworks
Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness.
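As a concrete illustration, the Trust Score is simply the mean of the five per-domain risk scores. The sketch below uses made-up numbers and is not the production scoring code:
```python
# Equal-weighted Trust Score over the five risk domains (lower is better).
# The domain scores below are made-up examples on the 0-10 risk scale.
domain_scores = {
    "license": 2.1,
    "security": 3.4,
    "maintenance": 1.8,
    "dependency": 4.0,
    "regulatory": 2.7,
}
trust_score = sum(domain_scores.values()) / len(domain_scores)
print(f"Trust Score: {trust_score:.2f}")  # -> 2.80
```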
"""
# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = """
## How LibVulnWatch Works
Our assessment methodology evaluates libraries through:
1. **Static Analysis**: Code review, license parsing, and documentation examination
2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing
3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence
Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk.
## Reproducibility
To reproduce our assessment for a specific library:
```python
from libvulnwatch import VulnerabilityAssessor
# Initialize the assessor
assessor = VulnerabilityAssessor()
# Run assessment on a library
results = assessor.assess_library("organization/library_name")
# View detailed results
print(results.risk_scores)
print(results.detailed_findings)
```
"""
EVALUATION_QUEUE_TEXT = """
## Before submitting a library for assessment
### 1) Ensure your library is publicly accessible
LibVulnWatch can only assess libraries that are publicly available on GitHub or another accessible repository.
### 2) Verify complete metadata is available
Our assessment relies on metadata including the following (a quick local self-check sketch appears after this list):
- License information
- Dependency specifications
- Maintenance history and contributor information
- Security policies and vulnerability handling processes
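The snippet below is a minimal, optional self-check you can run on a local clone before submitting; the file names are common conventions, and the actual assessment examines far more than file presence:
```python
# Optional pre-submission self-check for a local clone of your repository.
# The file names are common conventions, not an exhaustive list of what
# LibVulnWatch inspects.
from pathlib import Path

def check_metadata(repo_path: str) -> dict[str, bool]:
    repo = Path(repo_path)
    expected = {
        "license": ["LICENSE", "LICENSE.md", "LICENSE.txt"],
        "dependencies": ["pyproject.toml", "requirements.txt", "setup.py"],
        "security_policy": ["SECURITY.md", ".github/SECURITY.md"],
    }
    return {
        kind: any((repo / name).exists() for name in names)
        for kind, names in expected.items()
    }

print(check_metadata("path/to/your/local/clone"))
```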
### 3) Make sure your repository has an open license
This leaderboard is designed for open-source AI libraries, which should have clear licensing terms.
### 4) Add security documentation
Libraries with comprehensive security documentation, such as a published security policy and a clear vulnerability disclosure process, tend to receive lower security-risk scores.
## If your assessment fails
If your library shows as "FAILED" in the assessment queue, check that:
- The repository is publicly accessible
- All required metadata files are present
- Dependencies can be resolved
- The repository doesn't employ obfuscation techniques that interfere with analysis
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{LibVulnWatch2025,
  title={LibVulnWatch: Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries},
  author={First Author and Second Author},
  booktitle={ICML 2025 Technical AI Governance Workshop},
  year={2025}
}"""