from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # Risk domains from LibVulnWatch paper
    license = Task("license_validation", "score", "License Risk")
    security = Task("security_assessment", "score", "Security Risk")
    maintenance = Task("maintenance_health", "score", "Maintenance Risk")
    dependency = Task("dependency_management", "score", "Dependency Risk")
    regulatory = Task("regulatory_compliance", "score", "Regulatory Risk")


NUM_FEWSHOT = 0  # Not relevant for vulnerability assessment
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

LibVulnWatch: Vulnerability Assessment Leaderboard

""" # What does your leaderboard evaluate? INTRODUCTION_TEXT = """ ## Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries This leaderboard provides continuous vulnerability assessment for open-source AI libraries across five critical risk domains: - **License Validation**: Legal risks based on license type, compatibility, and requirements - **Security Assessment**: Vulnerability severity and patch responsiveness - **Maintenance Health**: Sustainability and governance practices - **Dependency Management**: Vulnerability inheritance and supply chain security - **Regulatory Compliance**: Compliance readiness for various frameworks Lower scores indicate fewer vulnerabilities and lower risk. The Trust Score is an equal-weighted average of all five domains, providing a balanced assessment of overall library trustworthiness. """ # Which evaluations are you running? how can people reproduce what you have? LLM_BENCHMARKS_TEXT = f""" ## How LibVulnWatch Works Our assessment methodology evaluates libraries through: 1. **Static Analysis**: Code review, license parsing, and documentation examination 2. **Dynamic Analysis**: Vulnerability scanning, dependency checking, and API testing 3. **Metadata Analysis**: Repository metrics, contributor patterns, and release cadence Each library receives a risk score (0-10) in each domain, with lower scores indicating lower risk. ## Reproducibility To reproduce our assessment for a specific library: ```python from libvulnwatch import VulnerabilityAssessor # Initialize the assessor assessor = VulnerabilityAssessor() # Run assessment on a library results = assessor.assess_library("organization/library_name") # View detailed results print(results.risk_scores) print(results.detailed_findings) ``` """ EVALUATION_QUEUE_TEXT = """ ## Before submitting a library for assessment ### 1) Ensure your library is publicly accessible LibVulnWatch can only assess libraries that are publicly available on GitHub or another accessible repository. ### 2) Verify complete metadata is available Our assessment relies on metadata including: - License information - Dependency specifications - Maintenance history and contributor information - Security policies and vulnerability handling processes ### 3) Make sure your repository has an open license This leaderboard is designed for open-source AI libraries, which should have clear licensing terms. ### 4) Add security documentation Libraries with comprehensive security documentation tend to receive better assessments. ## If your assessment fails If your library shows as "FAILED" in the assessment queue, check that: - The repository is publicly accessible - All required metadata files are present - Dependencies can be resolved - The repository doesn't employ obfuscation techniques that interfere with analysis """ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" CITATION_BUTTON_TEXT = r""" @article{LibVulnWatch2025, title={LibVulnWatch: Systematic Vulnerability Assessment and Leaderboard Tracking for Open-Source AI Libraries}, author={First Author and Second Author}, journal={ICML 2025 Technical AI Governance Workshop}, year={2025} } """