Commit · 995dcf8
1 Parent(s): 95ba712

add

Files changed:
- README.md +4 -2
- requirements.txt +0 -1
- src/envs.py +0 -4
- src/leaderboard/github_data.py +0 -131
- src/leaderboard/read_evals.py +0 -10
README.md
CHANGED
@@ -16,6 +16,7 @@ sdk_version: 5.19.0
Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).

Results files should have the following format and be stored as json files:
+
```json
{
    "config": {
@@ -40,7 +41,8 @@ If you encounter problem on the space, don't hesitate to restart it to remove th

# Code logic for more complex edits

-You'll find
+You'll find
+
- the main table' columns names and properties in `src/display/utils.py`
- the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
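The `json` example above is cut off by the hunk context. Purely for orientation, here is a minimal sketch of a results file in the stock leaderboard-template layout; the field names (`model_dtype`, `model_name`, `model_sha`, task/metric keys) are assumptions from the generic template, not values taken from this space's code:

```python
# Minimal sketch, assuming the stock leaderboard-template layout.
# Field names and values are illustrative assumptions, not taken from this repository.
import json

result = {
    "config": {
        "model_dtype": "torch.float16",  # precision used for the run
        "model_name": "org/model",       # path on the Hub
        "model_sha": "main",             # revision on the Hub
    },
    "results": {
        "task_name": {"metric_name": 0.0},  # one entry per task/metric
    },
}

with open("results_org__model.json", "w") as f:
    json.dump(result, f, indent=2)
```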
requirements.txt
CHANGED
@@ -10,7 +10,6 @@ matplotlib
numpy
pandas
python-dateutil
-requests
tqdm
transformers
tokenizers>=0.15.0
src/envs.py
CHANGED
@@ -11,10 +11,6 @@ LOCAL_MODE = True
# Get token from environment or use None in local mode
TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None

-# GitHub API token for fetching repo metadata
-# This increases rate limits from 60 to 5000 requests per hour
-GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
-
OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
# ----------------------------------

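Context for the removed comment: unauthenticated calls to the GitHub REST API are capped at 60 requests per hour, while token-authenticated calls get 5,000. A standalone sketch for checking the current quota (not part of this repo, and note this same commit drops `requests` from requirements.txt):

```python
# Standalone sketch: query the GitHub rate-limit endpoint with an optional token.
# Not part of this repository; `requests` is removed from requirements.txt in this commit.
import os
import requests

headers = {"Accept": "application/vnd.github.v3+json"}
token = os.environ.get("GITHUB_TOKEN", "")
if token:
    headers["Authorization"] = f"token {token}"

resp = requests.get("https://api.github.com/rate_limit", headers=headers)
core = resp.json()["resources"]["core"]
print(f"limit={core['limit']}, remaining={core['remaining']}")  # 60 unauthenticated, 5000 with a token
```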
src/leaderboard/github_data.py
DELETED
@@ -1,131 +0,0 @@
-"""Utilities for fetching GitHub repository data"""
-
-import os
-import requests
-import time
-from functools import lru_cache
-from urllib.parse import urlparse
-
-# Import GitHub token from envs
-from src.envs import GITHUB_TOKEN
-
-
-def extract_repo_path(repo_url):
-    """Extract org/repo path from GitHub URL
-
-    Args:
-        repo_url: GitHub repository URL
-
-    Returns:
-        Repository path in format "org/repo"
-    """
-    if not repo_url:
-        return None
-
-    # Handle both URL and org/repo format
-    if repo_url.startswith(("http://", "https://")):
-        parsed = urlparse(repo_url)
-        path = parsed.path.strip("/")
-
-        # Remove .git suffix if present
-        if path.endswith(".git"):
-            path = path[:-4]
-
-        return path
-
-    # Already in org/repo format
-    return repo_url
-
-
-@lru_cache(maxsize=128)
-def get_github_data(repo_path, use_token=True):
-    """Fetch repository data from GitHub API
-
-    Args:
-        repo_path: Repository path in format "org/repo"
-        use_token: Whether to use GitHub token if available
-
-    Returns:
-        Dictionary with repository data including stars and license
-    """
-    if not repo_path:
-        return {"github_stars": 0, "license": "Unknown"}
-
-    api_url = f"https://api.github.com/repos/{repo_path}"
-    headers = {"Accept": "application/vnd.github.v3+json"}
-
-    # Add token for higher rate limits if available
-    if use_token and GITHUB_TOKEN:
-        headers["Authorization"] = f"token {GITHUB_TOKEN}"
-
-    try:
-        response = requests.get(api_url, headers=headers)
-
-        if response.status_code == 200:
-            data = response.json()
-
-            # Extract relevant fields
-            result = {
-                "github_stars": data.get("stargazers_count", 0),
-                "license": data.get("license", {}).get("spdx_id", "Unknown"),
-                "full_name": data.get("full_name", repo_path),
-                "created_at": data.get("created_at", ""),
-                "updated_at": data.get("updated_at", ""),
-                "language": data.get("language", ""),
-                "forks_count": data.get("forks_count", 0),
-                "default_branch": data.get("default_branch", "main"),
-            }
-
-            # If license is None or "NOASSERTION", use "Unknown"
-            if not result["license"] or result["license"] == "NOASSERTION":
-                result["license"] = "Unknown"
-
-            return result
-        else:
-            print(f"GitHub API error for {repo_path}: {response.status_code} - {response.text}")
-            return {"github_stars": 0, "license": "Unknown"}
-
-    except Exception as e:
-        print(f"Error fetching GitHub data for {repo_path}: {e}")
-        return {"github_stars": 0, "license": "Unknown"}
-
-
-def update_assessment_with_github_data(assessment, force_update=False):
-    """Update assessment with data from GitHub
-
-    Args:
-        assessment: AssessmentResult object
-        force_update: Whether to force update even if values exist
-
-    Returns:
-        Updated AssessmentResult object
-    """
-    # Skip if no data is missing or if force_update is False
-    if not force_update and assessment.stars > 0 and assessment.license != "?":
-        return assessment
-
-    # Try getting repo path from library_name first
-    repo_path = None
-    if assessment.library_name and "/" in assessment.library_name:
-        repo_path = assessment.library_name
-
-    # Fall back to repository_url if available
-    if not repo_path and hasattr(assessment, 'repository_url') and assessment.repository_url:
-        repo_path = extract_repo_path(assessment.repository_url)
-
-    # If we still don't have a path, reconstruct from org/repo
-    if not repo_path and assessment.org and assessment.repo:
-        repo_path = f"{assessment.org}/{assessment.repo}"
-
-    # If we found a valid path, fetch and update
-    if repo_path:
-        github_data = get_github_data(repo_path)
-
-        # Update if data is missing or force_update is True
-        if force_update or assessment.stars == 0:
-            assessment.stars = github_data["github_stars"]
-
-        if force_update or assessment.license == "?":
-            assessment.license = github_data["license"]
-
-    return assessment
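For reference, a minimal sketch of how the deleted module's helpers were called before this commit (the example repository URL is arbitrary; the module no longer exists after this change):

```python
# How the deleted helpers were used prior to this commit.
from src.leaderboard.github_data import extract_repo_path, get_github_data

repo_path = extract_repo_path("https://github.com/psf/requests.git")  # -> "psf/requests"
data = get_github_data(repo_path)                                     # cached via lru_cache
print(data["github_stars"], data["license"])  # stargazers_count and SPDX id; "Unknown" on failure
```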
src/leaderboard/read_evals.py
CHANGED
@@ -9,7 +9,6 @@ import numpy as np

from src.display.formatting import make_clickable_library, make_clickable_report
from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
-from src.leaderboard.github_data import update_assessment_with_github_data


@dataclass
@@ -31,7 +30,6 @@ class AssessmentResult:
    availability: bool = True
    verified: bool = False
    report_url: str = "" # URL to detailed assessment report
-    repository_url: str = "" # GitHub repository URL

    @classmethod
    def init_from_json_file(self, json_filepath):
@@ -90,7 +88,6 @@ class AssessmentResult:
            verified=assessment.get("independently_verified", False),
            last_update=last_update,
            report_url=assessment.get("report_url", ""),
-            repository_url=assessment.get("repository_url", ""),
        )

    def update_with_request_file(self, requests_path):
@@ -102,15 +99,8 @@ class AssessmentResult:
                request = json.load(f)
            self.library_type = LibraryType.from_str(request.get("library_type", ""))
            self.stars = request.get("stars", 0)
-            # Add repository URL if not already set
-            if not self.repository_url and "repository_url" in request:
-                self.repository_url = request.get("repository_url", "")
        except Exception:
            print(f"Could not find request file for {self.library_name} version {self.version}")
-
-        # Try to get GitHub stars and license if missing
-        if self.stars == 0 or self.license == "?":
-            update_assessment_with_github_data(self)

    def to_dict(self):
        """Converts the Assessment Result to a dict compatible with our dataframe display"""
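After this change, `library_type` and `stars` come only from the request file; there is no GitHub fallback any more. A minimal sketch of request-file content carrying the two keys that `update_with_request_file()` still reads (the example values, and any extra fields a real request file may have, are assumptions about the schema):

```python
# Sketch of request-file content providing the two keys read in update_with_request_file().
# The concrete values are illustrative guesses, not taken from the repository.
request = {
    "library_type": "open-source",  # parsed with LibraryType.from_str()
    "stars": 1234,                  # used as-is; no longer backfilled from GitHub
}
```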