wu981526092 committed on
Commit
995dcf8
·
1 Parent(s): 95ba712
README.md CHANGED
@@ -16,6 +16,7 @@ sdk_version: 5.19.0
16
  Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
17
 
18
  Results files should have the following format and be stored as json files:
 
19
  ```json
20
  {
21
  "config": {
@@ -40,7 +41,8 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
40
 
41
  # Code logic for more complex edits
42
 
43
- You'll find
 
44
  - the main table's column names and properties in `src/display/utils.py`
45
  - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
46
- - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
 
16
  Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
17
 
18
  Results files should have the following format and be stored as json files:
19
+
20
  ```json
21
  {
22
  "config": {
 
41
 
42
  # Code logic for more complex edits
43
 
44
+ You'll find
45
+
46
  - the main table's column names and properties in `src/display/utils.py`
47
  - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
48
+ - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
requirements.txt CHANGED
@@ -10,7 +10,6 @@ matplotlib
10
  numpy
11
  pandas
12
  python-dateutil
13
- requests
14
  tqdm
15
  transformers
16
  tokenizers>=0.15.0
 
10
  numpy
11
  pandas
12
  python-dateutil
 
13
  tqdm
14
  transformers
15
  tokenizers>=0.15.0
src/envs.py CHANGED
@@ -11,10 +11,6 @@ LOCAL_MODE = True
11
  # Get token from environment or use None in local mode
12
  TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
13
 
14
- # GitHub API token for fetching repo metadata
15
- # This increases rate limits from 60 to 5000 requests per hour
16
- GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
17
-
18
  OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
19
  # ----------------------------------
20
 
 
11
  # Get token from environment or use None in local mode
12
  TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
13
 
 
 
 
 
14
  OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
15
  # ----------------------------------
16
 
src/leaderboard/github_data.py DELETED
@@ -1,131 +0,0 @@
1
- """Utilities for fetching GitHub repository data"""
2
-
3
- import os
4
- import requests
5
- import time
6
- from functools import lru_cache
7
- from urllib.parse import urlparse
8
-
9
- # Import GitHub token from envs
10
- from src.envs import GITHUB_TOKEN
11
-
12
-
13
- def extract_repo_path(repo_url):
14
- """Extract org/repo path from GitHub URL
15
-
16
- Args:
17
- repo_url: GitHub repository URL
18
-
19
- Returns:
20
- Repository path in format "org/repo"
21
- """
22
- if not repo_url:
23
- return None
24
-
25
- # Handle both URL and org/repo format
26
- if repo_url.startswith(("http://", "https://")):
27
- parsed = urlparse(repo_url)
28
- path = parsed.path.strip("/")
29
-
30
- # Remove .git suffix if present
31
- if path.endswith(".git"):
32
- path = path[:-4]
33
-
34
- return path
35
-
36
- # Already in org/repo format
37
- return repo_url
38
-
39
-
40
- @lru_cache(maxsize=128)
41
- def get_github_data(repo_path, use_token=True):
42
- """Fetch repository data from GitHub API
43
-
44
- Args:
45
- repo_path: Repository path in format "org/repo"
46
- use_token: Whether to use GitHub token if available
47
-
48
- Returns:
49
- Dictionary with repository data including stars and license
50
- """
51
- if not repo_path:
52
- return {"github_stars": 0, "license": "Unknown"}
53
-
54
- api_url = f"https://api.github.com/repos/{repo_path}"
55
- headers = {"Accept": "application/vnd.github.v3+json"}
56
-
57
- # Add token for higher rate limits if available
58
- if use_token and GITHUB_TOKEN:
59
- headers["Authorization"] = f"token {GITHUB_TOKEN}"
60
-
61
- try:
62
- response = requests.get(api_url, headers=headers)
63
-
64
- if response.status_code == 200:
65
- data = response.json()
66
-
67
- # Extract relevant fields
68
- result = {
69
- "github_stars": data.get("stargazers_count", 0),
70
- "license": data.get("license", {}).get("spdx_id", "Unknown"),
71
- "full_name": data.get("full_name", repo_path),
72
- "created_at": data.get("created_at", ""),
73
- "updated_at": data.get("updated_at", ""),
74
- "language": data.get("language", ""),
75
- "forks_count": data.get("forks_count", 0),
76
- "default_branch": data.get("default_branch", "main"),
77
- }
78
-
79
- # If license is None or "NOASSERTION", use "Unknown"
80
- if not result["license"] or result["license"] == "NOASSERTION":
81
- result["license"] = "Unknown"
82
-
83
- return result
84
- else:
85
- print(f"GitHub API error for {repo_path}: {response.status_code} - {response.text}")
86
- return {"github_stars": 0, "license": "Unknown"}
87
-
88
- except Exception as e:
89
- print(f"Error fetching GitHub data for {repo_path}: {e}")
90
- return {"github_stars": 0, "license": "Unknown"}
91
-
92
-
93
- def update_assessment_with_github_data(assessment, force_update=False):
94
- """Update assessment with data from GitHub
95
-
96
- Args:
97
- assessment: AssessmentResult object
98
- force_update: Whether to force update even if values exist
99
-
100
- Returns:
101
- Updated AssessmentResult object
102
- """
103
- # Skip if no data is missing or if force_update is False
104
- if not force_update and assessment.stars > 0 and assessment.license != "?":
105
- return assessment
106
-
107
- # Try getting repo path from library_name first
108
- repo_path = None
109
- if assessment.library_name and "/" in assessment.library_name:
110
- repo_path = assessment.library_name
111
-
112
- # Fall back to repository_url if available
113
- if not repo_path and hasattr(assessment, 'repository_url') and assessment.repository_url:
114
- repo_path = extract_repo_path(assessment.repository_url)
115
-
116
- # If we still don't have a path, reconstruct from org/repo
117
- if not repo_path and assessment.org and assessment.repo:
118
- repo_path = f"{assessment.org}/{assessment.repo}"
119
-
120
- # If we found a valid path, fetch and update
121
- if repo_path:
122
- github_data = get_github_data(repo_path)
123
-
124
- # Update if data is missing or force_update is True
125
- if force_update or assessment.stars == 0:
126
- assessment.stars = github_data["github_stars"]
127
-
128
- if force_update or assessment.license == "?":
129
- assessment.license = github_data["license"]
130
-
131
- return assessment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/leaderboard/read_evals.py CHANGED
@@ -9,7 +9,6 @@ import numpy as np
9
 
10
  from src.display.formatting import make_clickable_library, make_clickable_report
11
  from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
12
- from src.leaderboard.github_data import update_assessment_with_github_data
13
 
14
 
15
  @dataclass
@@ -31,7 +30,6 @@ class AssessmentResult:
31
  availability: bool = True
32
  verified: bool = False
33
  report_url: str = "" # URL to detailed assessment report
34
- repository_url: str = "" # GitHub repository URL
35
 
36
  @classmethod
37
  def init_from_json_file(self, json_filepath):
@@ -90,7 +88,6 @@ class AssessmentResult:
90
  verified=assessment.get("independently_verified", False),
91
  last_update=last_update,
92
  report_url=assessment.get("report_url", ""),
93
- repository_url=assessment.get("repository_url", ""),
94
  )
95
 
96
  def update_with_request_file(self, requests_path):
@@ -102,15 +99,8 @@ class AssessmentResult:
102
  request = json.load(f)
103
  self.library_type = LibraryType.from_str(request.get("library_type", ""))
104
  self.stars = request.get("stars", 0)
105
- # Add repository URL if not already set
106
- if not self.repository_url and "repository_url" in request:
107
- self.repository_url = request.get("repository_url", "")
108
  except Exception:
109
  print(f"Could not find request file for {self.library_name} version {self.version}")
110
-
111
- # Try to get GitHub stars and license if missing
112
- if self.stars == 0 or self.license == "?":
113
- update_assessment_with_github_data(self)
114
 
115
  def to_dict(self):
116
  """Converts the Assessment Result to a dict compatible with our dataframe display"""
 
9
 
10
  from src.display.formatting import make_clickable_library, make_clickable_report
11
  from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
 
12
 
13
 
14
  @dataclass
 
30
  availability: bool = True
31
  verified: bool = False
32
  report_url: str = "" # URL to detailed assessment report
 
33
 
34
  @classmethod
35
  def init_from_json_file(self, json_filepath):
 
88
  verified=assessment.get("independently_verified", False),
89
  last_update=last_update,
90
  report_url=assessment.get("report_url", ""),
 
91
  )
92
 
93
  def update_with_request_file(self, requests_path):
 
99
  request = json.load(f)
100
  self.library_type = LibraryType.from_str(request.get("library_type", ""))
101
  self.stars = request.get("stars", 0)
 
 
 
102
  except Exception:
103
  print(f"Could not find request file for {self.library_name} version {self.version}")
 
 
 
 
104
 
105
  def to_dict(self):
106
  """Converts the Assessment Result to a dict compatible with our dataframe display"""