LibVulnWatch / src /submission /check_validity.py
seonglae-holistic's picture
fix: submit library with minimal information
fdddab8
raw
history blame
2.81 kB
import json
import os
from collections import defaultdict
from typing import Dict, Tuple, Any, List, Set
def is_repository_valid(repo_name: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Performs a basic sanity check on a repository name.

    Only emptiness is checked here: any truthy name is accepted, and no
    further format validation or lookup of the repository is performed.

    Args:
        repo_name: The name of the repository

    Returns:
        Tuple of (is_valid, error_message, library_info). The error message
        is empty on success, and library_info is always an empty dict.
    """
    library_info: Dict[str, Any] = {}

    # A non-empty name is the only requirement.
    if repo_name:
        return True, "", library_info

    return False, "Repository name is required", library_info
def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Missing keys fall back to a default ("" for text fields, 0 for counters,
    "main" for the default branch, False for the archived flag). Values that
    are present in repo_data are passed through unchanged.

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata. The "license" entry is the name of
        the license object, or "Unknown" when the license is absent, null,
        or not a mapping.
    """
    # The "license" key may be present with a null value, in which case
    # `.get("license", {})` would return None and the chained `.get` would
    # raise AttributeError. Only read the name from an actual mapping.
    license_data = repo_data.get("license")
    if isinstance(license_data, dict):
        license_name = license_data.get("name", "Unknown")
    else:
        license_name = "Unknown"

    library_info = {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_name,
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }
    return library_info
def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ".json" files located exactly one directory level below
    requested_libraries_dir are read. Each file must contain "library" and
    "version" keys (a KeyError is raised otherwise); together they form the
    identifier "<library>_<version>".

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates).
        The organisation is the part of "library" before the first "/";
        entries without a "/" or without a "submitted_time" key contribute
        an identifier but no submission date. The mapping is a defaultdict
        of lists.
    """
    target_depth = 1
    library_ids: Set[str] = set()
    orgs_to_submission_dates: Dict[str, List[str]] = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Measure depth on the path relative to the walked directory rather
        # than on raw separator counts, so a trailing separator or a
        # non-normalised argument does not shift the result.
        relative_root = os.path.relpath(root, requested_libraries_dir)
        if relative_root == os.curdir:
            current_depth = 0
        else:
            current_depth = relative_root.count(os.sep) + 1

        if current_depth != target_depth:
            continue

        # Nothing below the target depth is ever read, so stop descending.
        dirs[:] = []

        for file_name in files:
            if not file_name.endswith(".json"):
                continue

            with open(os.path.join(root, file_name), "r", encoding="utf-8") as f:
                info = json.load(f)

            library = info["library"]
            library_ids.add(f"{library}_{info['version']}")

            # Select organisation
            if "/" not in library or "submitted_time" not in info:
                continue
            # Split on the first "/" only, so identifiers containing more
            # than one "/" do not fail tuple unpacking.
            organisation = library.split("/", 1)[0]
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates