Spaces:
Running
Running
import json | |
import os | |
import re | |
import requests | |
from collections import defaultdict | |
from datetime import datetime, timedelta, timezone | |
from typing import Dict, Tuple, Any, List, Set | |
def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.

    The name is validated locally first, then (when a GitHub URL is supplied)
    cross-checked against the URL, and finally looked up through the GitHub
    REST API.

    Args:
        repo_name: The name of the repository (org/repo format)
        repo_url: URL to the repository. May be empty; only URLs containing
            "github.com" are cross-checked against repo_name.

    Returns:
        Tuple of (is_valid, error_message, library_info). On success the error
        message is empty and library_info is the result of get_library_info.
        On any failure is_valid is False and library_info is an empty dict.
    """
    # Basic format validation: exactly two non-empty segments. Checking the
    # segment count here (rather than just the presence of "/") keeps names
    # such as "a/b/c" from blowing up the org/repo unpacking below.
    name_parts = repo_name.split("/") if repo_name else []
    if len(name_parts) != 2 or not all(name_parts):
        return False, "Repository name must be in the format 'organization/repository'", {}
    org, repo = name_parts

    # Check if GitHub URL
    if repo_url and "github.com" in repo_url:
        # Extract org and repo from the URL. If the URL has fewer than two
        # path segments after the host, fall back to trusting repo_name.
        segments = repo_url.partition("github.com/")[2].split("/")
        if len(segments) >= 2:
            url_org, url_repo = segments[0], segments[1]
            # Only strip a clone-style ".git" suffix; repository names may
            # legitimately contain dots (e.g. "socket.io").
            if url_repo.endswith(".git"):
                url_repo = url_repo[: -len(".git")]
            url_repo_name = f"{url_org}/{url_repo}"

            # Check if URL matches repo_name
            if url_repo_name != repo_name:
                return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}

    # Get repository information from GitHub API
    api_url = f"https://api.github.com/repos/{org}/{repo}"
    try:
        # A timeout prevents an unresponsive API from hanging the caller.
        response = requests.get(api_url, timeout=10)
        if response.status_code != 200:
            return False, f"Repository not found or not accessible: {response.json().get('message', 'Unknown error')}", {}

        # Parse repository data
        library_info = get_library_info(response.json())
        return True, "", library_info
    except Exception as e:
        # Deliberate catch-all boundary: this function reports every failure
        # (network, JSON decoding, unexpected payload) through its return
        # tuple instead of raising.
        return False, f"Error accessing repository: {str(e)}", {}
def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata. Keys missing from repo_data fall
        back to a default: empty string for text fields, 0 for counters,
        "Unknown" for the license name, "main" for the default branch and
        False for the archived flag.
    """
    # The "license" key can be present with a null value, so a plain
    # .get("license", {}) would hand back None and break the chained lookup.
    # Coalesce to an empty dict first.
    license_data = repo_data.get("license") or {}

    return {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_data.get("name", "Unknown"),
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }
def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ".json" files located exactly one directory level below
    requested_libraries_dir are read. Each file must contain "library" and
    "version" keys; "submitted_time" is optional.

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates).
        Identifiers have the form "<library>_<version>". The mapping is a
        defaultdict(list) keyed by the text before the first "/" of the
        library name; libraries without a "/" or without a "submitted_time"
        are not added to it.
    """
    depth = 1
    library_ids: Set[str] = set()
    orgs_to_submission_dates: Dict[str, List[str]] = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Measure depth relative to the base directory instead of counting
        # separators in the raw strings, so a trailing separator on the
        # argument does not shift the result by one.
        relative_root = os.path.relpath(root, requested_libraries_dir)
        current_depth = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1
        if current_depth != depth:
            continue

        # Nothing below the target depth is ever read; stop descending.
        dirs[:] = []

        for file in files:
            if not file.endswith(".json"):
                continue
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                info = json.load(f)

            library = info["library"]
            library_ids.add(f"{library}_{info['version']}")

            # Select organisation
            if "/" not in library or "submitted_time" not in info:
                continue
            # Split once so names with extra slashes still yield their
            # leading organisation instead of failing to unpack.
            organisation = library.split("/", 1)[0]
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates