import json
import os
import re
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Set, Tuple

import requests

# Seconds to wait on the GitHub API; without a timeout ``requests`` can block
# forever on an unresponsive connection.
_GITHUB_API_TIMEOUT = 10

_FORMAT_ERROR = "Repository name must be in the format 'organization/repository'"


def _repo_name_from_url(repo_url: str) -> Optional[str]:
    """Return the ``org/repo`` identifier embedded in a GitHub URL.

    Returns None when the URL has no ``github.com/<org>/<repo>`` path, in
    which case the caller falls back to the explicitly supplied name.
    """
    _, marker, path = repo_url.partition("github.com/")
    parts = path.split("/")
    if not marker or len(parts) < 2:
        return None

    org, repo = parts[0], parts[1]
    # Strip only a trailing ".git"; repository names may legitimately contain
    # dots (e.g. "socket.io"), so cutting at the first "." would be wrong.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    return f"{org}/{repo}"


def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.

    Args:
        repo_name: The name of the repository (org/repo format)
        repo_url: URL to the repository

    Returns:
        Tuple of (is_valid, error_message, library_info). On failure
        ``is_valid`` is False, ``error_message`` explains why and
        ``library_info`` is an empty dict.
    """
    # Basic format validation: exactly one "/" separating two non-empty parts.
    name_parts = repo_name.split("/") if repo_name else []
    if len(name_parts) != 2 or not all(name_parts):
        return False, _FORMAT_ERROR, {}
    org, repo = name_parts

    # If a GitHub URL was supplied, it must point at the same repository.
    if repo_url and "github.com" in repo_url:
        url_repo_name = _repo_name_from_url(repo_url)
        # An unparseable URL is not an error: fall back to using repo_name.
        if url_repo_name is not None and url_repo_name != repo_name:
            return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}

    # Get repository information from GitHub API
    api_url = f"https://api.github.com/repos/{org}/{repo}"

    try:
        response = requests.get(api_url, timeout=_GITHUB_API_TIMEOUT)
        if response.status_code != 200:
            try:
                message = response.json().get("message", "Unknown error")
            except ValueError:
                # Error body was not JSON (e.g. an HTML page from a proxy).
                message = "Unknown error"
            return False, f"Repository not found or not accessible: {message}", {}

        # Parse repository data
        library_info = get_library_info(response.json())
        return True, "", library_info
    except Exception as e:
        # Validation boundary: report any network/parsing failure to the
        # caller as an error tuple instead of raising.
        return False, f"Error accessing repository: {str(e)}", {}


def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata
    """
    # The API reports `"license": null` for repositories without a detected
    # license, so the key can be present with a None value; `or {}` covers
    # both the missing and the null case.
    license_info = repo_data.get("license") or {}

    library_info = {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_info.get("name", "Unknown"),
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }

    return library_info


def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ``.json`` files located exactly one directory level below
    ``requested_libraries_dir`` are read. Each file must contain the keys
    ``library`` and ``version``.

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates)
    """
    depth = 1
    library_ids: Set[str] = set()
    orgs_to_submission_dates: Dict[str, List[str]] = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Compute depth from the normalised relative path so that a trailing
        # separator on the argument does not skew the result.
        relative_root = os.path.relpath(root, requested_libraries_dir)
        current_depth = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1
        if current_depth < depth:
            continue

        # Nothing below the target depth is ever read, so stop descending.
        dirs[:] = []

        for file_name in files:
            if not file_name.endswith(".json"):
                continue
            with open(os.path.join(root, file_name), "r", encoding="utf-8") as f:
                info = json.load(f)

            library_ids.add(f"{info['library']}_{info['version']}")

            # Select organisation
            if "/" not in info["library"] or "submitted_time" not in info:
                continue
            # partition() tolerates identifiers with more than one "/".
            organisation, _, _ = info["library"].partition("/")
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates