import json
import os
from collections import defaultdict
from typing import Dict, Tuple, Any, List, Set


def is_repository_valid(repo_name: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Performs a basic validity check on a repository name.

    Only the "name is non-empty" check is implemented here; no network
    request is made and no repository metadata is fetched.

    Args:
        repo_name: The name of the repository

    Returns:
        Tuple of (is_valid, error_message, library_info). ``error_message``
        is empty when the name is accepted, and ``library_info`` is always
        an empty dict in this implementation.
    """
    # Basic format validation
    if not repo_name:
        return False, "Repository name is required", {}

    return True, "", {}


def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Missing keys fall back to neutral defaults (empty string, 0, False,
    "main" for the default branch, "Unknown" for the license name).

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata
    """
    # The "license" key may be present with a null value (e.g. when no
    # license is reported), in which case ``.get("license", {})`` would
    # return None and the chained ``.get`` would raise AttributeError.
    license_data = repo_data.get("license") or {}

    return {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_data.get("name", "Unknown"),
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }


def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ``.json`` files located exactly one directory level below
    ``requested_libraries_dir`` are read. Each file must contain the keys
    ``library`` and ``version``; a missing key raises ``KeyError``.

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates).
        Identifiers have the form ``"<library>_<version>"``. The organisation
        is the part of ``library`` before the first "/"; entries without a
        "/" or without a ``submitted_time`` key are not added to the mapping.
    """
    depth = 1
    library_ids: Set[str] = set()
    orgs_to_submission_dates: Dict[str, List[str]] = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Measure depth relative to the base directory. Counting separators
        # in the raw argument breaks when it carries a trailing separator.
        relative_root = os.path.relpath(root, requested_libraries_dir)
        current_depth = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1

        if current_depth >= depth:
            # Nothing deeper than the target level is ever read; pruning
            # in place stops os.walk from descending further.
            dirs.clear()
        if current_depth != depth:
            continue

        for file in files:
            if not file.endswith(".json"):
                continue

            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                info = json.load(f)

            library = info["library"]
            library_ids.add(f"{library}_{info['version']}")

            # Select organisation
            if "/" not in library or "submitted_time" not in info:
                continue
            # Split only on the first "/" so identifiers containing more
            # than one "/" do not raise on unpacking.
            organisation = library.split("/", 1)[0]
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates