Spaces:
Running
Running
File size: 4,265 Bytes
9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 9ab539a bccaf50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import json
import os
import re
import requests
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, Tuple, Any, List, Set
def is_repository_valid(
    repo_name: str, repo_url: str, timeout: float = 10.0
) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.

    Args:
        repo_name: The name of the repository (org/repo format)
        repo_url: URL to the repository. When it is a GitHub URL containing
            both an organization and a repository segment, it must agree
            with repo_name.
        timeout: Seconds to wait for the GitHub API before giving up

    Returns:
        Tuple of (is_valid, error_message, library_info). error_message is
        empty and library_info is populated only when is_valid is True.
    """
    # Basic format validation: exactly one "/" separating two non-empty parts.
    # Validating here keeps the unpacking below from raising on "a/b/c".
    name_parts = repo_name.split("/") if repo_name else []
    if len(name_parts) != 2 or not all(name_parts):
        return False, "Repository name must be in the format 'organization/repository'", {}
    org, repo = name_parts

    # Check if GitHub URL
    if repo_url and "github.com/" in repo_url:
        # Extract org and repo from URL if provided
        url_parts = repo_url.split("github.com/", 1)[1].split("/")
        # A URL without a repository segment cannot be compared, so fall
        # back to trusting repo_name in that case.
        if len(url_parts) >= 2:
            url_repo = url_parts[1]
            # Only drop a trailing ".git"; dots are legal inside repository
            # names (e.g. "chart.js") and must not truncate the name.
            if url_repo.endswith(".git"):
                url_repo = url_repo[: -len(".git")]
            url_repo_name = f"{url_parts[0]}/{url_repo}"
            # Check if URL matches repo_name
            if url_repo_name != repo_name:
                return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}

    # Get repository information from GitHub API
    api_url = f"https://api.github.com/repos/{org}/{repo}"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(api_url, timeout=timeout)
        if response.status_code != 200:
            return False, f"Repository not found or not accessible: {response.json().get('message', 'Unknown error')}", {}
        # Parse repository data
        repo_data = response.json()
        library_info = get_library_info(repo_data)
        return True, "", library_info
    except Exception as e:
        # Boundary handler: callers rely on getting an error tuple back
        # rather than an exception (network failure, invalid JSON, ...).
        return False, f"Error accessing repository: {str(e)}", {}
def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata. Missing keys fall back to an empty
        string, zero, False, "main" (default branch) or "Unknown" (license).
    """
    # The "license" key can be present with a null value (repositories with
    # no detected license), in which case .get("license", {}) would return
    # None and the chained .get() would raise AttributeError.
    license_data = repo_data.get("license") or {}

    library_info = {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_data.get("name", "Unknown"),
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }
    return library_info
def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ".json" files located exactly one directory level below
    requested_libraries_dir are read.

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates).
        Identifiers have the form "<library>_<version>".

    Raises:
        KeyError: If a request file lacks the "library" or "version" key.
    """
    depth = 1
    library_ids: Set[str] = set()
    orgs_to_submission_dates = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Measure depth from the relative path rather than by counting
        # separators in the raw argument: a trailing separator on
        # requested_libraries_dir would otherwise shift every depth by one.
        relative_root = os.path.relpath(root, requested_libraries_dir)
        current_depth = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1

        if current_depth >= depth:
            # Nothing below the target depth is ever read; skip descending.
            dirs[:] = []
        if current_depth != depth:
            continue

        for file in files:
            if not file.endswith(".json"):
                continue
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                info = json.load(f)
            library_ids.add(f"{info['library']}_{info['version']}")

            # Select organisation
            if "/" not in info["library"] or "submitted_time" not in info:
                continue
            # Split once so names with extra slashes do not break unpacking.
            organisation = info["library"].split("/", 1)[0]
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates
|