import json
import os
import re
import requests
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from typing import Dict, Tuple, Any, List, Set

def is_repository_valid(repo_name: str, repo_url: str) -> Tuple[bool, str, Dict[str, Any]]:
    """
    Checks if a GitHub repository is valid and accessible.

    The name must have exactly one "/" separating a non-empty organization
    from a non-empty repository. When ``repo_url`` is a GitHub URL from which
    an ``org/repo`` pair can be extracted, that pair must equal ``repo_name``
    (a trailing ``.git`` on the URL is ignored). The repository is then looked
    up through the GitHub REST API.

    Args:
        repo_name: The name of the repository (org/repo format)
        repo_url: URL to the repository; may be empty

    Returns:
        Tuple of (is_valid, error_message, library_info). On failure
        ``is_valid`` is False, ``error_message`` explains why and
        ``library_info`` is an empty dict. This function does not raise for
        malformed names or request failures.
    """
    format_error = "Repository name must be in the format 'organization/repository'"

    # Basic format validation: exactly two non-empty components. Checking this
    # up front avoids an unpacking error on names such as "a/b/c".
    if not repo_name:
        return False, format_error, {}
    name_parts = repo_name.split("/")
    if len(name_parts) != 2 or not all(name_parts):
        return False, format_error, {}
    org, repo = name_parts

    # Cross-check against the URL when it is a GitHub URL we can parse.
    if repo_url and "github.com/" in repo_url:
        path = repo_url.split("github.com/", 1)[1]
        # Ignore any query string or fragment, and a trailing slash.
        path = path.split("?", 1)[0].split("#", 1)[0].rstrip("/")
        segments = path.split("/")
        # With fewer than two usable segments the URL cannot be compared, so
        # fall back to trusting repo_name.
        if len(segments) >= 2 and segments[0] and segments[1]:
            url_org, url_repo = segments[0], segments[1]
            # Strip only the clone suffix; repository names may contain dots.
            if url_repo.endswith(".git"):
                url_repo = url_repo[: -len(".git")]
            url_repo_name = f"{url_org}/{url_repo}"

            if url_repo_name != repo_name:
                return False, f"Repository name ({repo_name}) doesn't match the URL ({url_repo_name})", {}

    # Get repository information from GitHub API
    api_url = f"https://api.github.com/repos/{org}/{repo}"

    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(api_url, timeout=10)
        if response.status_code != 200:
            # The error body is normally JSON with a "message" field, but it
            # may not be (e.g. a proxy error page).
            try:
                message = response.json().get("message", "Unknown error")
            except (ValueError, AttributeError):
                message = "Unknown error"
            return False, f"Repository not found or not accessible: {message}", {}

        # Parse repository data
        repo_data = response.json()
        library_info = get_library_info(repo_data)

        return True, "", library_info

    except Exception as e:
        # Boundary handler: callers expect a result tuple, never an exception.
        return False, f"Error accessing repository: {str(e)}", {}

def get_library_info(repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant information from GitHub repository data.

    Args:
        repo_data: GitHub API response for a repository

    Returns:
        Dictionary with library metadata. Missing fields fall back to
        defaults: empty strings for text fields, 0 for counters, "main" for
        the default branch, False for the archived flag, and "Unknown" for
        the license name.
    """
    # The "license" key can be present with a null value (repository without
    # a detected license), so a .get() default alone is not sufficient.
    license_data = repo_data.get("license")
    license_name = None
    if isinstance(license_data, dict):
        license_name = license_data.get("name")

    library_info = {
        "name": repo_data.get("name", ""),
        "full_name": repo_data.get("full_name", ""),
        "description": repo_data.get("description", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "license": license_name or "Unknown",
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "open_issues": repo_data.get("open_issues_count", 0),
        "default_branch": repo_data.get("default_branch", "main"),
        "is_archived": repo_data.get("archived", False),
    }

    return library_info

def already_submitted_libraries(requested_libraries_dir: str) -> Tuple[Set[str], Dict[str, List[str]]]:
    """
    Gathers a list of already submitted libraries to avoid duplicates.

    Only ``.json`` files located exactly one directory level below
    ``requested_libraries_dir`` are read. Each file must contain the keys
    ``library`` and ``version``; ``submitted_time`` is optional.

    Args:
        requested_libraries_dir: Directory with library assessment requests

    Returns:
        Tuple of (set of library identifiers, dict mapping orgs to submission dates).
        Identifiers have the form ``"<library>_<version>"``. The organisation
        is the part of ``library`` before the first "/"; entries without a
        "/" or without ``submitted_time`` contribute an identifier only.

    Raises:
        KeyError: If a request file lacks ``library`` or ``version``.
        json.JSONDecodeError: If a request file is not valid JSON.
    """
    depth = 1
    library_ids = set()
    orgs_to_submission_dates = defaultdict(list)

    for root, dirs, files in os.walk(requested_libraries_dir):
        # Measure depth relative to the start directory; counting separators
        # in the raw strings is off by one when the argument ends with one.
        relative = os.path.relpath(root, requested_libraries_dir)
        current_depth = 0 if relative == os.curdir else relative.count(os.sep) + 1
        if current_depth < depth:
            continue

        # Nothing below the target depth is read, so do not descend further.
        dirs[:] = []

        for file in files:
            if not file.endswith(".json"):
                continue
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                info = json.load(f)

            library = info["library"]
            library_ids.add(f"{library}_{info['version']}")

            # Select organisation
            if "/" not in library or "submitted_time" not in info:
                continue
            # Split once so identifiers with additional slashes do not break
            # the unpacking.
            organisation = library.split("/", 1)[0]
            orgs_to_submission_dates[organisation].append(info["submitted_time"])

    return library_ids, orgs_to_submission_dates