Spaces:

Surn
/

3D-Viewer

Running

File size: 14,285 Bytes

# modules/storage.py
import os
import urllib.parse
import tempfile
import shutil
import json
import base64
from huggingface_hub import login, upload_folder, hf_hub_download, HfApi
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
from modules.constants import HF_API_TOKEN, upload_file_types, model_extensions, image_extensions

def generate_permalink(valid_files, base_url_external, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Build a viewer permalink from a list of uploaded file paths.

    Each path is classified by its lower-cased extension. The first file whose
    extension is in ``model_extensions`` becomes the ``3d`` parameter (any later
    model files are ignored); files whose extension is in ``image_extensions``
    are collected in input order and become ``hm`` and ``image`` respectively.

    Parameters:
        valid_files (list): File paths; only the base name of each is used.
        base_url_external (str): URL prefix under which the files are served.
        permalink_viewer_url (str): Host name of the viewer (no scheme).

    Returns:
        str | None: The permalink when a model file and exactly two image files
        were found, otherwise None.
    """
    first_model_url = None
    image_urls = []

    for path in valid_files:
        name = os.path.basename(path)
        suffix = os.path.splitext(name)[1].lower()
        file_url = f"{base_url_external}/{name}"
        if suffix in model_extensions:
            # Only the first model encountered is kept.
            if first_model_url is None:
                first_model_url = file_url
        elif suffix in image_extensions:
            image_urls.append(file_url)

    # Guard clause: bail out unless we have a model and exactly two images.
    if not first_model_url or len(image_urls) != 2:
        return None

    height_map_url, main_image_url = image_urls
    query = urllib.parse.urlencode(
        {"3d": first_model_url, "hm": height_map_url, "image": main_image_url}
    )
    return f"https://{permalink_viewer_url}/?{query}"

def generate_permalink_from_urls(model_url, hm_url, img_url, permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Build the viewer permalink from three already-resolved URLs.

    The URLs are passed individually (rather than as a list) so that each one
    always lands in the same query parameter.

    Parameters:
        model_url (str): URL of the 3D model; sent as the ``3d`` parameter.
        hm_url (str): URL of the height map image; sent as the ``hm`` parameter.
        img_url (str): URL of the main image; sent as the ``image`` parameter.
        permalink_viewer_url (str): Host name of the viewer (no scheme).

    Returns:
        str: ``https://<permalink_viewer_url>/?3d=...&hm=...&image=...`` with the
        values URL-encoded.
    """
    query_pairs = [
        ("3d", model_url),
        ("hm", hm_url),
        ("image", img_url),
    ]
    encoded_query = urllib.parse.urlencode(query_pairs)
    return f"https://{permalink_viewer_url}/?{encoded_query}"

def upload_files_to_repo(files, repo_id, folder_name, create_permalink=False, repo_type="dataset", permalink_viewer_url="surn-3d-viewer.hf.space"):
    """
    Uploads multiple files to a Hugging Face repository in a single commit via upload_folder.

    Parameters:
        files (list): Files to upload. Each entry may be a path (str or os.PathLike)
                      or an object exposing the path as a string ``name`` attribute
                      (e.g. an open/temporary file). Entries that yield no path, or
                      whose extension is not in ``upload_file_types``, are skipped.
                      ``None`` is treated as an empty list.
        repo_id (str): The repository ID on Hugging Face for storage, e.g. "Surn/Storage".
        folder_name (str): The subfolder within the repository where files will be saved.
                           Trailing slashes are stripped.
        create_permalink (bool): If True and exactly three valid files are uploaded
                                 (1 model and 2 images), returns a single permalink to the
                                 viewer with query parameters. Otherwise, returns
                                 individual links for each file.
        repo_type (str): Repository type ("space", "dataset", "model"). Default is "dataset".
                         Also selects the URL prefix used for the returned links.
        permalink_viewer_url (str): Host name of the viewer used when building the permalink.

    Returns:
        If no valid files were supplied:
            list: An empty list (nothing is uploaded and no login is attempted).
        If create_permalink is True and files match the criteria:
            tuple: (response, permalink) where response is the output of the batch upload
                   and permalink is the URL string (with fully qualified file paths).
        Otherwise:
            list: A list of tuples (response, link), one per uploaded file.
    """
    # Ensure folder_name does not have a trailing slash.
    folder_name = folder_name.rstrip("/")

    # Normalise every entry to a plain path string up front, so the copy and
    # link-building steps below never receive a file object.
    valid_files = []
    for f in files or []:
        if isinstance(f, (str, os.PathLike)):
            file_path = os.fspath(f)
        else:
            file_path = getattr(f, "name", None)
        if not isinstance(file_path, str):
            continue
        ext = os.path.splitext(file_path)[1].lower()
        if ext in upload_file_types:
            valid_files.append(file_path)

    if not valid_files:
        return []

    # Log in using the HF API token (only once there is something to upload).
    login(token=HF_API_TOKEN)

    # Stage the files in a temporary directory so they can be pushed as one commit.
    # NOTE: files are staged by base name, so two inputs sharing a base name
    # overwrite each other here.
    with tempfile.TemporaryDirectory() as temp_dir:
        for file_path in valid_files:
            shutil.copy(file_path, os.path.join(temp_dir, os.path.basename(file_path)))

        # Files will be uploaded under the folder (path_in_repo) given by folder_name.
        response = upload_folder(
            folder_path=temp_dir,
            repo_id=repo_id,
            repo_type=repo_type,
            path_in_repo=folder_name,
            commit_message="Batch upload files"
        )

    # Construct external URLs for each uploaded file. Files are served at
    #   https://huggingface.co/<type prefix><repo_id>/resolve/main/<folder_name>/<filename>
    # where the prefix is "datasets/" or "spaces/", and empty for model repos.
    if repo_type in (None, "model"):
        type_prefix = ""
    else:
        type_prefix = f"{repo_type}s/"
    base_url_external = f"https://huggingface.co/{type_prefix}{repo_id}/resolve/main"
    if folder_name:
        # Skip the segment entirely for root uploads to avoid a "//" in the link.
        base_url_external = f"{base_url_external}/{folder_name}"

    # If permalink creation is requested and exactly 3 valid files are provided,
    # try to generate a permalink using generate_permalink().
    if create_permalink and len(valid_files) == 3:
        permalink = generate_permalink(valid_files, base_url_external, permalink_viewer_url)
        if permalink:
            return response, permalink

    # Otherwise, return individual tuples for each file.
    return [
        (response, f"{base_url_external}/{os.path.basename(file_path)}")
        for file_path in valid_files
    ]

def _generate_short_id(length=8):
    """Generates a random base64 URL-safe string."""
    return base64.urlsafe_b64encode(os.urandom(length * 2))[:length].decode('utf-8')

def _get_json_from_repo(repo_id, json_file_name, repo_type="dataset"):
    """
    Fetch ``json_file_name`` from the given repo and return its parsed content.

    The file is obtained with ``hf_hub_download``, parsed with ``json.load`` and
    the downloaded path is removed afterwards. Any failure (missing repository,
    missing file, invalid JSON, or any other exception) is reported with
    ``print`` and results in an empty list being returned.

    Parameters:
        repo_id (str): Repository ID on Hugging Face.
        json_file_name (str): Path of the JSON file inside the repository.
        repo_type (str): Repository type. Default is "dataset".

    Returns:
        The decoded JSON value, or ``[]`` on any error.
    """
    try:
        login(token=HF_API_TOKEN)
        # The token is passed explicitly as well, mirroring the login above.
        local_copy = hf_hub_download(
            repo_id=repo_id,
            filename=json_file_name,
            repo_type=repo_type,
            token=HF_API_TOKEN,
        )
        with open(local_copy, 'r') as handle:
            parsed = json.load(handle)
        # Remove the downloaded file once its content is in memory.
        os.remove(local_copy)
        return parsed
    except RepositoryNotFoundError:
        print(f"Repository {repo_id} not found.")
    except EntryNotFoundError:
        print(f"JSON file {json_file_name} not found in {repo_id}. Initializing with empty list.")
    except json.JSONDecodeError:
        print(f"Error decoding JSON from {json_file_name}. Returning empty list.")
    except Exception as exc:
        print(f"An unexpected error occurred while fetching {json_file_name}: {exc}")
    # Every handled failure falls through to the same empty result.
    return []

def _upload_json_to_repo(data, repo_id, json_file_name, repo_type="dataset"):
    """
    Serialise ``data`` as JSON and upload it to ``json_file_name`` in the repo.

    The JSON is written to a temporary file which is uploaded with
    ``HfApi.upload_file`` and always removed afterwards, whether the upload
    succeeded, failed, or the data could not be serialised.

    Parameters:
        data: JSON-serialisable object to store.
        repo_id (str): Repository ID on Hugging Face.
        json_file_name (str): Destination path of the file inside the repository.
        repo_type (str): Repository type. Default is "dataset".

    Returns:
        bool: True if the upload completed, False if any exception occurred
        (the error is reported with ``print``).
    """
    tmp_file_path = None
    try:
        login(token=HF_API_TOKEN)
        api = HfApi()
        with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as tmp_file:
            # Record the path before dumping: with delete=False the file would
            # otherwise be leaked if json.dump raises.
            tmp_file_path = tmp_file.name
            json.dump(data, tmp_file, indent=2)

        api.upload_file(
            path_or_fileobj=tmp_file_path,
            path_in_repo=json_file_name,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=f"Update {json_file_name}"
        )
        return True
    except Exception as e:
        print(f"Failed to upload {json_file_name} to {repo_id}: {e}")
        return False
    finally:
        # Best-effort cleanup on every path; a failed delete must not mask the result.
        if tmp_file_path is not None and os.path.exists(tmp_file_path):
            try:
                os.remove(tmp_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {tmp_file_path}: {cleanup_error}")

def _find_url_in_json(data, short_url=None, full_url=None):
    """
    Searches the JSON data.
    If short_url is provided, returns the corresponding full_url or None.
    If full_url is provided, returns the corresponding short_url or None.
    """
    if not data: # Handles cases where data might be None or empty
        return None
    if short_url:
        for item in data:
            if item.get("short_url") == short_url:
                return item.get("full_url")
    if full_url:
        for item in data:
            if item.get("full_url") == full_url:
                return item.get("short_url")
    return None

def _add_url_to_json(data, short_url, full_url):
    """Adds a new short_url/full_url pair to the data. Returns updated data."""
    if data is None: # Initialize if data is None
        data = []
    data.append({"short_url": short_url, "full_url": full_url})
    return data

def gen_full_url(short_url=None, full_url=None, repo_id=None, repo_type="dataset", permalink_viewer_url="surn-3d-viewer.hf.space", json_file="shortener.json"):
    """
    Look up or register a short-ID / full-URL pair in a JSON file stored in a Hugging Face repository.

    The JSON file holds a list of ``{"short_url": <id>, "full_url": <url>}`` entries.
    The value stored and returned as "short_url" is the bare ID, not a complete URL.

    Behaviour by input:
        - Only short_url: returns the stored full_url for that ID.
        - Only full_url: returns the ID already stored for it, or generates a new
          unused ID, stores the pair and returns the new ID.
        - Both: reports whether the pair already exists, conflicts with stored
          data, or stores it as a new pair.

    Parameters:
        short_url (str): Short ID to look up or register.
        full_url (str): Full URL to look up or register.
        repo_id (str): Repository ID that holds the JSON file. Required.
        repo_type (str): Repository type. Default is "dataset".
        permalink_viewer_url (str): Currently unused by this function.
        json_file (str): Path of the JSON file inside the repository.

    Returns:
        tuple: (status_message, result_url) where status_message is one of
            "error_repo_id_missing"               - repo_id not given; result None.
            "error_no_input"                      - neither URL given; result None.
            "success_retrieved_full"              - result is the stored full_url.
            "not_found_short"                     - unknown short_url; result None.
            "success_retrieved_short"             - result is the stored short ID.
            "created_short"                       - result is the newly generated short ID.
            "exists_match"                        - the given pair is already stored; result is short_url.
            "error_conflict_short_exists"         - short_url maps to a different full_url; result is short_url.
            "exists_full_maps_to_different_short" - result is the short ID already stored for full_url.
            "error_conflict_short_id_taken"       - short_url is stored with an empty full_url; result is short_url.
            "created_specific_pair"               - the given pair was stored; result is short_url.
            "error_upload"                        - storing the updated file failed; result None.
    """
    if not repo_id:
        return "error_repo_id_missing", None
    if not short_url and not full_url:
        return "error_no_input", None

    login(token=HF_API_TOKEN)  # Ensure login at the beginning
    # NOTE(review): _get_json_from_repo returns [] on any failure, so a failed
    # fetch followed by a successful upload below would replace the stored file
    # with just the new entry - confirm whether that is acceptable.
    url_data = _get_json_from_repo(repo_id, json_file, repo_type)

    # Case 1: Only short_url provided (lookup full_url)
    if short_url and not full_url:
        found_full_url = _find_url_in_json(url_data, short_url=short_url)
        if found_full_url:
            return "success_retrieved_full", found_full_url
        return "not_found_short", None

    # Case 2: Only full_url provided (lookup or create short_url)
    if full_url and not short_url:
        existing_short_url = _find_url_in_json(url_data, full_url=full_url)
        if existing_short_url:
            return "success_retrieved_short", existing_short_url

        # Generate an ID that is not already a key in the file; reusing one
        # would make the older entry shadow the new one on lookup.
        taken_ids = [entry.get("short_url") for entry in (url_data or [])]
        new_short_id = _generate_short_id()
        while new_short_id in taken_ids:
            new_short_id = _generate_short_id()

        url_data = _add_url_to_json(url_data, new_short_id, full_url)
        if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
            return "created_short", new_short_id
        return "error_upload", None

    # Case 3: Both short_url and full_url provided (the only remaining combination).
    found_full_for_short = _find_url_in_json(url_data, short_url=short_url)
    found_short_for_full = _find_url_in_json(url_data, full_url=full_url)

    # Either lookup confirming the pair means it is already stored.
    if found_full_for_short == full_url or found_short_for_full == short_url:
        return "exists_match", short_url
    if found_full_for_short:
        # short_url exists but maps to a different full_url.
        return "error_conflict_short_exists", short_url
    if found_short_for_full:
        # full_url is already registered under another ID; report that ID.
        return "exists_full_maps_to_different_short", found_short_for_full
    if found_full_for_short is not None:
        # short_url is present in the file but its full_url is empty.
        return "error_conflict_short_id_taken", short_url

    # Neither side is known yet: store the pair exactly as given.
    url_data = _add_url_to_json(url_data, short_url, full_url)
    if _upload_json_to_repo(url_data, repo_id, json_file, repo_type):
        return "created_specific_pair", short_url
    return "error_upload", None