from PIL import Image
import numpy as np
import base64
import io
from io import BytesIO
from PIL import Image, ImageFile
from pdf2image import convert_from_path
import tempfile
from multiprocessing import Pool
import os
from loguru import logger
import uuid

from typing import Any, List, Tuple, Type, Literal, Optional, Union, Dict

def encode_image(image_path):
    """
    Read a file from disk and return its contents as a base64 string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def load_image_from_base64(image):
    """
    Decode a base64-encoded image payload and open it with PIL.

    Args:
        image: Base64 data (as accepted by ``base64.b64decode``) holding the
            encoded image file.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    decoded_bytes = base64.b64decode(image)
    stream = BytesIO(decoded_bytes)
    return Image.open(stream)

def pil_image_to_base64(image: Image.Image) -> str:
    """
    Convert a PIL Image object to its base64 representation.

    The image is serialized as PNG into an in-memory buffer and the resulting
    bytes are base64-encoded.

    Args:
        image (Image.Image): The PIL Image object to be converted.

    Returns:
        str: The base64 representation of the PNG-encoded image.
    """
    # NOTE: the annotation is ``Image.Image`` (the class), not ``Image``
    # (the ``PIL.Image`` module), matching ``scale_image`` in this file.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

def scale_image(image: Image.Image, new_height: int = 1024) -> Image.Image:
    """
    Scale an image to a new height while maintaining the aspect ratio.

    Args:
        image (Image.Image): The PIL image to scale.
        new_height (int): Target height in pixels. Defaults to 1024.

    Returns:
        Image.Image: The result of ``image.resize`` with height ``new_height``
        and a proportionally scaled width of at least 1 pixel.
    """
    width, height = image.size

    # Use integer arithmetic instead of a float aspect ratio: going through
    # ``width / height`` and truncating with ``int`` can lose a pixel to
    # rounding error (e.g. 29x100 scaled to height 100 yielded width 28).
    # Clamp to 1 so very narrow images do not request a zero-width target,
    # which Pillow's resize rejects.
    new_width = max(1, int(width * new_height // height))

    return image.resize((new_width, new_height))

def unflatten_array(flat_list, vector_size=128):
    """
    Reshape a flat sequence into a 2-D array of fixed-width rows.

    Args:
        flat_list: Flat sequence of values; its length must be a multiple of
            ``vector_size`` for the reshape to succeed.
        vector_size (int): Number of values per row. Defaults to 128.

    Returns:
        numpy.ndarray: Array of shape ``(-1, vector_size)``, i.e. as many rows
        as needed to hold all values.
    """
    as_array = np.array(flat_list)
    target_shape = (-1, vector_size)
    return as_array.reshape(target_shape)

def get_image_embedding(image_list: list[Image], openai_client, model: str, flatten: bool = False) -> list:
    """
    Request embeddings for one or more images.

    Each image is converted to a PNG data URI and the whole batch is sent in a
    single ``openai_client.embeddings.create`` call.

    Args:
        image_list: PIL images to embed. A value that is not a list is wrapped
            into a one-element list.
        openai_client: Client object exposing ``embeddings.create``.
        model (str): Model name forwarded to the embeddings call.
        flatten (bool): Selects the ``encoding_format`` sent in ``extra_body``:
            ``"base64"`` when True, ``"float"`` otherwise.

    Returns:
        list: The ``embedding`` attribute of every entry in the response's
        ``data``, in response order.
        NOTE(review): per the original author's notes each entry is a
        list[float] when ``flatten`` is set and otherwise a list[list[float]]
        of 128-dim vectors; this depends on the serving backend, so confirm.
    """
    images = image_list if isinstance(image_list, list) else [image_list]

    # Encode every image as a PNG data URI understood by the endpoint.
    data_uris = []
    for image in images:
        data_uris.append(f"data:image/png;base64,{pil_image_to_base64(image)}")

    encoding_format = "base64" if flatten else "float"
    response = openai_client.embeddings.create(
        input=data_uris,
        model=model,
        extra_body={
            "modality": "image",
            "encoding_format": encoding_format,
        },
    )

    return [item.embedding for item in response.data]

def get_text_embedding(texts: list[str], openai_client, model: str, flatten: bool = False) -> list:
    """
    Request embeddings for one or more texts.

    The whole batch is sent in a single ``openai_client.embeddings.create``
    call.

    Args:
        texts: Texts to embed. A value that is not a list (e.g. a single
            string) is wrapped into a one-element list.
        openai_client: Client object exposing ``embeddings.create``.
        model (str): Model name forwarded to the embeddings call.
        flatten (bool): Selects the ``encoding_format`` sent in ``extra_body``:
            ``"base64"`` when True, ``"float"`` otherwise.

    Returns:
        list: The ``embedding`` attribute of every entry in the response's
        ``data``, in response order.
        NOTE(review): per the original author's notes each entry is a
        list[float] when ``flatten`` is set and otherwise a list[list[float]]
        of 128-dim vectors; this depends on the serving backend, so confirm.
    """
    batch = texts if isinstance(texts, list) else [texts]

    encoding_format = "base64" if flatten else "float"
    response = openai_client.embeddings.create(
        input=batch,
        model=model,
        extra_body={
            "encoding_format": encoding_format,
        },
    )

    return [item.embedding for item in response.data]

def load_images(image_paths):
    """
    Open every image in a collection of paths.

    Paths that fail to open are logged via ``logger.error`` and skipped, so
    the returned list may be shorter than ``image_paths``.

    Args:
        image_paths (list): List of image paths.

    Returns:
        list: The objects returned by ``Image.open`` for each path that opened
        successfully, in input order.
    """
    opened_images = []
    for path in image_paths:
        try:
            opened = Image.open(path)
        except Exception as e:
            logger.error(f"Error loading image at path {path}: {str(e)}")
        else:
            opened_images.append(opened)
    return opened_images
    

def process_pdf(pdf_path: str, output_folder: str, thread_count=1):
    """
    Render every page of a PDF to an image.

    Pages are rendered at 200 dpi by ``convert_from_path`` using a temporary
    directory as its working ``output_folder``.

    Args:
        pdf_path (str): Path of the PDF file to convert.
        output_folder (str): Currently unused. Kept in the signature so
            existing callers (which pass it positionally) keep working.
        thread_count (int): Forwarded to ``convert_from_path``.

    Returns:
        list: The page images returned by ``convert_from_path``, with their
        pixel data already loaded into memory.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        images = convert_from_path(pdf_path, dpi=200, output_folder=temp_dir, thread_count=thread_count)

        # With an output_folder the pages are written to disk and the returned
        # images may be backed by those files. Force the pixel data into
        # memory while the temporary directory still exists; otherwise the
        # images outlive the files they read from.
        for image in images:
            image.load()

    return images


def pdf_folder_to_images(pdf_folder: str, output_folder: str, process_count: int = 2):
    """
    Convert every PDF in a folder to images using a process pool.

    Files in ``pdf_folder`` whose name ends with ``.pdf`` (case-insensitive)
    are each passed to ``process_pdf`` in a worker process, and the per-PDF
    results are concatenated into one flat list.

    Args:
        pdf_folder (str): Folder to scan (non-recursively) for PDF files.
        output_folder (str): Forwarded to ``process_pdf`` for every file.
        process_count (int): Number of worker processes. ``None`` means
            ``os.cpu_count()``. Defaults to 2.

    Returns:
        list: All images from all PDFs, flattened. If any exception occurs it
        is logged with ``logger.exception`` and the function implicitly
        returns ``None``.
    """
    try:
        worker_total = os.cpu_count() if process_count is None else process_count

        # One (pdf_file, output_folder) argument tuple per PDF found.
        job_args = []
        for entry in os.listdir(pdf_folder):
            if entry.lower().endswith('.pdf'):
                job_args.append((os.path.join(pdf_folder, entry), output_folder))

        with Pool(worker_total) as pool:
            all_images = pool.starmap(process_pdf, job_args)

        # Flatten the per-PDF lists into a single list of pages.
        result = []
        for pages in all_images:
            result.extend(pages)

        logger.debug(f"Number of pdfs processed: {len(all_images)} - Number of images: {len(result)}")
        return result
    except Exception as e:
        logger.exception(f"Error during processing pdf: {e}")