import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import cv2
from keras.models import load_model, Model
from datasets import load_dataset
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
from PIL import Image

# Download and load the autoencoder and the precomputed encodings of the dataset images
model_path = hf_hub_download(repo_id="eybro/autoencoder", filename="autoencoder_model.keras", repo_type="model")
data_path = hf_hub_download(repo_id="eybro/encoded_images", filename="X_encoded_compressed.npy", repo_type="dataset")

autoencoder = load_model(model_path)
encoded_images = np.load(data_path)

# Load and split the dataset (80% train, 20% test); the fixed seed keeps the split
# consistent with the order in which `encoded_images` was produced
dataset = load_dataset("eybro/images")
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
dataset["train"] = split_dataset["train"]
dataset["test"] = split_dataset["test"]

# Example images shown in the gallery (unseen by the model)
example_images = {
    "Example 1": "examples/example_1.png",
}


def create_url_from_title(title: str, timestamp: int):
    """Look up the YouTube URL for a video title and append a start-time parameter."""
    video_urls = load_dataset("eybro/video_urls")
    df = video_urls["train"].to_pandas()
    filtered = df[df["title"] == title]
    base_url = filtered.iloc[0]["url"]
    return base_url + f"&t={timestamp}s"


def find_nearest_neighbors(encoded_images, input_image, top_n=5):
    """
    Find the closest neighbors to the input image in the encoded image space.

    Args:
        encoded_images (np.ndarray): Array of encoded images (shape: (n_samples, n_features)).
        input_image (np.ndarray): The encoded input image (shape: (1, n_features)).
        top_n (int): The number of nearest neighbors to return.

    Returns:
        List of tuples: (index, distance) of the top_n nearest neighbors.
    """
    # Compute distances between the query and every encoded image
    distances = euclidean_distances(encoded_images, input_image.reshape(1, -1)).flatten()

    # Indices of the top_n smallest distances
    nearest_neighbors = np.argsort(distances)[:top_n]
    return [(index, distances[index]) for index in nearest_neighbors]


def get_image(index):
    """Map a flat index over train + test back to the corresponding dataset row."""
    split = len(dataset["train"])
    if index < split:
        return dataset["train"][index]
    return dataset["test"][index - split]


def process_image(image):
    """Preprocess an image and encode it with the autoencoder's encoder layers."""
    img = np.array(image)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # swap channel order to match training-time preprocessing
    img = cv2.resize(img, (64, 64))
    img = img.astype("float32") / 255.0
    img = np.expand_dims(img, axis=0)

    # Truncate the autoencoder at layer 4 to obtain the encoder output
    layer_model = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
    encoded_array = layer_model.predict(img)

    # Max-pool over the channel axis to get a compact feature vector
    pooled_array = encoded_array.max(axis=-1)
    return pooled_array  # shape: (1, n_features)


def inference(image):
    input_image = process_image(image)
    nearest_neighbors = find_nearest_neighbors(encoded_images, input_image, top_n=5)

    print("Nearest neighbors (index, distance):")
    for neighbor in nearest_neighbors:
        print(neighbor)

    top4 = [int(i[0]) for i in nearest_neighbors[:4]]
    print(f"top 4: {top4}")
    for i in top4:
        im = get_image(i)
        print(im["label"], im["timestamp"])

    result_image = get_image(top4[0])
    url = create_url_from_title(result_image["label"], result_image["timestamp"])
    result = f"{result_image['label']} {result_image['timestamp']} \n{url}"

    # Plot the top-4 matches in a 2x2 grid (for local debugging; not returned to the UI)
    n = 2
    plt.figure(figsize=(8, 8))
    for i, (image1, image2) in enumerate(zip(top4[:2], top4[2:])):
        image1 = get_image(image1)["image"]
        image2 = get_image(image2)["image"]

        ax = plt.subplot(2, n, i + 1)
        plt.imshow(image1)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(image2)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    return result
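
# Sanity-check sketch for find_nearest_neighbors (synthetic data, not part of the
# app; uncomment to run locally). When the query equals one of the encoded rows,
# that row should be returned first at distance ~0:
#
#     _demo_encoded = np.random.rand(10, 16).astype("float32")
#     _idx, _dist = find_nearest_neighbors(_demo_encoded, _demo_encoded[3], top_n=1)[0]
#     assert _idx == 3 and np.isclose(_dist, 0.0)
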
def load_example(example_name):
    """Return the example image for a gallery entry name, or None if unknown."""
    if example_name in example_images:
        return Image.open(example_images[example_name])
    return None


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Image to Video App
        Find your favorite Gordon Ramsay scene by uploading an image from the scene; the app then finds the corresponding YouTube video and timestamp.
        Or try one of the examples (unseen data for the model).
        """
    )
    with gr.Row():
        with gr.Column():
            inp_image = gr.Image(label="Upload Image")
        with gr.Column():
            example_selection = gr.Gallery(
                value=list(example_images.values()),
                label="Click an Example Image",
            )
    with gr.Row():
        out = gr.Markdown()

    def handle_selection(user_image, evt: gr.SelectData):
        # Prefer an uploaded image; otherwise use the clicked gallery thumbnail.
        # `evt.index` is the position of the clicked item in the gallery, which
        # maps back to a key of `example_images`.
        if user_image is not None:
            return inference(user_image)
        if evt is not None:
            example_name = list(example_images.keys())[evt.index]
            image = load_example(example_name)
            if image is not None:
                return inference(image)
        return "Please upload an image or select an example image."

    example_selection.select(handle_selection, inputs=[inp_image], outputs=out)

if __name__ == "__main__":
    demo.launch()
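
# Usage sketch (assumes the example file exists locally and the downloads above
# succeeded): run the retrieval pipeline directly, without the Gradio UI.
#
#     img = Image.open("examples/example_1.png")
#     print(inference(img))  # prints "<label> <timestamp>" and the YouTube URL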