import pandas as pd
import numpy as np
import faiss
import gradio as gr
from sentence_transformers import SentenceTransformer

# Load the embeddings from the embeddings CSV file.
embeddings_csv_path = 'df_after_rec_embedding.csv'  # Path to the embeddings CSV
# FAISS expects a C-contiguous (row-major) float32 matrix. DataFrame.to_numpy()
# does not guarantee that memory layout, so normalise dtype and layout in one step.
embeddings = np.ascontiguousarray(
    pd.read_csv(embeddings_csv_path).to_numpy(),
    dtype='float32',
)

# Load the content from the content CSV file.
# NOTE(review): search() indexes content_column with the row numbers returned
# by FAISS, so row i here is assumed to describe row i of the embeddings CSV —
# confirm both files were exported in the same order.
content_csv_path = 'content.csv'  # Path to the content CSV
content_df = pd.read_csv(content_csv_path)  # Load the entire content DataFrame
content_column = content_df['2'].tolist()  # Extract the content from column '2'

# Create a FAISS index sized to the embedding width (number of CSV columns).
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance metric
index.add(embeddings)  # Add embeddings to the index

# Load the nomic-ai/nomic-embed-text-v1 model.
# trust_remote_code=True lets the model repository's own Python code run locally.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)

# Function to embed query and search using FAISS
def search(query, k=1):
    """Embed *query* and return the closest stored content entries.

    Args:
        query: Free-text search string (as typed into the UI).
        k: Maximum number of nearest neighbours to report. Defaults to 1,
            which matches the previous hard-coded behaviour.

    Returns:
        A single string with one "Content: ... / Distance: ..." entry per
        hit, entries separated by a blank line. A short message is returned
        instead when the query is empty or the index yields no hits.

    Raises:
        ValueError: If *k* is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Nothing meaningful can be embedded from an empty / whitespace-only query.
    if not query or not query.strip():
        return "Please enter a search query."

    # NOTE(review): the nomic-embed-text-v1 model card describes task prefixes
    # (e.g. "search_query: "). Whether one is needed here depends on how the
    # stored embeddings were produced — confirm before adding it.
    query_vector = model.encode([query])[0].astype('float32')

    # FAISS takes a 2-D batch of queries; wrap the single vector as one row.
    distances, indices = index.search(np.array([query_vector]), k=k)

    # FAISS pads missing neighbours with index -1 when fewer than k vectors
    # are available; skip those so they do not alias the last content row.
    results = [
        f"Content: {content_column[i]}\nDistance: {d:.4f}"
        for i, d in zip(indices[0], distances[0])
        if i >= 0
    ]
    if not results:
        return "No results found."
    return "\n\n".join(results)

# Create the Gradio interface
def gradio_app():
    """Assemble the search UI and return it without launching it.

    Layout: a heading, then one row with two columns — the query textbox and
    "Search" button on the left, the results textbox on the right. Clicking
    the button passes the query text to ``search`` and shows its return value
    in the results textbox.

    Returns:
        The ``gr.Blocks`` instance holding the assembled interface.
    """
    ui = gr.Blocks()
    with ui:
        gr.Markdown("## FAISS Search Interface with Nomic Embedder")

        with gr.Row():
            # Left-hand column: query entry and trigger.
            with gr.Column():
                query_box = gr.Textbox(
                    placeholder="Type your search query here",
                    label="Search Query",
                )
                run_button = gr.Button("Search")

            # Right-hand column: read-out of the search results.
            with gr.Column():
                results_box = gr.Textbox(lines=10, label="Search Results")

        # Wire the button: query text in, formatted results out.
        run_button.click(fn=search, inputs=[query_box], outputs=[results_box])

    return ui

# Build and launch the Gradio app.
# Both statements sit at module level, so the interface is built and launch()
# is called whenever this file is executed or imported.
# NOTE(review): if this module is ever imported by other code, consider moving
# the launch() call under an `if __name__ == "__main__":` guard.
demo = gradio_app()
demo.launch()