import json

import faiss
import gradio as gr
import numpy as np
import pandas as pd
import spaces
import torch
from transformers import BertModel, BertTokenizer, pipeline

# Load the CSV data (one document per row, with a JSON-encoded 'embedding' column)
data = pd.read_csv('RBD10kstats.csv')

# Safely convert JSON strings to numpy arrays
def safe_json_loads(x):
    try:
        return np.array(json.loads(x))
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error decoding JSON: {e}")
        return None  # Dropped by the filter below; avoids mixing vector dimensions

# Parse the embedding column and drop rows without a valid embedding
data['embedding'] = data['embedding'].apply(safe_json_loads)
data = data[data['embedding'].apply(lambda x: x is not None and len(x) > 0)]

# Fail fast if nothing survived filtering (the index below would be unusable)
if data.empty:
    raise RuntimeError("No valid embeddings found in the CSV file.")

# Build the FAISS index; FAISS expects float32 vectors
dimension = len(data['embedding'].iloc[0])
embeddings = np.stack(data['embedding'].values).astype('float32')

index = faiss.IndexFlatL2(dimension)
if hasattr(faiss, 'StandardGpuResources') and faiss.get_num_gpus() > 0:
    res = faiss.StandardGpuResources()  # use a single GPU
    index = faiss.index_cpu_to_gpu(res, 0, index)  # move the index to GPU 0
index.add(embeddings)

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the extractive QA model
qa_model = pipeline(
    "question-answering",
    model="distilbert-base-uncased-distilled-squad",
    device=0 if torch.cuda.is_available() else -1,
)

# Load the BERT model and tokenizer used to embed questions
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased').to(device)

# Embed a question with BERT (mean pooling over the last hidden state).
# The stored document embeddings must have been produced the same way, so that
# query and index vectors share the same dimension (768 for bert-base).
def embed_question(question, model, tokenizer):
    inputs = tokenizer(question, return_tensors='pt', truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy().astype('float32')

# Retrieve the most relevant document and generate a response
@spaces.GPU(duration=120)
def retrieve_and_generate(question):
    # Embed the question
    question_embedding = embed_question(question, model, tokenizer)
    # Find the nearest document in the FAISS index
    _, indices = index.search(question_embedding, 1)
    # Retrieve the most relevant document
    relevant_doc = data.iloc[indices[0][0]]
    # Use the QA model to extract the answer from the document's abstract
    context = relevant_doc['Abstract']
    response = qa_model(question=question, context=context)
    return response['answer']

# Create the Gradio interface
interface = gr.Interface(
    fn=retrieve_and_generate,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about the documents..."),
    outputs="text",
    title="RAG Chatbot",
    description="Ask questions about the documents in the CSV file.",
)

# Launch the Gradio app
interface.launch()
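
# ---------------------------------------------------------------------------
# Note: this app assumes 'RBD10kstats.csv' already contains an 'Abstract'
# column and an 'embedding' column of JSON-encoded vectors produced with the
# same mean-pooled BERT scheme used in embed_question(). A minimal sketch of
# how such a column could be built (hypothetical preprocessing step, not part
# of this app):
#
#   df = pd.read_csv('documents.csv')  # must contain an 'Abstract' column
#   df['embedding'] = [
#       json.dumps(embed_question(text, model, tokenizer)[0].tolist())
#       for text in df['Abstract']
#   ]
#   df.to_csv('RBD10kstats.csv', index=False)
# ---------------------------------------------------------------------------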