import threading

import gradio as gr
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
from fastapi import FastAPI, Query
import uvicorn


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


client = InferenceClient(ChatConfig.MODEL)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text chunks


def extract_text_from_pdf(pdf_path):
    """Extracts text from a PDF, one chunk per page."""
    doc = fitz.open(pdf_path)
    text_chunks = [page.get_text("text") for page in doc]
    return text_chunks


def create_vector_db(text_chunks):
    """Embeds text chunks and adds them to the FAISS index."""
    global documents, index
    index.reset()  # Drop any previously indexed PDF so indices stay aligned with `documents`
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))


def search_relevant_text(query):
    """Finds the most relevant text chunks for the given query."""
    query_embedding = embed_model.encode([query])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    # FAISS returns -1 for missing neighbors when fewer than k chunks are indexed
    return "\n".join(documents[i] for i in closest_idx[0] if i != -1)


def generate_response_sync(message: str) -> str:
    """Generates a response synchronously (shared by Gradio and FastAPI)."""
    if not documents:
        return "Please upload a PDF first."

    context = search_relevant_text(message)  # Get relevant content from the PDF
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"},
    ]

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
    return response


def handle_upload(pdf_file):
    """Handles PDF upload and (re)builds the vector DB."""
    # gr.File(type="filepath") passes the path as a plain string
    text_chunks = extract_text_from_pdf(pdf_file)
    create_vector_db(text_chunks)
    return "PDF uploaded and indexed successfully!"
def create_interface() -> gr.Blocks:
    """Creates the Gradio interface."""
    with gr.Blocks() as interface:
        gr.Markdown("# PDF-Based Chatbot using Google Gemma")

        with gr.Row():
            chatbot = gr.Chatbot(label="Chat with Your PDF", type="messages")
            pdf_upload = gr.File(label="Upload PDF", type="filepath")

        with gr.Row():
            user_input = gr.Textbox(label="Ask a question", placeholder="Type here...")
            send_button = gr.Button("Send")

        output = gr.Textbox(label="Response", lines=5)

        # Upload PDF handler
        pdf_upload.change(handle_upload, inputs=[pdf_upload], outputs=[])

        # Chat function
        send_button.click(
            generate_response_sync,
            inputs=[user_input],
            outputs=[output],
        )

    return interface


# FastAPI Integration
app = FastAPI()


@app.get("/chat")
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """API endpoint to receive a message and return the AI response."""
    response = generate_response_sync(msg)
    return {"response": response}


def run_gradio():
    gradio_app = create_interface()
    gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True)


if __name__ == "__main__":
    # Start Gradio in a separate thread
    gradio_thread = threading.Thread(target=run_gradio, daemon=True)
    gradio_thread.start()

    # Run FastAPI with Uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
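
# Example client (a sketch, not part of the app): assumes the script above is
# running locally with the default port 8000 and that the `requests` package is
# installed. It queries the FastAPI /chat endpoint and prints the model's answer.
#
#   import requests
#
#   resp = requests.get(
#       "http://localhost:8000/chat",
#       params={"msg": "What is this PDF about?"},
#       timeout=120,
#   )
#   print(resp.json()["response"])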