from fastapi import FastAPI, Query
from fastapi.responses import FileResponse, JSONResponse
import uvicorn
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
import os


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95
    HF_TOKEN = os.getenv("HF_TOKEN")  # Fetched from environment variables


client = InferenceClient(ChatConfig.MODEL, token=ChatConfig.HF_TOKEN)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(vector_dim)  # FAISS index over page embeddings
documents = []  # Extracted text, one entry per page

app = FastAPI()


@app.get("/")
def serve_homepage():
    """Serves the HTML interface."""
    return FileResponse("index.html")


@app.post("/upload_pdf/")
async def upload_pdf(file_path: str):
    """Extracts text from a PDF on disk and (re)builds the vector index."""
    global documents

    # Extract text from the PDF, one chunk per page
    doc = fitz.open(file_path)
    text_chunks = [page.get_text("text") for page in doc]

    # Rebuild the vector database so FAISS positions stay aligned with `documents`
    index.reset()
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))

    return JSONResponse({"message": "PDF uploaded and indexed successfully!"})


@app.get("/chat/")
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """Handles user queries and returns AI-generated responses."""
    if not documents:
        return JSONResponse({"response": "Please upload a PDF first."})

    # Retrieve the most relevant pages as context
    query_embedding = embed_model.encode([msg])
    k = min(3, len(documents))  # Never request more neighbors than indexed pages
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=k)
    context = "\n".join(documents[i] for i in closest_idx[0] if i != -1)

    # Generate an AI response grounded in the retrieved context
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {msg}"},
    ]
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response_text += token

    return JSONResponse({"response": response_text})


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
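
# --- Example usage (a sketch, assuming the server runs locally on port 8000
# and the PDF path below is hypothetical) ---
#
# Index a PDF that already exists on the server's filesystem
# (`file_path` is a simple-typed parameter, so FastAPI reads it from the query string):
#
#   curl -X POST "http://localhost:8000/upload_pdf/?file_path=/path/to/doc.pdf"
#
# Then ask a question about the indexed document:
#
#   curl "http://localhost:8000/chat/?msg=What%20is%20this%20document%20about%3F"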