from fastapi import FastAPI, Query
from fastapi.responses import FileResponse, JSONResponse
import uvicorn
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
import os


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95
    HF_TOKEN = os.getenv("HF_TOKEN")  # Fetched from environment variables


client = InferenceClient(ChatConfig.MODEL, token=ChatConfig.HF_TOKEN)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(vector_dim)  # FAISS index over page embeddings
documents = []  # Extracted text, one entry per page

app = FastAPI()


@app.get("/")
def serve_homepage():
    """Serves the HTML interface."""
    return FileResponse("index.html")


@app.post("/upload_pdf/")
async def upload_pdf(file_path: str):
    """Extracts text from a PDF on disk and (re)builds the vector index."""
    global documents

    # Extract text from the PDF, one chunk per page
    doc = fitz.open(file_path)
    text_chunks = [page.get_text("text") for page in doc]

    # Rebuild the vector database so FAISS positions stay aligned with `documents`
    index.reset()
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))

    return JSONResponse({"message": "PDF uploaded and indexed successfully!"})


@app.get("/chat/")
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """Handles user queries and returns AI-generated responses."""
    if not documents:
        return JSONResponse({"response": "Please upload a PDF first."})

    # Retrieve the most relevant pages as context
    query_embedding = embed_model.encode([msg])
    k = min(3, len(documents))  # Never request more neighbors than indexed pages
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=k)
    context = "\n".join(documents[i] for i in closest_idx[0] if i != -1)

    # Generate an AI response grounded in the retrieved context
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {msg}"},
    ]
    response_text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response_text += token

    return JSONResponse({"response": response_text})


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
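
# --- Example usage (a sketch, assuming the server runs locally on port 8000
# and the PDF path below is hypothetical) ---
#
# Index a PDF that already exists on the server's filesystem
# (`file_path` is a simple-typed parameter, so FastAPI reads it from the query string):
#
#   curl -X POST "http://localhost:8000/upload_pdf/?file_path=/path/to/doc.pdf"
#
# Then ask a question about the indexed document:
#
#   curl "http://localhost:8000/chat/?msg=What%20is%20this%20document%20about%3F"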