import threading

import gradio as gr
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
from fastapi import FastAPI, Query
import uvicorn


# Default settings
class ChatConfig:
    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


client = InferenceClient(ChatConfig.MODEL)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Store extracted text chunks


def extract_text_from_pdf(pdf_path):
    """Extracts text from a PDF, one chunk per page."""
    doc = fitz.open(pdf_path)
    text_chunks = [page.get_text("text") for page in doc]
    return text_chunks


def create_vector_db(text_chunks):
    """Embeds text chunks and adds them to the FAISS index."""
    global documents, index
    index.reset()  # Drop any previously indexed PDF so indices stay aligned with `documents`
    documents = text_chunks
    embeddings = embed_model.encode(text_chunks)
    index.add(np.array(embeddings, dtype=np.float32))


def search_relevant_text(query):
    """Finds the most relevant text chunks for the given query."""
    query_embedding = embed_model.encode([query])
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
    # FAISS returns -1 for missing neighbors when fewer than k chunks are indexed
    return "\n".join(documents[i] for i in closest_idx[0] if i != -1)


def generate_response_sync(message: str) -> str:
    """Generates a response synchronously (shared by Gradio and FastAPI)."""
    if not documents:
        return "Please upload a PDF first."

    context = search_relevant_text(message)  # Get relevant content from the PDF
    messages = [
        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"},
    ]

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
        stream=True,
        temperature=ChatConfig.DEFAULT_TEMP,
        top_p=ChatConfig.DEFAULT_TOP_P,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
    return response


def handle_upload(pdf_file):
    """Handles PDF upload and (re)builds the vector DB."""
    # gr.File(type="filepath") passes the path as a plain string
    text_chunks = extract_text_from_pdf(pdf_file)
    create_vector_db(text_chunks)
    return "PDF uploaded and indexed successfully!"
def create_interface() -> gr.Blocks:
    """Creates the Gradio interface."""
    with gr.Blocks() as interface:
        gr.Markdown("# PDF-Based Chatbot using Google Gemma")

        with gr.Row():
            chatbot = gr.Chatbot(label="Chat with Your PDF", type="messages")
            pdf_upload = gr.File(label="Upload PDF", type="filepath")

        with gr.Row():
            user_input = gr.Textbox(label="Ask a question", placeholder="Type here...")
            send_button = gr.Button("Send")

        output = gr.Textbox(label="Response", lines=5)

        # Upload PDF handler
        pdf_upload.change(handle_upload, inputs=[pdf_upload], outputs=[])

        # Chat function
        send_button.click(
            generate_response_sync,
            inputs=[user_input],
            outputs=[output],
        )

    return interface


# FastAPI Integration
app = FastAPI()


@app.get("/chat")
def chat_with_pdf(msg: str = Query(..., title="User Message")):
    """API endpoint to receive a message and return the AI response."""
    response = generate_response_sync(msg)
    return {"response": response}


def run_gradio():
    gradio_app = create_interface()
    gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True)


if __name__ == "__main__":
    # Start Gradio in a separate thread
    gradio_thread = threading.Thread(target=run_gradio, daemon=True)
    gradio_thread.start()

    # Run FastAPI with Uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
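
# Example client (a sketch, not part of the app): assumes the script above is
# running locally with the default port 8000 and that the `requests` package is
# installed. It queries the FastAPI /chat endpoint and prints the model's answer.
#
#   import requests
#
#   resp = requests.get(
#       "http://localhost:8000/chat",
#       params={"msg": "What is this PDF about?"},
#       timeout=120,
#   )
#   print(resp.json()["response"])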