"""Flask service that answers questions about an uploaded PDF.

Each page of the uploaded PDF is embedded with a sentence-transformer model
and stored in an in-memory FAISS index. For every question, the closest pages
are retrieved and passed as context to a hosted chat model.
"""

import os
from typing import List, Tuple

import faiss  # FAISS for vector search
import fitz  # PyMuPDF for PDF text extraction
import numpy as np
from flask import Flask, jsonify, render_template, request
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

app = Flask(__name__)


# Default settings
class ChatConfig:
    """Default model name and generation parameters for the chat endpoint."""

    MODEL = "google/gemma-3-27b-it"
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95


client = InferenceClient(ChatConfig.MODEL)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight embedding model
vector_dim = 384  # Embedding size

# NOTE: this must not be called `index`. The `index()` view function defined
# further down rebinds the module-level name `index`, which previously made
# every `index.add(...)` / `index.search(...)` call hit the view function.
faiss_index = faiss.IndexFlatL2(vector_dim)  # FAISS index
documents = []  # Text chunks; position i corresponds to vector id i in faiss_index

UPLOAD_DIR = "uploaded_files"  # Directory where uploaded PDFs are written
TOP_K = 3  # Maximum number of chunks retrieved per question


def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page in the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        A list with one string per page, in page order. Pages without
        extractable text yield empty strings.
    """
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as doc:
        return [page.get_text("text") for page in doc]


def create_vector_db(text_chunks):
    """Replace the indexed corpus with the embeddings of *text_chunks*.

    Blank chunks are skipped. Any previously indexed document is discarded so
    that vector ids in the FAISS index always line up with ``documents``.

    Args:
        text_chunks: Iterable of text strings to index.
    """
    global documents
    chunks = [chunk for chunk in text_chunks if chunk and chunk.strip()]

    # Embed before touching shared state so a failure here leaves the
    # previous index and documents intact.
    embeddings = None
    if chunks:
        embeddings = np.asarray(embed_model.encode(chunks), dtype=np.float32)

    # Drop old vectors; otherwise ids from an earlier upload would be
    # resolved against the new `documents` list.
    faiss_index.reset()
    documents = chunks
    if embeddings is not None:
        faiss_index.add(embeddings)


def search_relevant_text(query):
    """Return the indexed chunks closest to *query*, joined by newlines.

    Args:
        query: The user's question.

    Returns:
        Up to ``TOP_K`` chunks joined with newlines, or an empty string when
        nothing has been indexed.
    """
    # Never ask for more neighbours than exist: FAISS pads missing results
    # with id -1, which would silently index the last element of a list.
    k = min(TOP_K, faiss_index.ntotal)
    if k == 0:
        return ""

    query_embedding = np.asarray(embed_model.encode([query]), dtype=np.float32)
    _, closest_idx = faiss_index.search(query_embedding, k=k)
    return "\n".join(
        documents[i] for i in closest_idx[0] if 0 <= i < len(documents)
    )


def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = ChatConfig.DEFAULT_SYSTEM_MSG,
    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
    temperature: float = ChatConfig.DEFAULT_TEMP,
    top_p: float = ChatConfig.DEFAULT_TOP_P,
) -> str:
    """Answer *message* using retrieved PDF context and the chat model.

    Args:
        message: The user's current question.
        history: Earlier ``(user_message, assistant_message)`` pairs; empty
            entries within a pair are skipped.
        system_message: System prompt sent as the first message.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus sampling parameter.

    Returns:
        The full model reply, or a prompt to upload a PDF when nothing has
        been indexed yet.
    """
    if not documents:
        return "Please upload a PDF first."

    context = search_relevant_text(message)  # Get relevant content from PDF

    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append(
        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"}
    )

    stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Skip chunks that carry no choices; a missing delta content counts as "".
    return "".join(
        chunk.choices[0].delta.content or ""
        for chunk in stream
        if chunk.choices
    )


@app.route('/')
def index():
    """Serve the HTML page for the user interface."""
    return render_template('index.html')


@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """Store the uploaded PDF and rebuild the vector index from its text.

    Expects a multipart form field named ``pdf``. Responds with HTTP 400 and
    an ``error`` message when the file is missing, has an unusable name, or
    contains no extractable text.
    """
    file = request.files.get('pdf')
    if file is None or not file.filename:
        return jsonify({"error": "No PDF file provided."}), 400

    # The filename is client-controlled: keep only its final component so it
    # cannot escape UPLOAD_DIR (e.g. "../../etc/passwd").
    filename = os.path.basename(file.filename.replace("\\", "/"))
    if filename in ("", ".", ".."):
        return jsonify({"error": "Invalid file name."}), 400

    os.makedirs(UPLOAD_DIR, exist_ok=True)
    pdf_path = os.path.join(UPLOAD_DIR, filename)
    file.save(pdf_path)

    # Extract text and create vector database
    text_chunks = extract_text_from_pdf(pdf_path)
    create_vector_db(text_chunks)
    if not documents:
        return jsonify({"error": "No extractable text found in the PDF."}), 400

    return jsonify({"message": "PDF uploaded and indexed successfully!"})


@app.route('/ask_question', methods=['POST'])
def ask_question():
    """Answer a question posted as JSON.

    Expects a JSON object with a ``message`` string and an optional
    ``history`` list of ``[user, assistant]`` pairs. Responds with HTTP 400
    when the body is not a JSON object or ``message`` is missing or blank.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = payload.get('message')
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "A non-empty 'message' is required."}), 400

    history = payload.get('history') or []
    response = generate_response(message, history)
    return jsonify({"response": response})


if __name__ == '__main__':
    # WARNING: debug=True enables the interactive debugger and reloader;
    # use it for local development only.
    app.run(debug=True)