"""Small Flask service that answers questions about an uploaded PDF.

``POST /upload`` stores a PDF on disk and returns a short text preview.
``POST /chat`` re-reads that PDF and forwards its text, together with the
user's question, to the Mistral chat API.
"""

import io
import os

import fitz  # PyMuPDF for PDFs
import pytesseract
from flask import Flask, jsonify, request
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from PIL import Image

# Initialize Flask app
app = Flask(__name__)

# Only fall back to the placeholder when no real key is configured, so a key
# supplied through the environment is not overwritten at import time.
os.environ.setdefault("MISTRAL_API_KEY", "your_api_key_here")
client = MistralClient(api_key=os.getenv("MISTRAL_API_KEY"))

# Set Tesseract Path for Windows (if needed)
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Single shared location of the most recently uploaded PDF.
# NOTE(review): every client shares this one file, so concurrent users will
# overwrite each other's uploads.
UPLOAD_PATH = "uploaded.pdf"
# NOTE(review): confirm this model identifier is accepted by the Mistral API.
MISTRAL_MODEL = "mistral-7b"
# Maximum number of document characters forwarded to the model.
MAX_CONTEXT_CHARS = 3000
# Number of characters returned as a preview after upload.
PREVIEW_CHARS = 500


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF, using OCR for pages without a text layer.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, each page followed by a newline.
    """
    page_texts = []
    # Context managers release the document and image handles even when
    # text extraction or OCR raises part-way through.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            page_text = page.get_text("text")
            # If no text, apply OCR (for scanned PDFs)
            if not page_text.strip():
                pix = page.get_pixmap()
                with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                    page_text = pytesseract.image_to_string(img)
            page_texts.append(page_text)
    return "".join(f"{page_text}\n" for page_text in page_texts)


def query_mistral(pdf_text, user_query):
    """Send extracted text and the user's question to Mistral AI.

    Args:
        pdf_text: Full text of the document; only the first
            ``MAX_CONTEXT_CHARS`` characters are sent.
        user_query: The question to ask about the document.

    Returns:
        The content of the first choice in the model's response.
    """
    document_excerpt = pdf_text[:MAX_CONTEXT_CHARS]
    # Only announce truncation when text was actually cut off; otherwise the
    # model is told the document is incomplete when it is not.
    if len(pdf_text) > MAX_CONTEXT_CHARS:
        document_excerpt += "... \n(truncated)"

    messages = [
        ChatMessage(
            role="system",
            content="You are an AI that answers questions based on PDFs.",
        ),
        ChatMessage(role="user", content=f"Document content: {document_excerpt}"),
        ChatMessage(role="user", content=f"User question: {user_query}"),
    ]
    response = client.chat(model=MISTRAL_MODEL, messages=messages)
    return response.choices[0].message.content


@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Store the uploaded PDF and return a preview of its extracted text.

    Responds with HTTP 400 when the request carries no file or the file
    field was submitted without selecting a file.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]
    # Browsers send an empty filename when the form is submitted without
    # choosing a file; do not save or parse an empty upload.
    if not file.filename:
        return jsonify({"error": "No file uploaded"}), 400

    file.save(UPLOAD_PATH)

    # Extract text
    pdf_text = extract_text_from_pdf(UPLOAD_PATH)
    return jsonify(
        {"message": "PDF uploaded and processed", "text": pdf_text[:PREVIEW_CHARS]}
    )  # Preview


@app.route("/chat", methods=["POST"])
def chat():
    """Answer a question about the previously uploaded PDF.

    Expects a JSON object with a ``query`` string. Responds with HTTP 400
    when the body is not a JSON object, the query is missing or blank, or no
    PDF has been uploaded yet.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so the request can be rejected with a clear message below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_query = data.get("query", "")
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({"error": "No query provided"}), 400

    if not os.path.exists(UPLOAD_PATH):
        return jsonify({"error": "No PDF uploaded yet"}), 400

    # NOTE(review): the PDF is re-parsed (and possibly re-OCRed) on every
    # request; caching the text at upload time would avoid the repeated work.
    pdf_text = extract_text_from_pdf(UPLOAD_PATH)
    response = query_mistral(pdf_text, user_query)
    return jsonify({"response": response})


if __name__ == "__main__":
    # The interactive debugger allows arbitrary code execution, so it is
    # enabled only when explicitly requested through FLASK_DEBUG=1.
    app.run(debug=os.getenv("FLASK_DEBUG") == "1")