|
import os |
|
import fitz |
|
import pytesseract |
|
from PIL import Image |
|
import io |
|
from flask import Flask, request, jsonify |
|
from mistralai.client import MistralClient |
|
from mistralai.models.chat_completion import ChatMessage |
|
|
|
|
|
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Install the placeholder only when no key is configured, so a real
# MISTRAL_API_KEY exported in the environment is never overwritten.
os.environ.setdefault("MISTRAL_API_KEY", "your_api_key_here")

# Module-level Mistral client built from the MISTRAL_API_KEY environment variable.
client = MistralClient(api_key=os.getenv("MISTRAL_API_KEY"))
|
|
|
|
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF, using OCR for pages that yield no text.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The text of every page in page order, each page followed by a
        newline. An empty document yields an empty string.
    """
    page_texts = []

    # Open through a context manager so the document handle is released
    # even if text extraction or OCR fails part-way through.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            extracted_text = page.get_text("text")

            # A page whose text is empty or whitespace-only is treated as
            # scanned: rasterize it and run OCR on the resulting image.
            if not extracted_text.strip():
                pix = page.get_pixmap()
                img = Image.open(io.BytesIO(pix.tobytes()))
                extracted_text = pytesseract.image_to_string(img)

            page_texts.append(extracted_text)

    # Build the result once instead of growing a string inside the loop.
    return "".join(f"{page_text}\n" for page_text in page_texts)
|
|
|
def query_mistral(pdf_text, user_query, *, max_chars=3000, model="mistral-7b"):
    """Send extracted document text and a user question to Mistral AI.

    Args:
        pdf_text: Text extracted from the PDF.
        user_query: The question to ask about the document.
        max_chars: Maximum number of characters of ``pdf_text`` included
            in the prompt (keyword-only).
        model: Model identifier passed to ``client.chat`` (keyword-only).
            NOTE(review): confirm "mistral-7b" is an identifier the
            configured Mistral endpoint accepts.

    Returns:
        The content of the first choice in the chat response.
    """
    document_excerpt = pdf_text[:max_chars]

    # Only announce truncation when text was actually cut off; otherwise the
    # model is told a complete document is missing content.
    if len(pdf_text) > max_chars:
        document_excerpt += "... (truncated)"

    messages = [
        ChatMessage(role="system", content="You are an AI that answers questions based on PDFs."),
        ChatMessage(role="user", content=f"Document content: {document_excerpt}"),
        ChatMessage(role="user", content=f"User question: {user_query}"),
    ]

    response = client.chat(model=model, messages=messages)
    return response.choices[0].message.content
|
|
|
@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Accept a PDF upload, store it, and return a preview of its text.

    Expects a multipart form field named ``file``. The upload is saved to
    ``uploaded.pdf`` in the working directory, which is the path the
    ``/chat`` route reads.

    Returns:
        A JSON response with a confirmation message and the first 500
        characters of extracted text, or a JSON error with status 400 when
        no file was supplied or its text could not be extracted.
    """
    uploaded = request.files.get("file")

    # A form submitted without choosing a file still sends a "file" part,
    # but with an empty filename; treat that the same as a missing part.
    if uploaded is None or not uploaded.filename:
        return jsonify({"error": "No file uploaded"}), 400

    pdf_path = "uploaded.pdf"
    uploaded.save(pdf_path)

    try:
        pdf_text = extract_text_from_pdf(pdf_path)
    except RuntimeError:
        # NOTE(review): PyMuPDF is expected to report corrupt or non-PDF
        # input as RuntimeError subclasses -- verify against the installed
        # version. Answer with a client error instead of an unhandled 500.
        return jsonify({"error": "Could not extract text from the uploaded file"}), 400

    return jsonify({"message": "PDF uploaded and processed", "text": pdf_text[:500]})
|
|
|
@app.route("/chat", methods=["POST"])
def chat():
    """Answer a question about the most recently uploaded PDF.

    Expects a JSON object body with an optional ``query`` string. The text
    of ``uploaded.pdf`` is re-extracted on every call and sent to Mistral
    together with the query.

    Returns:
        A JSON response holding the model's answer under ``response``, or a
        JSON error with status 400 when the body is not a JSON object or no
        PDF has been uploaded yet.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so the failure can be reported as a JSON error.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_query = payload.get("query", "")

    pdf_path = "uploaded.pdf"
    # Without a prior upload there is nothing to read; fail with a clear
    # message rather than an exception from the PDF library.
    if not os.path.exists(pdf_path):
        return jsonify({"error": "No PDF uploaded"}), 400

    pdf_text = extract_text_from_pdf(pdf_path)
    response = query_mistral(pdf_text, user_query)
    return jsonify({"response": response})
|
|
|
if __name__ == "__main__":
    # Debug mode enables the interactive debugger, which must not be
    # reachable by untrusted clients. It stays on by default, as before,
    # for local development; set FLASK_DEBUG to 0/false/no/off to disable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").strip().lower() not in {
        "0",
        "false",
        "no",
        "off",
    }
    app.run(debug=debug_enabled)
|
|