sdas2485 committed on
Commit
90cf652
·
verified ·
1 Parent(s): 0ecbc19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz # PyMuPDF for PDFs
3
+ import pytesseract
4
+ from PIL import Image
5
+ import io
6
+ from flask import Flask, request, jsonify
7
+ from mistralai.client import MistralClient
8
+ from mistralai.models.chat_completion import ChatMessage
9
+
10
# Initialize the Flask application that serves the /upload and /chat routes.
app = Flask(__name__)

# Mistral API key: setdefault() installs the placeholder only when the
# variable is not already set, so a real key exported by the deployment
# environment is no longer overwritten at import time.
os.environ.setdefault("MISTRAL_API_KEY", "your_api_key_here")
client = MistralClient(api_key=os.environ["MISTRAL_API_KEY"])
16
+
17
+ # Set Tesseract Path for Windows (if needed)
18
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
19
+
20
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF, using OCR for pages that yield no text.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of every page in page order, each page followed by a
        newline, joined into one string.
    """
    page_texts = []

    # Open the document as a context manager so it is closed even when
    # text extraction or OCR raises part-way through the pages.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            extracted_text = page.get_text("text")

            # Empty or whitespace-only text (e.g. a scanned page): render the
            # page to an image and run Tesseract on it instead.
            if not extracted_text.strip():
                pix = page.get_pixmap()
                img = Image.open(io.BytesIO(pix.tobytes()))
                extracted_text = pytesseract.image_to_string(img)

            page_texts.append(extracted_text)

    # Single join instead of repeated "+=" keeps the build linear.
    return "".join(f"{page_text}\n" for page_text in page_texts)
37
+
38
def query_mistral(pdf_text, user_query, model="mistral-7b", max_chars=3000):
    """Send extracted document text and a user question to Mistral AI.

    Args:
        pdf_text: Full text extracted from the PDF.
        user_query: The question to ask about the document.
        model: Model identifier passed to ``client.chat``.
        max_chars: Maximum number of characters of ``pdf_text`` included in
            the prompt.

    Returns:
        The content of the first choice in the chat response.
    """
    # NOTE(review): confirm that the default model id is one the Mistral API
    # accepts for this account; it is kept unchanged from the original code.
    document_excerpt = pdf_text[:max_chars]

    # Only announce truncation when text was actually cut off; previously the
    # marker was appended even to short documents, misleading the model.
    if len(pdf_text) > max_chars:
        document_excerpt += "... (truncated)"

    messages = [
        ChatMessage(role="system", content="You are an AI that answers questions based on PDFs."),
        ChatMessage(role="user", content=f"Document content: {document_excerpt}"),
        ChatMessage(role="user", content=f"User question: {user_query}"),
    ]

    response = client.chat(model=model, messages=messages)
    return response.choices[0].message.content
48
+
49
@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Store an uploaded PDF and return a preview of its extracted text.

    Expects a multipart form upload with the PDF in the ``file`` field. The
    file is saved as ``uploaded.pdf`` in the working directory.

    Returns:
        A JSON response with a confirmation message and the first 500
        characters of the extracted text, or a JSON error with status 400
        when no file was sent or the file cannot be processed.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]

    # A form submitted without choosing a file still contains the field, but
    # with an empty filename; treat that the same as a missing upload.
    if not file.filename:
        return jsonify({"error": "No file uploaded"}), 400

    pdf_path = "uploaded.pdf"
    file.save(pdf_path)

    # Extract text. NOTE(review): PyMuPDF is expected to report unreadable
    # documents with RuntimeError subclasses; confirm for the pinned version.
    try:
        pdf_text = extract_text_from_pdf(pdf_path)
    except RuntimeError:
        return jsonify({"error": "Uploaded file could not be processed as a PDF"}), 400

    return jsonify({"message": "PDF uploaded and processed", "text": pdf_text[:500]})  # Preview
61
+
62
@app.route("/chat", methods=["POST"])
def chat():
    """Answer a question about the most recently uploaded PDF.

    Expects a JSON object body with a ``query`` string. The text of
    ``uploaded.pdf`` is extracted again on each request and sent to Mistral
    together with the query.

    Returns:
        A JSON response with the model's answer under ``response``, or a
        JSON error with status 400 when the body is not a JSON object, the
        query is empty, or no PDF has been uploaded yet.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so bad requests get a clear 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_query = data.get("query", "")
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({"error": "No query provided"}), 400

    # Without a prior upload there is nothing to open; report that explicitly.
    pdf_path = "uploaded.pdf"
    if not os.path.exists(pdf_path):
        return jsonify({"error": "No PDF uploaded"}), 400

    pdf_text = extract_text_from_pdf(pdf_path)
    response = query_mistral(pdf_text, user_query)
    return jsonify({"response": response})
69
+
70
if __name__ == "__main__":
    # Debug mode enables the interactive Werkzeug debugger, which permits code
    # execution from the browser. It is therefore opt-in through FLASK_DEBUG
    # rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)