gemma-3-chat-api

Sleeping

Pamudu13 committed on Apr 3

Commit

a0d55b9

verified ·

1 Parent(s): a3dcdff

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,10 +24,11 @@ index = faiss.IndexFlatL2(vector_dim)  # FAISS index
 documents = []  # Store extracted text
-def extract_text_from_pdf(pdf_path):
-    """Extracts text from PDF"""
-    doc = fitz.open(pdf_path)
     text_chunks = [page.get_text("text") for page in doc]
     return text_chunks
 def create_vector_db(text_chunks):
@@ -96,26 +97,30 @@ def index():
     """Serve the HTML page for the user interface"""
     return render_template('index.html')
-UPLOAD_FOLDER = "/tmp/uploaded_files"
-os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # Ensure the folder exists
 @app.route('/upload_pdf', methods=['POST'])
 def upload_pdf():
     """Handle PDF upload"""
     if 'pdf' not in request.files:
-        return jsonify({"error": "No file part"}), 400  # Handle missing file
     file = request.files['pdf']
     if file.filename == "":
-        return jsonify({"error": "No selected file"}), 400  # Handle empty filename
-    pdf_path = os.path.join(UPLOAD_FOLDER, file.filename)
     try:
-        file.save(pdf_path)  # Save the uploaded PDF
-        # Extract text and create vector database
-        text_chunks = extract_text_from_pdf(pdf_path)
         create_vector_db(text_chunks)
         return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200

 documents = []  # Store extracted text
+def extract_text_from_pdf(pdf_stream):
+    """Extracts text from PDF stream"""
+    doc = fitz.open(stream=pdf_stream, filetype="pdf")
     text_chunks = [page.get_text("text") for page in doc]
+    doc.close()
     return text_chunks
 def create_vector_db(text_chunks):
     """Serve the HTML page for the user interface"""
     return render_template('index.html')
 @app.route('/upload_pdf', methods=['POST'])
 def upload_pdf():
     """Handle PDF upload"""
     if 'pdf' not in request.files:
+        return jsonify({"error": "No file part"}), 400
     file = request.files['pdf']
     if file.filename == "":
+        return jsonify({"error": "No selected file"}), 400
     try:
+        # Read the file directly into memory instead of saving to disk
+        pdf_stream = file.read()
+        # Create a BytesIO object to work with the PDF in memory
+        from io import BytesIO
+        pdf_stream = BytesIO(pdf_stream)
+        # Use fitz to open the PDF from memory
+        doc = fitz.open(stream=pdf_stream, filetype="pdf")
+        text_chunks = [page.get_text("text") for page in doc]
+        doc.close()
+        # Create vector database
         create_vector_db(text_chunks)
         return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200