Spaces:

MJobe
/

document-vqa-v2

Running

App Files Community

MJobe committed on Dec 17, 2023

Commit

1106695

1 Parent(s): 6911b84

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -3

main.py CHANGED Viewed

@@ -74,12 +74,33 @@ async def pdf_question_answering(
         # Read the uploaded file as bytes
         contents = await file.read()
-        # Convert PDF to images
-        images = convert_from_bytes(contents)
         # Perform document question answering for each image
         answers_dict = {}
-        for idx, image in enumerate(images):
             for question in questions.split(','):
                 result = nlp_qa(
                     image,
@@ -89,6 +110,10 @@ async def pdf_question_answering(
                 formatted_question = f"{question.strip('[]')} (Page {idx + 1})"
                 answers_dict[formatted_question] = answer
         return answers_dict
     except Exception as e:

         # Read the uploaded file as bytes
         contents = await file.read()
+        # Save the PDF bytes to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+            temp_pdf.write(contents)
+            temp_pdf_path = temp_pdf.name
+        # Initialize an empty list to store image bytes
+        images = []
+        # Use PyMuPDF to process the PDF and convert each page to an image
+        pdf_document = fitz.open(temp_pdf_path)
+        for page_num in range(pdf_document.page_count):
+            page = pdf_document.load_page(page_num)
+            print(f"Converting page {page_num + 1} to image...")
+            # Convert the page to an image
+            image = Image.frombytes("RGB", page.get_size(), page.get_pixmap().samples)
+            # Convert the image to bytes
+            img_byte_array = BytesIO()
+            image.save(img_byte_array, format='PNG')
+            images.append(img_byte_array.getvalue())
         # Perform document question answering for each image
         answers_dict = {}
+        for idx, image_bytes in enumerate(images):
+            image = Image.open(BytesIO(image_bytes))
             for question in questions.split(','):
                 result = nlp_qa(
                     image,
                 formatted_question = f"{question.strip('[]')} (Page {idx + 1})"
                 answers_dict[formatted_question] = answer
+        # Delete the temporary PDF file
+        temp_pdf.close()
+        os.remove(temp_pdf_path)
         return answers_dict
     except Exception as e: