Spaces:

physician-ai
/

doc-scan-gemini

Sleeping

App Files Files Community

neuralleap committed on May 30

Commit

54f6bea

verified ·

1 Parent(s): b5a8842

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -22

app.py CHANGED Viewed

@@ -3,16 +3,14 @@ import google.generativeai as genai
 import base64
 import io
 from PIL import Image
 import os
-import json
-# Configure Google Cloud credentials (replace with your actual API key or setup)
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-# Select the Gemini Pro Vision model
 model = genai.GenerativeModel('gemini-1.5-flash')
-# Prompt definition
 prompt = """
 You are analyzing a medical document or an application form from patient.
 Extract the following fields as JSON:
@@ -30,28 +28,43 @@ Extract the following fields as JSON:
 - Phone number (mobile)
 """
-def process_image(image: Image.Image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="JPEG")
-    base64_image = buffered.getvalue()
-    response = model.generate_content([
-        prompt,
-        {
-            "mime_type": "image/jpeg",
-            "data": base64_image
-        }
-    ])
-    return response.text
 # Gradio interface
 demo = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil"),
     outputs="textbox",
-    title="Healthelic Form Data Extractor (Doc Scanner) - Gemini 1.5-flash",
-    description="Upload a scanned medical form to extract key fields."
 )
 if __name__ == "__main__":
-    demo.launch()

 import base64
 import io
 from PIL import Image
+import fitz  # PyMuPDF
 import os
+# Configure Gemini API
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 model = genai.GenerativeModel('gemini-1.5-flash')
+# Prompt for Gemini
 prompt = """
 You are analyzing a medical document or an application form from patient.
 Extract the following fields as JSON:
 - Phone number (mobile)
 """
+def process_pdf(pdf_bytes: bytes) -> str:
+    """Extract form fields from every page of a PDF using Gemini.
+
+    Each page is rendered at 200 DPI, re-encoded as JPEG and sent to the
+    model together with the module-level ``prompt``. The per-page answers
+    are stripped and joined with a ``---`` separator line.
+
+    Args:
+        pdf_bytes: Raw content of the uploaded PDF file.
+
+    Returns:
+        The model's text for each page, in page order, separated by a
+        ``---`` line. An empty string when no data was supplied.
+    """
+    if not pdf_bytes:
+        # Nothing was uploaded, so there are no pages to analyze.
+        return ""
+    results = []
+    # The context manager closes the document even when rendering or the
+    # API call raises, so the underlying buffer is not leaked.
+    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        for page in doc:
+            pix = page.get_pixmap(dpi=200)
+            # get_pixmap() is called without alpha/colorspace overrides, so
+            # the samples are treated as packed 3-channel RGB here.
+            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+            # Re-encode as JPEG so the payload matches the declared mime type.
+            buffered = io.BytesIO()
+            image.save(buffered, format="JPEG")
+            jpeg_bytes = buffered.getvalue()
+            # NOTE(review): response.text presumably raises when the model
+            # returns no text part (e.g. a blocked response) - confirm and
+            # decide whether one bad page should abort the whole document.
+            response = model.generate_content([
+                prompt,
+                {
+                    "mime_type": "image/jpeg",
+                    "data": jpeg_bytes,
+                },
+            ])
+            results.append(response.text.strip())
+    return "\n\n---\n\n".join(results)
 # Gradio interface: a binary file upload is passed to process_pdf and the
 # text it returns is shown in a textbox.
 demo = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File(type="binary", label="Upload PDF Form"),
     outputs="textbox",
+    title="Healthelic Form Data Extractor (PDF Scanner) - Gemini 1.5 Flash",
+    description="Upload a scanned medical form in PDF format to extract key fields using Gemini 1.5 Flash."
 )
 if __name__ == "__main__":
+    demo.launch()