Spaces:

physician-ai
/

doc-scan-openai

Running

App Files Community

neuralleap committed 7 days ago

Commit

3ca07d3

verified ·

1 Parent(s): c16556f

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -25

app.py CHANGED Viewed

@@ -3,12 +3,13 @@ import openai
 import base64
 import io
 from PIL import Image
 import os
-# Use Hugging Face Secrets to hide API Key
 openai.api_key = os.getenv("OPENAI_API_KEY")
-# Prompt definition
 prompt = """
 You are analyzing a medical document or an application form from a patient.
 Extract the following fields as JSON:
@@ -26,31 +27,45 @@ Extract the following fields as JSON:
 - Phone number (mobile)
 """
-def process_image(image: Image.Image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="JPEG")
-    base64_image = base64.b64encode(buffered.getvalue()).decode()
-    response = openai.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "user", "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
-            ]}
-        ],
-        max_tokens=1000
-    )
-    return response.choices[0].message.content
-# Gradio interface
 demo = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil"),
     outputs="textbox",
-    title="Healthelic Form Data Extractor (Doc Scanner) - OpenAI gpt 4o",
-    description="Upload a scanned medical form to extract key fields."
 )
 if __name__ == "__main__":

 import base64
 import io
 from PIL import Image
+import fitz  # PyMuPDF
 import os
+# Load API key
 openai.api_key = os.getenv("OPENAI_API_KEY")
+# Prompt for extraction
 prompt = """
 You are analyzing a medical document or an application form from a patient.
 Extract the following fields as JSON:
 - Phone number (mobile)
 """
+def process_pdf(pdf_file):
+    """Extract form fields from every page of an uploaded PDF with GPT-4o.
+
+    Each page is rendered to a JPEG, sent to the vision model together with
+    the module-level ``prompt``, and the per-page replies are joined with a
+    ``---`` separator line surrounded by blank lines.
+
+    Args:
+        pdf_file: The upload as delivered by ``gr.File``. With
+            ``type="binary"`` this is a ``bytes`` object; file-like objects
+            (anything with ``.read()``) and filesystem paths are accepted
+            as well so the function keeps working if the input type changes.
+
+    Returns:
+        The model output for each page as one string, or a short notice
+        when nothing was uploaded.
+    """
+    if pdf_file is None:
+        return "No PDF uploaded."
+    # gr.File(type="binary") hands the callback raw bytes, which have no
+    # .read(); only call it on genuine file-like objects.
+    if isinstance(pdf_file, (bytes, bytearray)):
+        pdf_bytes = bytes(pdf_file)
+    elif hasattr(pdf_file, "read"):
+        pdf_bytes = pdf_file.read()
+    else:
+        with open(pdf_file, "rb") as handle:
+            pdf_bytes = handle.read()
+    results = []
+    # The context manager closes the document even if a request fails.
+    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        for page in doc:
+            # alpha=False keeps the samples 3-channel so they match "RGB".
+            pix = page.get_pixmap(dpi=200, alpha=False)  # Adjust DPI if needed
+            # Convert pixmap to PIL Image
+            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+            # Convert to base64 JPEG
+            buffered = io.BytesIO()
+            image.save(buffered, format="JPEG")
+            base64_image = base64.b64encode(buffered.getvalue()).decode()
+            # Send to GPT-4o
+            response = openai.chat.completions.create(
+                model="gpt-4o",
+                messages=[
+                    {"role": "user", "content": [
+                        {"type": "text", "text": prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
+                    ]}
+                ],
+                max_tokens=1000
+            )
+            # The API types message.content as optional; treat a missing
+            # reply as an empty page result instead of crashing on .strip().
+            results.append((response.choices[0].message.content or "").strip())
+    return "\n\n---\n\n".join(results)
+# Gradio UI
 demo = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File(type="binary", label="Upload PDF Form"),
     outputs="textbox",
+    title="Healthelic Form Data Extractor (PDF Scanner) - OpenAI GPT-4o",
+    description="Upload a scanned medical form in PDF format to extract key fields using GPT-4o vision model."
 )
 if __name__ == "__main__":