# Page-status residue captured with the source ("Spaces: Running"); not part of the program.
import base64
import io
import os

import fitz  # PyMuPDF
import gradio as gr
import openai
from PIL import Image
# Read the OpenAI API key from the environment.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Prompt for extraction.
# Instruction text sent alongside each rendered page image; it asks the model
# to return the listed form fields as JSON.
prompt = """
You are analyzing a medical document or an application form from a patient.
Extract the following fields as JSON:
- Position applied for
- Office/Ministry
- Duty station
- First name(s)
- Surname
- Date of birth
- Gender
- Citizenship
- Postal Address
- Residential Address
- Phone number (mobile)
"""
def _page_to_base64_jpeg(page, dpi=200):
    """Render one PDF page and return it as a base64-encoded JPEG string."""
    # Request RGB without alpha explicitly so the raw samples always match
    # the "RGB" mode passed to Image.frombytes below.
    pix = page.get_pixmap(dpi=dpi, colorspace=fitz.csRGB, alpha=False)
    image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()


def _extract_fields_from_image(base64_image):
    """Send one page image plus the extraction prompt to GPT-4o; return the reply text."""
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
            ]}
        ],
        max_tokens=1000,
    )
    # The message content may be None (no text returned); treat that as an
    # empty answer instead of failing on .strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def process_pdf(pdf_file) -> str:
    """Extract form fields from every page of an uploaded PDF.

    Each page is rendered to a JPEG, sent to GPT-4o together with the
    module-level ``prompt``, and the per-page replies are joined with a
    ``---`` separator line.

    Args:
        pdf_file: Raw PDF bytes (what ``gr.File(type="binary")`` passes in),
            or ``None``/empty when nothing was uploaded.

    Returns:
        The concatenated per-page model replies, or a short notice when no
        file content was provided.
    """
    # Guard against a missing or empty upload before touching the PDF library.
    if not pdf_file:
        return "No PDF uploaded. Please upload a PDF file."

    # pdf_file is already bytes when using gr.File(type="binary").
    # The context manager closes the document even if a page or API call fails.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        results = [
            _extract_fields_from_image(_page_to_base64_jpeg(page))
            for page in doc
        ]
    return "\n\n---\n\n".join(results)
# Gradio UI: a single binary file upload wired to process_pdf, with the
# result shown in a text box.
_pdf_input = gr.File(type="binary", label="Upload PDF Form")
_app_title = "Healthelic Form Data Extractor (PDF Scanner) - OpenAI GPT-4o"
_app_description = (
    "Upload a scanned medical form in PDF format to extract key fields "
    "using GPT-4o vision model."
)

demo = gr.Interface(
    fn=process_pdf,
    inputs=_pdf_input,
    outputs="textbox",
    title=_app_title,
    description=_app_description,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()