Spaces:

physician-ai
/

doc-scan-openai

Running

App Files Files Community

doc-scan-openai / app.py

neuralleap

Update app.py

2fdbea7 verified 6 days ago

raw

history blame contribute delete

2.01 kB

	import gradio as gr
	import openai
	import base64
	import io
	from PIL import Image
	import fitz # PyMuPDF
	import os

	# Read the OpenAI credential from the environment (None when the variable is unset)
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Instruction text sent to the model alongside each rendered page image.
	# It lists the form fields the model is asked to return as JSON.
	prompt = """
	You are analyzing a medical document or an application form from a patient.
	Extract the following fields as JSON:
	- Position applied for
	- Office/Ministry
	- Duty station
	- First name(s)
	- Surname
	- Date of birth
	- Gender
	- Citizenship
	- Postal Address
	- Residential Address
	- Email
	- Phone number (mobile)
	"""

	def _page_to_data_url(page, dpi=200):
	    """Render one PDF page to a JPEG and return it as a base64 ``data:`` URL."""
	    # alpha=False keeps the pixmap at three bytes per pixel, which is what
	    # Image.frombytes("RGB", ...) expects; 150-200 DPI balances legibility
	    # against upload size.
	    pix = page.get_pixmap(dpi=dpi, alpha=False)
	    image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

	    buffered = io.BytesIO()
	    image.save(buffered, format="JPEG")
	    encoded = base64.b64encode(buffered.getvalue()).decode()
	    return f"data:image/jpeg;base64,{encoded}"


	def process_pdf(pdf_file):
	    """Extract form fields from every page of an uploaded PDF using GPT-4o.

	    Each page is rendered to a JPEG and sent to the model together with the
	    module-level ``prompt``; the per-page replies are joined into one string.

	    Args:
	        pdf_file: Raw PDF bytes, as delivered by ``gr.File(type="binary")``.
	            ``None`` or empty bytes (nothing uploaded) is tolerated.

	    Returns:
	        The model's reply for each page, separated by ``---`` lines, or a
	        short hint when no file was supplied.
	    """
	    # Nothing was uploaded: answer with a hint instead of letting the PDF
	    # parser fail on a missing stream.
	    if not pdf_file:
	        return "No PDF uploaded. Please upload a PDF file and try again."

	    results = []
	    # The context manager closes the document even when a request raises.
	    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
	        for page in doc:
	            # Send the page image to GPT-4o
	            response = openai.chat.completions.create(
	                model="gpt-4o",
	                messages=[
	                    {"role": "user", "content": [
	                        {"type": "text", "text": prompt},
	                        {"type": "image_url", "image_url": {"url": _page_to_data_url(page)}},
	                    ]}
	                ],
	                max_tokens=1000,
	            )

	            # The SDK types message.content as optional; treat a missing
	            # reply as empty text rather than crashing on .strip().
	            reply = response.choices[0].message.content or ""
	            results.append(reply.strip())

	    return "\n\n---\n\n".join(results)

	# Web front end: a single PDF upload goes in, the extracted text comes out
	demo = gr.Interface(
	    title="Healthelic Form Data Extractor (PDF Scanner) - OpenAI GPT-4o",
	    description="Upload a scanned medical form in PDF format to extract key fields using GPT-4o vision model.",
	    fn=process_pdf,
	    inputs=gr.File(label="Upload PDF Form", type="binary"),
	    outputs="textbox",
	)

	# Start the app only when this file is executed directly
	if __name__ == "__main__":
	    demo.launch()