neuralleap committed on
Commit
54f6bea
·
verified ·
1 Parent(s): b5a8842

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -22
app.py CHANGED
@@ -3,16 +3,14 @@ import google.generativeai as genai
3
  import base64
4
  import io
5
  from PIL import Image
 
6
  import os
7
- import json
8
 
9
- # Configure Google Cloud credentials (replace with your actual API key or setup)
10
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
11
-
12
- # Select the Gemini Pro Vision model
13
  model = genai.GenerativeModel('gemini-1.5-flash')
14
 
15
- # Prompt definition
16
  prompt = """
17
  You are analyzing a medical document or an application form from patient.
18
  Extract the following fields as JSON:
@@ -30,28 +28,43 @@ Extract the following fields as JSON:
30
  - Phone number (mobile)
31
  """
32
 
33
def process_image(image: Image.Image):
    """Send an uploaded image to Gemini and return the model's text reply.

    The image is re-encoded as JPEG in memory and passed to
    ``model.generate_content`` together with the module-level ``prompt``.

    Args:
        image: The PIL image to analyze.

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    jpeg_buffer = io.BytesIO()
    image.save(jpeg_buffer, format="JPEG")

    # The inline image part carries the raw JPEG bytes (no base64 encoding
    # is applied here), tagged with the matching MIME type.
    image_part = {
        "mime_type": "image/jpeg",
        "data": jpeg_buffer.getvalue(),
    }

    return model.generate_content([prompt, image_part]).text
46
 
47
# Gradio interface: a single image upload in, the extracted fields out as text.
demo = gr.Interface(
    title="Healthelic Form Data Extractor (Doc Scanner) - Gemini 1.5-flash",
    description="Upload a scanned medical form to extract key fields.",
    fn=process_image,
    inputs=gr.Image(type="pil"),  # hand the upload to process_image as a PIL image
    outputs="textbox",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
 
3
  import base64
4
  import io
5
  from PIL import Image
6
+ import fitz # PyMuPDF
7
  import os
 
8
 
9
+ # Configure Gemini API
10
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
 
11
  model = genai.GenerativeModel('gemini-1.5-flash')
12
 
13
+ # Prompt for Gemini
14
  prompt = """
15
  You are analyzing a medical document or an application form from patient.
16
  Extract the following fields as JSON:
 
28
  - Phone number (mobile)
29
  """
30
 
31
def _render_page_as_jpeg(page, dpi=200):
    """Rasterize one PDF page and return it encoded as JPEG bytes.

    Args:
        page: A PyMuPDF page object.
        dpi: Rendering resolution passed to ``page.get_pixmap``.

    Returns:
        The JPEG-encoded page image as ``bytes``.
    """
    # alpha=False keeps the pixmap at three bytes per pixel, which is what
    # the "RGB" mode handed to Pillow below expects.
    pix = page.get_pixmap(dpi=dpi, alpha=False)
    image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return buffered.getvalue()


def process_pdf(pdf_bytes: bytes):
    """Extract form fields from every page of a PDF using Gemini.

    Each page is rendered to a JPEG and sent to ``model.generate_content``
    together with the module-level ``prompt``; the per-page replies are
    stripped and joined with a ``---`` separator line.

    Args:
        pdf_bytes: Raw content of the uploaded PDF. ``None`` or empty bytes
            (nothing uploaded) yields an empty result.

    Returns:
        The per-page model replies joined by ``"\\n\\n---\\n\\n"``, or an
        empty string when there is no input or the PDF has no pages.
    """
    # Nothing uploaded: skip opening a document instead of failing in fitz.
    if not pdf_bytes:
        return ""

    results = []

    # The context manager closes the document even if rendering or the
    # Gemini call raises part-way through.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            jpeg_bytes = _render_page_as_jpeg(page)

            # Send the prompt plus the inline page image to Gemini.
            response = model.generate_content([
                prompt,
                {
                    "mime_type": "image/jpeg",
                    "data": jpeg_bytes,
                },
            ])

            results.append(response.text.strip())

    return "\n\n---\n\n".join(results)
 
 
 
 
 
 
 
59
 
60
# Gradio interface: a single PDF upload in, the extracted fields out as text.
demo = gr.Interface(
    title="Healthelic Form Data Extractor (PDF Scanner) - Gemini 1.5 Flash",
    description="Upload a scanned medical form in PDF format to extract key fields using Gemini 1.5 Flash.",
    fn=process_pdf,
    # type="binary" hands the upload to process_pdf as raw bytes.
    inputs=gr.File(type="binary", label="Upload PDF Form"),
    outputs="textbox",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()