rohitashva committed on
Commit
3fa9239
·
verified ·
1 Parent(s): c42136a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -40
app.py CHANGED
@@ -1,48 +1,26 @@
1
- import streamlit as st
2
  import google.generativeai as genai
3
- from transformers import AutoModel, AutoTokenizer
4
- from pdf2image import convert_from_path
5
- import torch
6
- import os
7
- import os
8
- import subprocess
9
  import streamlit as st
10
- import google.generativeai as genai
11
- from transformers import AutoModel, AutoTokenizer
12
- from pdf2image import convert_from_path
13
- import torch
14
- import os
15
- # Ensure Poppler is installed
16
- poppler_path = "/usr/bin"
17
-
18
- # Load the OCR model
19
- tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
20
- model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, low_cpu_mem_usage=True,
21
- device_map="cuda" if torch.cuda.is_available() else "cpu",
22
- use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
23
 
24
  def extract_text_from_pdf(pdf_path):
25
- """Converts PDF pages to images and extracts text using the GOT-OCR2_0 model."""
26
  text = ""
27
  try:
28
- images = convert_from_path(pdf_path)
29
- for idx, image in enumerate(images):
30
- image_path = f"temp_page_{idx}.png"
31
- image.save(image_path, "PNG")
32
- extracted_text = model.chat(tokenizer, image_path, ocr_type="ocr")
33
- text += extracted_text + "\n"
34
- os.remove(image_path) # Clean up the temporary image file
35
  except Exception as e:
36
- st.error(f"Error extracting text: {e}")
37
  return text
38
 
39
  def analyze_health_data(text):
40
  """Analyzes extracted text using Google Generative AI (Free Tier API)."""
41
  try:
42
- genai.configure(api_key=os.getenv("GEMINI")) # Replace with your Google API key
43
- model = genai.GenerativeModel("gemini-pro")
 
44
  response = model.generate_content(
45
- f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
46
  )
47
  return response.text
48
  except Exception as e:
@@ -51,18 +29,12 @@ def analyze_health_data(text):
51
  def main():
52
  st.title("Health Report Analyzer")
53
  uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
54
-
55
  if uploaded_file is not None:
56
- pdf_path = "temp.pdf"
57
- with open(pdf_path, "wb") as f:
58
  f.write(uploaded_file.getbuffer())
59
-
60
- with st.spinner("Extracting text from the report..."):
61
- extracted_text = extract_text_from_pdf(pdf_path)
62
-
63
  st.subheader("Extracted Report Text:")
64
  st.text_area("Extracted Text", extracted_text[:1000], height=200)
65
-
66
  if st.button("Analyze Report"):
67
  with st.spinner("Analyzing..."):
68
  analysis = analyze_health_data(extracted_text)
 
1
+ import fitz # PyMuPDF
2
  import google.generativeai as genai
 
 
 
 
 
 
3
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  def extract_text_from_pdf(pdf_path):
6
+ """Extracts text from a PDF file."""
7
  text = ""
8
  try:
9
+ with fitz.open(pdf_path) as doc:
10
+ for page in doc:
11
+ text += page.get_text("text") + "\n"
 
 
 
 
12
  except Exception as e:
13
+ st.error(f"Error reading PDF: {e}")
14
  return text
15
 
16
  def analyze_health_data(text):
17
  """Analyzes extracted text using Google Generative AI (Free Tier API)."""
18
  try:
19
+ # Get a free API key from Google AI Studio: https://aistudio.google.com/
20
+ genai.configure(api_key="AIzaSyAY6ZYxOzVV5N7mBZzDJ96WEPJGfuFx-mU") # Replace with free API key
21
+ model = genai.GenerativeModel("gemini-pro") # Choose appropriate model
22
  response = model.generate_content(
23
+ f"Analyze this blood report and provide trends, risks, and health suggestions:\n{text}"
24
  )
25
  return response.text
26
  except Exception as e:
 
29
  def main():
30
  st.title("Health Report Analyzer")
31
  uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
 
32
  if uploaded_file is not None:
33
+ with open("temp.pdf", "wb") as f:
 
34
  f.write(uploaded_file.getbuffer())
35
+ extracted_text = extract_text_from_pdf("temp.pdf")
 
 
 
36
  st.subheader("Extracted Report Text:")
37
  st.text_area("Extracted Text", extracted_text[:1000], height=200)
 
38
  if st.button("Analyze Report"):
39
  with st.spinner("Analyzing..."):
40
  analysis = analyze_health_data(extracted_text)