rohitashva committed on
Commit
1ec5d32
·
verified ·
1 Parent(s): 3b24287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -44
app.py CHANGED
@@ -1,45 +1,63 @@
1
- import fitz # PyMuPDF
2
- import google.generativeai as genai
3
- import streamlit as st
4
-
5
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``.

    Each page's text is followed by a newline. If reading fails, the error
    is shown through ``st.error`` and whatever text was collected before the
    failure is returned (possibly an empty string).
    """
    page_texts = []
    try:
        with fitz.open(pdf_path) as document:
            for page in document:
                page_texts.append(page.get_text("text") + "\n")
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
    return "".join(page_texts)
15
-
16
def analyze_health_data(text):
    """Ask Google Generative AI to analyze the extracted report text.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable
    instead of being embedded in the source, so the credential never lands
    in version control. Get a key from Google AI Studio:
    https://aistudio.google.com/

    Args:
        text: Report text extracted from the uploaded PDF.

    Returns:
        The model's analysis as a string, or a message starting with
        "Error in LLM response:" when the input is empty, the key is not
        configured, or the API call fails.
    """
    import os  # function-scope import keeps this block self-contained

    # Nothing to analyze: avoid asking the model to invent an analysis.
    if not text or not text.strip():
        return "Error in LLM response: no report text to analyze."

    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error in LLM response: GOOGLE_API_KEY environment variable is not set."

    try:
        genai.configure(api_key=api_key)
        llm = genai.GenerativeModel("gemini-pro")  # Choose appropriate model
        response = llm.generate_content(
            f"Analyze this blood report and provide trends, risks, and health suggestions:\n{text}"
        )
        return response.text
    except Exception as e:
        # Errors are returned as text so the caller can display them directly.
        return f"Error in LLM response: {e}"
28
-
29
def main():
    """Run the Streamlit UI: upload a PDF, show its text, analyze on demand.

    The uploaded file is written into a private temporary directory that is
    removed as soon as text extraction finishes, so reports are not left on
    disk and concurrent sessions cannot overwrite each other's upload.
    """
    import os
    import tempfile

    st.title("Health Report Analyzer")
    uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
    if uploaded_file is not None:
        with tempfile.TemporaryDirectory() as workdir:
            pdf_path = os.path.join(workdir, "temp.pdf")
            with open(pdf_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            extracted_text = extract_text_from_pdf(pdf_path)

        st.subheader("Extracted Report Text:")
        # Only a 1000-character preview is displayed; the full text is analyzed.
        st.text_area("Extracted Text", extracted_text[:1000], height=200)
        if st.button("Analyze Report"):
            with st.spinner("Analyzing..."):
                analysis = analyze_health_data(extracted_text)
                st.subheader("Health Analysis:")
                st.write(analysis)


if __name__ == "__main__":
    main()
 
1
+ import streamlit as st
2
+ import google.generativeai as genai
3
+ from transformers import AutoModel, AutoTokenizer
4
+ from pdf2image import convert_from_path
5
+ import torch
6
+ import os
7
+
8
@st.cache_resource
def _load_ocr_model():
    """Load the GOT-OCR2_0 tokenizer and model once per server process.

    ``st.cache_resource`` keeps the loaded objects across Streamlit script
    reruns, so the model is not re-loaded on every widget interaction.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode and placed
        on CUDA when available, otherwise on CPU.
    """
    # NOTE(review): trust_remote_code=True executes code shipped with the
    # model repository; consider pinning a reviewed revision.
    ocr_tokenizer = AutoTokenizer.from_pretrained(
        "ucaslcl/GOT-OCR2_0", trust_remote_code=True
    )
    ocr_model = AutoModel.from_pretrained(
        "ucaslcl/GOT-OCR2_0",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map="cuda" if torch.cuda.is_available() else "cpu",
        use_safetensors=True,
        pad_token_id=ocr_tokenizer.eos_token_id,
    ).eval()
    return ocr_tokenizer, ocr_model


# Module-level names are kept because the rest of the file refers to them.
tokenizer, model = _load_ocr_model()
13
+
14
def extract_text_from_pdf(pdf_path):
    """Convert PDF pages to images and OCR each one with the GOT-OCR2_0 model.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The recognized text of all pages, each followed by a newline. If an
        error occurs it is reported through ``st.error`` and the text
        gathered before the failure is returned (possibly an empty string).
    """
    import os
    import tempfile

    page_texts = []
    try:
        # A private temporary directory avoids filename clashes between
        # concurrent sessions and is removed even when OCR raises midway,
        # so page images are never left behind on disk.
        with tempfile.TemporaryDirectory() as workdir:
            for idx, image in enumerate(convert_from_path(pdf_path)):
                image_path = os.path.join(workdir, f"temp_page_{idx}.png")
                image.save(image_path, "PNG")
                # model.chat takes an image file path, hence the save above.
                page_text = model.chat(tokenizer, image_path, ocr_type="ocr")
                page_texts.append(page_text + "\n")
    except Exception as e:
        st.error(f"Error extracting text: {e}")
    return "".join(page_texts)
28
+
29
def analyze_health_data(text):
    """Ask Google Generative AI to analyze the extracted report text.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable
    instead of being embedded in the source, so the credential never lands
    in version control.

    Args:
        text: Report text extracted from the uploaded PDF.

    Returns:
        The model's analysis as a string, or a message starting with
        "Error in LLM response:" when the input is empty, the key is not
        configured, or the API call fails.
    """
    import os  # function-scope import keeps this block self-contained

    # Nothing to analyze: avoid asking the model to invent an analysis.
    if not text or not text.strip():
        return "Error in LLM response: no report text to analyze."

    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "Error in LLM response: GOOGLE_API_KEY environment variable is not set."

    try:
        genai.configure(api_key=api_key)
        # Named ``llm`` so it is not confused with the module-level OCR ``model``.
        llm = genai.GenerativeModel("gemini-pro")
        response = llm.generate_content(
            f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
        )
        return response.text
    except Exception as e:
        # Errors are returned as text so the caller can display them directly.
        return f"Error in LLM response: {e}"
40
+
41
def main():
    """Run the Streamlit UI: upload a PDF, OCR it, analyze on demand.

    The uploaded file is written into a private temporary directory that is
    removed as soon as text extraction finishes, so reports are not left on
    disk and concurrent sessions cannot overwrite each other's upload.
    """
    import os
    import tempfile

    st.title("Health Report Analyzer")
    uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])

    if uploaded_file is not None:
        with st.spinner("Extracting text from the report..."):
            with tempfile.TemporaryDirectory() as workdir:
                pdf_path = os.path.join(workdir, "temp.pdf")
                with open(pdf_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                extracted_text = extract_text_from_pdf(pdf_path)

        st.subheader("Extracted Report Text:")
        # Only a 1000-character preview is displayed; the full text is analyzed.
        st.text_area("Extracted Text", extracted_text[:1000], height=200)

        if st.button("Analyze Report"):
            with st.spinner("Analyzing..."):
                analysis = analyze_health_data(extracted_text)
                st.subheader("Health Analysis:")
                st.write(analysis)


if __name__ == "__main__":
    main()