Spaces:

rohitashva
/

Report-Analyzer

Sleeping

App Files Files Community

rohitashva committed on Feb 23

Commit

3fa9239

verified ·

1 Parent(s): c42136a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -40

app.py CHANGED Viewed

@@ -1,48 +1,26 @@
-import streamlit as st
 import google.generativeai as genai
-from transformers import AutoModel, AutoTokenizer
-from pdf2image import convert_from_path
-import torch
-import os
-import os
-import subprocess
 import streamlit as st
-import google.generativeai as genai
-from transformers import AutoModel, AutoTokenizer
-from pdf2image import convert_from_path
-import torch
-import os
-# Ensure Poppler is installed
-poppler_path = "/usr/bin"
-# Load the OCR model
-tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
-model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, low_cpu_mem_usage=True,
-                                  device_map="cuda" if torch.cuda.is_available() else "cpu",
-                                  use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
 def extract_text_from_pdf(pdf_path):
-    """Converts PDF pages to images and extracts text using the GOT-OCR2_0 model."""
     text = ""
     try:
-        images = convert_from_path(pdf_path)
-        for idx, image in enumerate(images):
-            image_path = f"temp_page_{idx}.png"
-            image.save(image_path, "PNG")
-            extracted_text = model.chat(tokenizer, image_path, ocr_type="ocr")
-            text += extracted_text + "\n"
-            os.remove(image_path)  # Clean up the temporary image file
     except Exception as e:
-        st.error(f"Error extracting text: {e}")
     return text
 def analyze_health_data(text):
     """Analyzes extracted text using Google Generative AI (Free Tier API)."""
     try:
-        genai.configure(api_key=os.getenv("GEMINI"))  # Replace with your Google API key
-        model = genai.GenerativeModel("gemini-pro")
         response = model.generate_content(
-            f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
         )
         return response.text
     except Exception as e:
@@ -51,18 +29,12 @@ def analyze_health_data(text):
 def main():
     st.title("Health Report Analyzer")
     uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
     if uploaded_file is not None:
-        pdf_path = "temp.pdf"
-        with open(pdf_path, "wb") as f:
             f.write(uploaded_file.getbuffer())
-        with st.spinner("Extracting text from the report..."):
-            extracted_text = extract_text_from_pdf(pdf_path)
         st.subheader("Extracted Report Text:")
         st.text_area("Extracted Text", extracted_text[:1000], height=200)
         if st.button("Analyze Report"):
             with st.spinner("Analyzing..."):
                 analysis = analyze_health_data(extracted_text)

+import fitz  # PyMuPDF
 import google.generativeai as genai
 import streamlit as st
 def extract_text_from_pdf(pdf_path):
+    """Extracts text from a PDF file."""
     text = ""
     try:
+        with fitz.open(pdf_path) as doc:
+            for page in doc:
+                text += page.get_text("text") + "\n"
     except Exception as e:
+        st.error(f"Error reading PDF: {e}")
     return text
 def analyze_health_data(text):
     """Analyzes extracted text using Google Generative AI (Free Tier API)."""
     try:
+        # Get a free API key from Google AI Studio: https://aistudio.google.com/
+        genai.configure(api_key="AIzaSyAY6ZYxOzVV5N7mBZzDJ96WEPJGfuFx-mU")  # Replace with free API key
+        model = genai.GenerativeModel("gemini-pro")  # Choose appropriate model
         response = model.generate_content(
+            f"Analyze this blood report and provide trends, risks, and health suggestions:\n{text}"
         )
         return response.text
     except Exception as e:
 def main():
     st.title("Health Report Analyzer")
     uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
     if uploaded_file is not None:
+        with open("temp.pdf", "wb") as f:
             f.write(uploaded_file.getbuffer())
+        extracted_text = extract_text_from_pdf("temp.pdf")
         st.subheader("Extracted Report Text:")
         st.text_area("Extracted Text", extracted_text[:1000], height=200)
         if st.button("Analyze Report"):
             with st.spinner("Analyzing..."):
                 analysis = analyze_health_data(extracted_text)