"""Streamlit app: OCR an uploaded health-report PDF and analyze it with Gemini.

Flow: the user uploads a PDF, each page is rendered to an image and passed to
the GOT-OCR2_0 model, and the extracted text can then be sent to Google
Generative AI for a written analysis.
"""

import hashlib
import os
import shutil
import subprocess
import tempfile

import google.generativeai as genai
import streamlit as st
import torch
from pdf2image import convert_from_path
from transformers import AutoModel, AutoTokenizer

OCR_MODEL_NAME = "ucaslcl/GOT-OCR2_0"

# Name of the environment variable that must hold the Google Generative AI key.
# NOTE: a key was previously committed in this file; treat it as compromised
# and rotate it. Secrets must never be stored in source code.
GOOGLE_API_KEY_ENV = "GOOGLE_API_KEY"

# st.session_state key under which the (digest, text) of the last OCR'd upload
# is kept, so Streamlit reruns do not repeat the OCR pass.
_OCR_CACHE_KEY = "ocr_result"


def install_poppler():
    """Install Poppler (needed by pdf2image) via apt-get if it is missing.

    Does nothing when ``pdftoppm`` is already present. If one of the apt-get
    commands exits with a non-zero status, an error is shown in the UI and the
    remaining commands are skipped.
    """
    if os.path.exists("/usr/bin/pdftoppm") or shutil.which("pdftoppm"):
        return

    st.warning("Installing Poppler for PDF processing...")
    commands = (
        ["apt-get", "update"],
        ["apt-get", "install", "-y", "poppler-utils"],
    )
    for command in commands:
        completed = subprocess.run(command, check=False)
        if completed.returncode != 0:
            st.error(
                f"Command {' '.join(command)} failed "
                f"with exit code {completed.returncode}"
            )
            return


@st.cache_resource
def _load_ocr_model():
    """Load the GOT-OCR2_0 tokenizer and model once per server process.

    Streamlit re-executes the script on every widget interaction; caching the
    loaded objects avoids reloading the model weights on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple, with the model in eval mode.
    """
    # NOTE(review): trust_remote_code=True executes Python code shipped in the
    # model repository; consider pinning a revision.
    ocr_tokenizer = AutoTokenizer.from_pretrained(
        OCR_MODEL_NAME, trust_remote_code=True
    )
    ocr_model = AutoModel.from_pretrained(
        OCR_MODEL_NAME,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map="cuda" if torch.cuda.is_available() else "cpu",
        use_safetensors=True,
        pad_token_id=ocr_tokenizer.eos_token_id,
    ).eval()
    return ocr_tokenizer, ocr_model


# Load the OCR model (module-level names kept for existing references).
tokenizer, model = _load_ocr_model()


def extract_text_from_pdf(pdf_path):
    """Convert PDF pages to images and extract text using the GOT-OCR2_0 model.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of every successfully processed page, each followed by a
        newline. If an error occurs it is reported with ``st.error`` and the
        text gathered up to that point (possibly empty) is returned.
    """
    page_texts = []
    try:
        # A private temporary directory avoids file-name collisions between
        # concurrent sessions and guarantees the page images are removed even
        # when OCR fails part-way through.
        with tempfile.TemporaryDirectory() as work_dir:
            for idx, image in enumerate(convert_from_path(pdf_path)):
                image_path = os.path.join(work_dir, f"temp_page_{idx}.png")
                image.save(image_path, "PNG")
                page_texts.append(
                    model.chat(tokenizer, image_path, ocr_type="ocr")
                )
    except Exception as e:  # UI boundary: report the error, keep partial text
        st.error(f"Error extracting text: {e}")
    return "".join(f"{page_text}\n" for page_text in page_texts)


def analyze_health_data(text):
    """Analyze extracted report text using Google Generative AI.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.

    Args:
        text: The report text to analyze.

    Returns:
        The model's response text, or a string starting with
        ``"Error in LLM response:"`` when the input is empty, the API key is
        not configured, or the request fails.
    """
    if not text or not text.strip():
        return "Error in LLM response: no text was extracted from the report"

    api_key = os.environ.get(GOOGLE_API_KEY_ENV)
    if not api_key:
        return (
            f"Error in LLM response: the {GOOGLE_API_KEY_ENV} "
            "environment variable is not set"
        )

    try:
        genai.configure(api_key=api_key)
        # Named `llm` so it does not shadow the module-level OCR `model`.
        llm = genai.GenerativeModel("gemini-pro")
        response = llm.generate_content(
            f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
        )
        return response.text
    except Exception as e:  # UI boundary: surface the failure as text
        return f"Error in LLM response: {e}"


def _extract_text_from_upload(pdf_bytes):
    """Write uploaded PDF bytes to a private temp file and OCR it."""
    with tempfile.TemporaryDirectory() as work_dir:
        pdf_path = os.path.join(work_dir, "temp.pdf")
        with open(pdf_path, "wb") as f:
            f.write(pdf_bytes)
        return extract_text_from_pdf(pdf_path)


def main():
    """Render the upload / extract / analyze user interface."""
    st.title("Health Report Analyzer")

    uploaded_file = st.file_uploader(
        "Upload your health report (PDF)", type=["pdf"]
    )
    if uploaded_file is None:
        return

    pdf_bytes = uploaded_file.getvalue()
    digest = hashlib.sha256(pdf_bytes).hexdigest()

    # Clicking "Analyze Report" reruns this script; reuse the OCR result for
    # an unchanged upload instead of running the OCR pass again.
    cached = st.session_state.get(_OCR_CACHE_KEY)
    if cached is not None and cached[0] == digest:
        extracted_text = cached[1]
    else:
        with st.spinner("Extracting text from the report..."):
            extracted_text = _extract_text_from_upload(pdf_bytes)
        # Only cache non-empty results so a failed extraction is retried.
        if extracted_text:
            st.session_state[_OCR_CACHE_KEY] = (digest, extracted_text)

    st.subheader("Extracted Report Text:")
    # Only a preview is displayed; the full text is sent for analysis.
    st.text_area("Extracted Text", extracted_text[:1000], height=200)

    if st.button("Analyze Report"):
        with st.spinner("Analyzing..."):
            analysis = analyze_health_data(extracted_text)
        st.subheader("Health Analysis:")
        st.write(analysis)


if __name__ == "__main__":
    main()