import streamlit as st
import google.generativeai as genai
from transformers import AutoModel, AutoTokenizer
from pdf2image import convert_from_path
import torch
import os

# Load the OCR model
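# trust_remote_code is needed because the GOT-OCR2_0 checkpoint ships its own modeling code;
# the model is placed on the GPU when one is available and falls back to CPU otherwise.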
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, low_cpu_mem_usage=True, 
                                  device_map="cuda" if torch.cuda.is_available() else "cpu", 
                                  use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()

def extract_text_from_pdf(pdf_path):
    """Converts PDF pages to images and extracts text using the GOT-OCR2_0 model."""
    text = ""
    try:
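        # pdf2image renders each PDF page to a PIL image (requires the poppler utilities on the system)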
        images = convert_from_path(pdf_path)
        for idx, image in enumerate(images):
            image_path = f"temp_page_{idx}.png"
            image.save(image_path, "PNG")
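            # GOT-OCR2_0's chat() helper runs OCR on the saved image and returns the recognized text as a string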
            extracted_text = model.chat(tokenizer, image_path, ocr_type="ocr")
            text += extracted_text + "\n"
            os.remove(image_path)  # Clean up the temporary image file
    except Exception as e:
        st.error(f"Error extracting text: {e}")
    return text

def analyze_health_data(text):
    """Analyzes extracted text using Google Generative AI (Free Tier API)."""
    try:
        # Read the API key from the environment instead of hardcoding it in source
        genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
        llm = genai.GenerativeModel("gemini-pro")  # separate name so the global OCR `model` is not shadowed
        response = llm.generate_content(
            f"Analyze this medical report and provide trends, risks, and health suggestions:\n{text}"
        )
        return response.text
    except Exception as e:
        return f"Error in LLM response: {e}"

def main():
    st.title("Health Report Analyzer")
    uploaded_file = st.file_uploader("Upload your health report (PDF)", type=["pdf"])
    
    if uploaded_file is not None:
        pdf_path = "temp.pdf"
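        # Write the upload to disk because convert_from_path expects a file path, not an in-memory buffer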
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        
        with st.spinner("Extracting text from the report..."):
            extracted_text = extract_text_from_pdf(pdf_path)
        
        st.subheader("Extracted Report Text:")
        st.text_area("Extracted Text", extracted_text[:1000], height=200)

        if st.button("Analyze Report"):
            with st.spinner("Analyzing..."):
                analysis = analyze_health_data(extracted_text)
            st.subheader("Health Analysis:")
            st.write(analysis)

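# Streamlit apps are launched with `streamlit run <script>.py`, not `python <script>.py`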
if __name__ == "__main__":
    main()