File size: 3,804 Bytes
ae7d660
2e7c2af
375547d
 
d2271c1
375547d
d2271c1
 
 
 
 
375547d
 
d2271c1
ae7d660
375547d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb299c
d2271c1
375547d
 
ae7d660
375547d
 
 
 
ae7d660
375547d
 
 
d2271c1
375547d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae7d660
375547d
 
 
 
 
d2271c1
375547d
 
 
 
d2271c1
375547d
 
d2271c1
375547d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae7d660
 
7be0cb3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import streamlit as st
from transformers import pipeline
from PIL import Image
import pytesseract
import logging
import PyPDF2

# Setup logging
def setup_logging():
    """Configure root logging at INFO level with a timestamped format.

    Records are routed through a single ``logging.StreamHandler`` and
    rendered as ``<time> - <level> - <message>``.
    """
    record_layout = "%(asctime)s - %(levelname)s - %(message)s"
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        handlers=[console_handler],
        format=record_layout,
        level=logging.INFO,
    )

# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on ``image`` and return the recognised text.

    Args:
        image: The image object handed to ``pytesseract.image_to_string``.

    Returns:
        The OCR output, or the fixed message
        ``"Error occurred during text extraction."`` when OCR raises
        (the failure is logged at ERROR level).
    """
    try:
        ocr_output = pytesseract.image_to_string(image)
    except Exception as e:
        logging.error(f"Error during OCR: {e}")
        return "Error occurred during text extraction."
    return ocr_output

# Text extraction from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        file: The PDF source handed to ``PyPDF2.PdfReader`` (the caller in
            this file passes a Streamlit uploaded-file object).

    Returns:
        The text of all pages joined with newlines, or the fixed message
        ``"Error occurred during text extraction."`` when reading or
        extraction raises (the failure is logged at ERROR level).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # ``or ""`` is defensive: if a page yields None instead of a string
        # (e.g. no text layer), it must not discard the text of the other pages.
        # Joining with a newline keeps the last word of one page from being
        # glued to the first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        logging.error(f"Error during PDF text extraction: {e}")
        return "Error occurred during text extraction."

# Main function
# Message the extraction helpers in this file return when they fail.
_EXTRACTION_ERROR = "Error occurred during text extraction."


def _load_pipelines():
    """Build the summarization and English-to-Hindi/Urdu translation pipelines."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    return summarizer, translator_hi, translator_ur


def main():
    """Render the Lab Report Analyzer page and process one analysis request.

    Flow: configure logging, load the models, show the upload/text inputs and,
    once "Analyze" is pressed, extract the text (PDF, image OCR, or pasted
    text), summarize it in English and translate the summary to Hindi and
    Urdu. Each failure is reported in the UI and stops the steps that depend
    on it; a failed translation does not prevent the other one.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")

    # Streamlit re-executes the script on every interaction; cache the models
    # so they are not rebuilt each time. Fall back to a plain call on
    # Streamlit versions that do not provide ``cache_resource``.
    cache_resource = getattr(st, "cache_resource", None)
    load_pipelines = cache_resource(_load_pipelines) if cache_resource else _load_pipelines
    summarizer, translator_hi, translator_ur = load_pipelines()

    # File upload section
    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    # Extract text based on the input that was provided
    if uploaded_file:
        # Lower-case so that "REPORT.PDF" is not mistaken for an image.
        if uploaded_file.name.lower().endswith(".pdf"):
            st.info("Extracting text from PDF...")
            extracted_text = extract_text_from_pdf(uploaded_file)
        else:
            st.info("Extracting text from image...")
            try:
                image = Image.open(uploaded_file)
            except Exception as e:
                logging.error(f"Error opening image: {e}")
                st.error("The uploaded image could not be opened.")
                return
            extracted_text = extract_text_from_image(image)
    elif text_input.strip():
        extracted_text = text_input
    else:
        st.warning("Please upload a file or enter text.")
        return

    # Do not summarize the helpers' error message or an empty document.
    if extracted_text == _EXTRACTION_ERROR:
        st.error("An error occurred during text extraction.")
        return
    if not extracted_text.strip():
        st.warning("No text could be extracted from the input.")
        return

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    # Summarize the text
    try:
        st.info("Summarizing text...")
        # truncation=True clips input that exceeds the model's maximum length
        # instead of letting the call fail on long reports.
        summary = summarizer(
            extracted_text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        st.error("An error occurred during summarization.")
        # Without a summary there is nothing to translate.
        return
    st.subheader("Summary (English)")
    st.write(summary)

    # Translate the summary; each language is attempted independently.
    for language, translator in (("Hindi", translator_hi), ("Urdu", translator_ur)):
        try:
            st.info(f"Translating summary to {language}...")
            translated = translator(summary)[0]["translation_text"]
            st.subheader(f"Summary ({language})")
            st.write(translated)
        except Exception as e:
            logging.error(f"Error during {language} translation: {e}")
            st.error(f"An error occurred during {language} translation.")

# Entry point: build the app by calling main() only when this file is run as
# the main module, not when it is imported.
if __name__ == "__main__":
    main()