Spaces:

mdasad3617
/

lab-report-analyzer

Running

File size: 3,804 Bytes

import streamlit as st
from transformers import pipeline
from PIL import Image
import pytesseract
import logging
import PyPDF2

# Setup logging
def setup_logging():
    """Configure the root logger for the app.

    Records are emitted at INFO level and above through a single
    ``logging.StreamHandler``, formatted as
    ``<timestamp> - <level name> - <message>``.
    """
    console_handler = logging.StreamHandler()
    settings = {
        "level": logging.INFO,
        "format": "%(asctime)s - %(levelname)s - %(message)s",
        "handlers": [console_handler],
    }
    logging.basicConfig(**settings)

# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on *image* via ``pytesseract`` and return the recognized text.

    Failures never propagate: the exception is logged and a fixed error
    sentence is returned in place of the text.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as ocr_error:
        logging.error(f"Error during OCR: {ocr_error}")
    # Only reached when the OCR call above raised.
    return "Error occurred during text extraction."

# Text extraction from PDF
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, concatenated in page order.

    *file* is handed to ``PyPDF2.PdfReader`` as-is. Failures never
    propagate: the exception is logged and a fixed error sentence is
    returned in place of the text.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        page_texts = [page.extract_text() for page in reader.pages]
        return "".join(page_texts)
    except Exception as pdf_error:
        logging.error(f"Error during PDF text extraction: {pdf_error}")
    # Only reached when reading or extracting above raised.
    return "Error occurred during text extraction."

# Main function
# Sentence the extract_text_from_* helpers return when extraction fails.
_EXTRACTION_ERROR = "Error occurred during text extraction."


def _load_pipelines():
    """Build the summarization and English->Hindi / English->Urdu pipelines."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    return summarizer, translator_hi, translator_ur


def _get_pipelines():
    """Return the three pipelines, reusing them across Streamlit reruns.

    Streamlit re-executes the script on every widget interaction, so
    building the models directly in ``main`` would reload them each time.
    ``st.cache_resource`` is looked up with ``getattr`` so that a Streamlit
    release without it still works (uncached, as before).
    """
    cache_resource = getattr(st, "cache_resource", None)
    loader = cache_resource(_load_pipelines) if cache_resource else _load_pipelines
    return loader()


def _extract_input_text(uploaded_file, text_input):
    """Return the text to analyze, or ``None`` when no input was given.

    An uploaded file takes precedence over pasted text. On extraction
    failure the helpers' error sentence (``_EXTRACTION_ERROR``) is returned.
    """
    if uploaded_file:
        # Case-insensitive so that e.g. "REPORT.PDF" is not routed to OCR.
        if uploaded_file.name.lower().endswith(".pdf"):
            st.info("Extracting text from PDF...")
            return extract_text_from_pdf(uploaded_file)

        st.info("Extracting text from image...")
        try:
            image = Image.open(uploaded_file)
        except Exception as e:
            # A corrupt or mislabeled upload must not crash the page.
            logging.error(f"Error opening image: {e}")
            return _EXTRACTION_ERROR
        return extract_text_from_image(image)

    if text_input:
        return text_input
    return None


def _show_translation(translator, summary, language):
    """Translate *summary* with *translator* and render it under *language*."""
    try:
        st.info(f"Translating summary to {language}...")
        translated = translator(summary)[0]['translation_text']
        st.subheader(f"Summary ({language})")
        st.write(translated)
    except Exception as e:
        logging.error(f"Error during {language} translation: {e}")
        st.error(f"An error occurred during {language} translation.")


def main():
    """Render the Lab Report Analyzer page.

    Takes an uploaded image/PDF or pasted text, shows the extracted text,
    summarizes it in English, and translates the summary to Hindi and Urdu.
    Processing stops with a message when there is no usable input, when
    text extraction fails, or when summarization fails.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")

    # Hugging Face pipelines (cached across reruns when Streamlit supports it)
    summarizer, translator_hi, translator_ur = _get_pipelines()

    # File upload section
    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    extracted_text = _extract_input_text(uploaded_file, text_input)
    if extracted_text is None:
        st.warning("Please upload a file or enter text.")
        return
    if extracted_text == _EXTRACTION_ERROR:
        # Do not summarize the error sentence as if it were report content.
        st.error("Could not extract text from the uploaded file.")
        return

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    if not extracted_text.strip():
        st.warning("No text was found to summarize.")
        return

    # Summarize the text
    try:
        st.info("Summarizing text...")
        # truncation=True keeps inputs longer than the model's limit from
        # failing; only the leading part of a very long report is summarized.
        summary = summarizer(
            extracted_text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
        st.subheader("Summary (English)")
        st.write(summary)
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        st.error("An error occurred during summarization.")
        # Without a summary there is nothing to translate.
        return

    # Translate the summary; each language fails independently.
    for translator, language in ((translator_hi, "Hindi"), (translator_ur, "Urdu")):
        _show_translation(translator, summary, language)

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()