Spaces:

mdasad3617
/

lab-report-analyzer

Running

File size: 3,552 Bytes

ae7d660
2e7c2af
375547d
fcfc162
 
d2271c1
fcfc162
d2271c1
 
 
 
 
375547d
d2271c1
ae7d660
fcfc162
 
 
 
 
 
 
 
 
 
375547d
fcfc162
 
375547d
fcfc162
 
 
 
 
 
 
 
375547d
fcfc162
 
 
 
 
 
 
 
 
ddb299c
d2271c1
375547d
fcfc162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2271c1
fcfc162
 
 
 
ae7d660
fcfc162
 
 
375547d
fcfc162
 
 
d2271c1
fcfc162
 
375547d
fcfc162
 
 
 
375547d
fcfc162
 
 
 
ae7d660
 
7be0cb3

import streamlit as st
from transformers import pipeline
import pytesseract
from PIL import Image
import fitz  # PyMuPDF for PDF processing
import logging
from concurrent.futures import ThreadPoolExecutor

# Setup logging
def setup_logging():
    """Configure the root logger for INFO-level, timestamped output."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

# Load models globally for faster performance
@st.cache_resource
def load_models():
    """Build the Hugging Face pipelines used by the app.

    Returns:
        A ``(translator_hi, translator_ur, summarizer)`` tuple: the
        English->Hindi translation pipeline, the English->Urdu translation
        pipeline, and the summarization pipeline, in that order.
    """
    logging.info("Loading Hugging Face models...")
    translation_checkpoints = (
        "Helsinki-NLP/opus-mt-en-hi",
        "Helsinki-NLP/opus-mt-en-ur",
    )
    # Unpacking the generator builds the Hindi pipeline first, then the Urdu one.
    translator_hi, translator_ur = (
        pipeline("translation", model=checkpoint)
        for checkpoint in translation_checkpoints
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return translator_hi, translator_ur, summarizer

# Function to extract text from images
def extract_text_from_image(image):
    """Return the text that ``pytesseract.image_to_string`` reads from *image*."""
    logging.info("Extracting text from image...")
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text

# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path to a PDF, or a binary file-like
            object exposing ``read()`` (such as the in-memory upload that
            ``main`` passes in).

    Returns:
        The text of all pages, concatenated in page order.
    """
    logging.info("Extracting text from PDF...")
    if hasattr(pdf_file, "read"):
        # An uploaded file lives in memory, not on disk, so PyMuPDF must be
        # given the raw bytes via ``stream`` rather than a filename.
        if hasattr(pdf_file, "seek"):
            # Rewind in case the object was already read earlier.
            pdf_file.seek(0)
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document handle even if a page fails to parse.
        doc.close()

# Function to process text in chunks for better performance
def _split_into_chunks(text, chunk_size):
    """Split *text* on whitespace into pieces of at most *chunk_size* characters."""
    chunks = []
    current = ""
    for word in text.split():
        # A single token longer than the limit has to be hard-split.
        while len(word) > chunk_size:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(word[:chunk_size])
            word = word[chunk_size:]
        if not current:
            current = word
        elif len(current) + 1 + len(word) <= chunk_size:
            current = f"{current} {word}"
        else:
            chunks.append(current)
            current = word
    if current:
        chunks.append(current)
    return chunks


def process_chunks(text, model, chunk_size=500):
    """Run *model* over *text* in bounded-size chunks and join the outputs.

    The text is split on whitespace so that no word is cut in half at a chunk
    boundary; runs of whitespace (including newlines) collapse to single
    spaces. Chunks are processed on a thread pool and the results are joined
    in the original chunk order.

    Args:
        text: The text to process.
        model: A callable invoked as ``model(chunk, max_length=200)`` that
            returns a sequence whose first item is a mapping with a
            ``"translation_text"`` key.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        The per-chunk ``"translation_text"`` values joined with single
        spaces, or ``""`` when *text* contains no non-whitespace characters.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    chunks = _split_into_chunks(text, chunk_size)
    if not chunks:
        return ""

    def run_model(chunk):
        return model(chunk, max_length=200)

    with ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so the text stays in sequence.
        results = list(executor.map(run_model, chunks))
    return " ".join(result[0]["translation_text"] for result in results)

# Main app logic
def main():
    """Render the Streamlit page and process an uploaded lab report.

    Extracts text from the uploaded image, PDF, or plain-text file, then shows
    an English summary plus Hindi and Urdu translations. Any failure during
    processing is logged with its traceback and reported to the user as a
    generic error message.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")

    translator_hi, translator_ur, summarizer = load_models()

    uploaded_file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    if not uploaded_file:
        st.info("Please upload a file to begin.")
        return

    try:
        text = ""
        if uploaded_file.type in ("image/jpeg", "image/png", "image/jpg"):
            text = extract_text_from_image(Image.open(uploaded_file))
        elif uploaded_file.type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        elif uploaded_file.type == "text/plain":
            text = uploaded_file.read().decode("utf-8")

        # OCR and PDF extraction can yield whitespace only; treat that as empty.
        if not text.strip():
            st.warning("No text could be extracted. Please check the file and try again.")
            return

        with st.spinner("Analyzing the report..."):
            # truncation=True keeps reports longer than the summarizer's input
            # limit from raising instead of being summarized.
            summary = summarizer(
                text, max_length=130, min_length=30, truncation=True
            )[0]["summary_text"]

            hindi_translation = process_chunks(text, translator_hi)
            urdu_translation = process_chunks(text, translator_ur)

        st.subheader("Analysis Summary (English):")
        st.write(summary)

        st.subheader("Hindi Translation:")
        st.write(hindi_translation)

        st.subheader("Urdu Translation:")
        st.write(urdu_translation)
    except Exception:
        # Top-level UI boundary: record the full traceback, show a friendly message.
        logging.exception("Error processing the file")
        st.error("An error occurred while processing the file. Please try again.")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()