"""Streamlit app that extracts text from a lab report and summarizes it.

Text comes from an uploaded image (OCR via pytesseract), an uploaded PDF
(PyPDF2), or pasted text. The English summary is then translated to Hindi
and Urdu with Hugging Face translation pipelines.
"""

import logging

import PyPDF2
import pytesseract
import streamlit as st
from PIL import Image
from transformers import pipeline

# Value returned by the extraction helpers when extraction fails. Kept as a
# plain string so the helpers' return contract stays unchanged; main() compares
# against it so the message is never summarized as if it were report text.
EXTRACTION_ERROR_MESSAGE = "Error occurred during text extraction."


# Setup logging
def setup_logging():
    """Configure root logging to emit INFO and above through a stream handler."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler()],
    )


# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on *image* and return the recognized text.

    Args:
        image: The image to pass to ``pytesseract.image_to_string``.

    Returns:
        The recognized text, or ``EXTRACTION_ERROR_MESSAGE`` if OCR raised.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as e:  # best-effort boundary: report failure, don't crash the UI
        logging.exception("Error during OCR: %s", e)
        return EXTRACTION_ERROR_MESSAGE


# Text extraction from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: A path or binary file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        The page texts joined with newlines, or ``EXTRACTION_ERROR_MESSAGE``
        if reading or extraction raised.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` keeps the join safe if a page yields no text object; the
        # newline stops the last word of one page fusing with the next page.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:  # best-effort boundary: report failure, don't crash the UI
        logging.exception("Error during PDF text extraction: %s", e)
        return EXTRACTION_ERROR_MESSAGE


@st.cache_resource
def _load_pipelines():
    """Build the summarization and translation pipelines once per server process.

    Streamlit re-executes the script on every interaction; caching avoids
    re-instantiating the three models on each rerun.

    Returns:
        A ``(summarizer, translator_hi, translator_ur)`` tuple.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    return summarizer, translator_hi, translator_ur


def _translate_and_show(translator, summary, language):
    """Translate *summary* with *translator* and render it under a *language* heading."""
    try:
        st.info(f"Translating summary to {language}...")
        translated = translator(summary)[0]["translation_text"]
        st.subheader(f"Summary ({language})")
        st.write(translated)
    except Exception as e:  # one failed language must not block the other
        logging.exception("Error during %s translation: %s", language, e)
        st.error(f"An error occurred during {language} translation.")


# Main function
def main():
    """Render the Lab Report Analyzer page and run the analysis on demand."""
    setup_logging()

    st.title("Lab Report Analyzer")
    st.write(
        "Analyze lab reports from images, PDFs, or text and get summaries in "
        "English, Hindi, and Urdu."
    )

    # Hugging Face pipelines (cached across reruns)
    summarizer, translator_hi, translator_ur = _load_pipelines()

    # File upload section
    uploaded_file = st.file_uploader(
        "Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"]
    )
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    # Extract text based on file type
    if uploaded_file:
        # Compare case-insensitively so "REPORT.PDF" is not sent to OCR.
        if uploaded_file.name.lower().endswith(".pdf"):
            st.info("Extracting text from PDF...")
            extracted_text = extract_text_from_pdf(uploaded_file)
        else:
            st.info("Extracting text from image...")
            try:
                image = Image.open(uploaded_file)
            except OSError as e:  # includes PIL's unidentified-image error
                logging.exception("Error opening image: %s", e)
                st.error("The uploaded image could not be opened.")
                return
            extracted_text = extract_text_from_image(image)

        # Only uploaded files go through the helpers, so only they can carry
        # the failure sentinel; pasted text is never compared against it.
        if extracted_text == EXTRACTION_ERROR_MESSAGE:
            st.error("An error occurred while extracting text from the file.")
            return
    elif text_input:
        extracted_text = text_input
    else:
        st.warning("Please upload a file or enter text.")
        return

    if not extracted_text.strip():
        st.warning("No text could be extracted from the input.")
        return

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    # Summarize the text
    try:
        st.info("Summarizing text...")
        summary = summarizer(
            extracted_text, max_length=150, min_length=30, do_sample=False
        )[0]["summary_text"]
    except Exception as e:
        logging.exception("Error during summarization: %s", e)
        st.error("An error occurred during summarization.")
        # Without a summary there is nothing to translate; stopping here avoids
        # reporting spurious translation errors for an undefined summary.
        return

    st.subheader("Summary (English)")
    st.write(summary)

    # Translate summary to Hindi and Urdu
    _translate_and_show(translator_hi, summary, "Hindi")
    _translate_and_show(translator_ur, summary, "Urdu")


if __name__ == "__main__":
    main()