mdasad3617's picture
Update app.py
375547d verified
raw
history blame
3.8 kB
import streamlit as st
from transformers import pipeline
from PIL import Image
import pytesseract
import logging
import PyPDF2
# Setup logging
def setup_logging():
    """Configure the root logger for the app.

    Messages at INFO level and above are formatted as
    "<timestamp> - <level> - <message>" and sent to a single StreamHandler.
    """
    console_handler = logging.StreamHandler()
    config = {
        "level": logging.INFO,
        "format": "%(asctime)s - %(levelname)s - %(message)s",
        "handlers": [console_handler],
    }
    logging.basicConfig(**config)
# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on *image* with pytesseract and return the recognized text.

    If the OCR call raises, the error is logged and the fixed message
    "Error occurred during text extraction." is returned instead of text.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as ocr_error:
        logging.error(f"Error during OCR: {ocr_error}")
    # Only reached when the OCR call above failed.
    return "Error occurred during text extraction."
# Text extraction from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages that yielded any text, joined with newlines so
        that words on adjacent pages do not run together. If reading or
        extraction raises, the error is logged and the fixed message
        "Error occurred during text extraction." is returned instead.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        page_texts = [page.extract_text() for page in pdf_reader.pages]
        # Drop pages that produced nothing (empty string or None) so one
        # blank page cannot break the join or pad the output.
        return "\n".join(text for text in page_texts if text)
    except Exception as e:
        logging.error(f"Error during PDF text extraction: {e}")
        return "Error occurred during text extraction."
# Main function
def main():
    """Render the Lab Report Analyzer page and run the analysis on demand.

    The page offers a file uploader (PNG/JPG/JPEG/PDF) and a text area. When
    the "Analyze" button is pressed, text is taken from the uploaded file
    (preferred) or the text area, shown back to the user, summarized in
    English, and the summary is translated to Hindi and Urdu.

    Failures are reported on the page and logged. A failed extraction or
    summarization stops the run, because later steps would have nothing
    meaningful to work on. A failed translation only skips that language.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")

    # Hugging Face pipelines.
    # Streamlit re-executes the script on every interaction, so build the
    # models through st.cache_resource when this Streamlit version has it,
    # instead of reloading all three on each rerun.
    cache_resource = getattr(st, "cache_resource", None)
    load_pipelines = cache_resource(_load_pipelines) if cache_resource else _load_pipelines
    summarizer, translator_hi, translator_ur = load_pipelines()

    # File upload section
    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    extracted_text = _collect_input_text(uploaded_file, text_input)
    if extracted_text is None:
        # The helper has already told the user what went wrong.
        return

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    # Summarize the text
    try:
        st.info("Summarizing text...")
        # truncation=True clips input that exceeds the model's maximum
        # length instead of letting the call fail on long reports.
        summary = summarizer(
            extracted_text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        st.error("An error occurred during summarization.")
        # Without a summary there is nothing to translate.
        return
    st.subheader("Summary (English)")
    st.write(summary)

    # Translate the summary; one language failing does not block the other.
    for language, translator in (("Hindi", translator_hi), ("Urdu", translator_ur)):
        try:
            st.info(f"Translating summary to {language}...")
            translated = translator(summary)[0]['translation_text']
        except Exception as e:
            logging.error(f"Error during {language} translation: {e}")
            st.error(f"An error occurred during {language} translation.")
            continue
        st.subheader(f"Summary ({language})")
        st.write(translated)


def _load_pipelines():
    """Build the summarizer, English-to-Hindi and English-to-Urdu pipelines, in that order."""
    return (
        pipeline("summarization", model="facebook/bart-large-cnn"),
        pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
        pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
    )


def _collect_input_text(uploaded_file, text_input):
    """Return the text to analyze, or None after reporting on the page why there is none."""
    # Message the extract_* helpers return in place of text when they fail.
    extraction_failed = "Error occurred during text extraction."

    if uploaded_file:
        # Compare case-insensitively so "REPORT.PDF" is not treated as an image.
        if uploaded_file.name.lower().endswith(".pdf"):
            st.info("Extracting text from PDF...")
            text = extract_text_from_pdf(uploaded_file)
        else:
            st.info("Extracting text from image...")
            try:
                image = Image.open(uploaded_file)
            except Exception as e:
                logging.error(f"Error opening image: {e}")
                st.error("The uploaded image could not be opened.")
                return None
            text = extract_text_from_image(image)
        if text == extraction_failed:
            # Surface the failure instead of summarizing the error message.
            st.error(text)
            return None
    elif text_input:
        text = text_input
    else:
        st.warning("Please upload a file or enter text.")
        return None

    if not text.strip():
        st.warning("No text could be extracted from the input.")
        return None
    return text
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()