# Hugging Face Spaces page header (status: Running)
import streamlit as st | |
from transformers import pipeline | |
from PIL import Image | |
import pytesseract | |
import logging | |
import PyPDF2 | |
# Setup logging
def setup_logging():
    """Configure root logging for the app.

    Installs a single stream handler, sets the threshold to INFO, and
    formats records as ``<timestamp> - <level> - <message>``.
    """
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        handlers=[console_handler],
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on an image and return the recognised text.

    Args:
        image: Image object handed to ``pytesseract.image_to_string``.

    Returns:
        The OCR output, or the fixed message
        ``"Error occurred during text extraction."`` when OCR raises.
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as ocr_error:
        # Best-effort: log the failure and fall through to the sentinel message.
        logging.error(f"Error during OCR: {ocr_error}")
    return "Error occurred during text extraction."
# Text extraction from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        file: Object handed to ``PyPDF2.PdfReader`` (``main`` passes the
            Streamlit upload).

    Returns:
        The page texts concatenated in page order, or the fixed message
        ``"Error occurred during text extraction."`` when reading fails.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # A page whose extraction yields None (or "") contributes nothing,
        # instead of raising TypeError and discarding the whole document.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        logging.error(f"Error during PDF text extraction: {e}")
        return "Error occurred during text extraction."
# Main function
# Message the extract_text_from_* helpers return when extraction fails.
_EXTRACTION_FAILED = "Error occurred during text extraction."


def _load_pipelines():
    """Build the summarization and English->Hindi/Urdu translation pipelines."""
    return {
        "summarizer": pipeline("summarization", model="facebook/bart-large-cnn"),
        "Hindi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
        "Urdu": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
    }


def _get_pipelines():
    """Return the pipelines, reusing them across Streamlit reruns when possible."""
    # st.cache_resource is not present on older Streamlit releases; fall back
    # to loading the models directly there.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is None:
        return _load_pipelines()
    return cache_resource(_load_pipelines)()


def main():
    """Render the Lab Report Analyzer page and handle one "Analyze" click.

    Text is taken from an uploaded PDF or image, or from the pasted text
    when no file is uploaded. It is shown back to the user, summarized in
    English, and the summary is translated to Hindi and Urdu. Failures are
    logged and reported in the page instead of raising.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")

    # Input widgets
    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    # Extract text based on the input source
    if uploaded_file:
        # Lower-case the name so "REPORT.PDF" is not sent to the image path.
        if uploaded_file.name.lower().endswith(".pdf"):
            st.info("Extracting text from PDF...")
            extracted_text = extract_text_from_pdf(uploaded_file)
        else:
            st.info("Extracting text from image...")
            try:
                image = Image.open(uploaded_file)
            except Exception as e:
                logging.error(f"Error opening image: {e}")
                st.error("The uploaded image could not be opened.")
                return
            extracted_text = extract_text_from_image(image)
        # Do not summarize the extractor's failure message as if it were
        # the report.
        if extracted_text == _EXTRACTION_FAILED:
            st.error("Could not extract text from the uploaded file.")
            return
    elif text_input:
        extracted_text = text_input
    else:
        st.warning("Please upload a file or enter text.")
        return

    if not extracted_text.strip():
        st.warning("No text found to analyze.")
        return

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    # Load the models only once there is something to analyze.
    try:
        models = _get_pipelines()
    except Exception as e:
        logging.error(f"Error loading models: {e}")
        st.error("An error occurred while loading the models.")
        return

    # Summarize the text
    try:
        st.info("Summarizing text...")
        summary = models["summarizer"](
            extracted_text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,  # reports longer than the model's input limit are cut, not rejected
        )[0]["summary_text"]
    except Exception as e:
        logging.error(f"Error during summarization: {e}")
        st.error("An error occurred during summarization.")
        # Without a summary there is nothing to translate.
        return
    st.subheader("Summary (English)")
    st.write(summary)

    # Translate the summary; one language failing does not block the other.
    for language in ("Hindi", "Urdu"):
        try:
            st.info(f"Translating summary to {language}...")
            translated = models[language](summary)[0]["translation_text"]
            st.subheader(f"Summary ({language})")
            st.write(translated)
        except Exception as e:
            logging.error(f"Error during {language} translation: {e}")
            st.error(f"An error occurred during {language} translation.")
# Start the app only when this file is run as the entry script.
if __name__ == "__main__":
    main()