Spaces:
Running
Running
File size: 3,804 Bytes
ae7d660 2e7c2af 375547d d2271c1 375547d d2271c1 375547d d2271c1 ae7d660 375547d ddb299c d2271c1 375547d ae7d660 375547d ae7d660 375547d d2271c1 375547d ae7d660 375547d d2271c1 375547d d2271c1 375547d d2271c1 375547d ae7d660 7be0cb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import streamlit as st
from transformers import pipeline
from PIL import Image
import pytesseract
import logging
import PyPDF2
# Setup logging
def setup_logging():
    """Configure root logging for the app.

    Sets the level to INFO, attaches a single ``logging.StreamHandler`` and
    formats each record as ``<time> - <level name> - <message>``.
    """
    stream_handler = logging.StreamHandler()
    logging.basicConfig(
        handlers=[stream_handler],
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
# Text extraction from image
def extract_text_from_image(image):
    """Run OCR on *image* via ``pytesseract.image_to_string``.

    Returns:
        The OCR result, or the fixed message
        ``"Error occurred during text extraction."`` if the OCR call raises
        (the exception is logged, not propagated).
    """
    try:
        return pytesseract.image_to_string(image)
    except Exception as ocr_error:
        logging.error(f"Error during OCR: {ocr_error}")
    # Only reached when the OCR call above raised.
    return "Error occurred during text extraction."
# Text extraction from PDF
def extract_text_from_pdf(file):
    """Concatenate the extracted text of every page of the PDF *file*.

    The file is opened with ``PyPDF2.PdfReader`` and pages are joined in
    order with no separator.

    Returns:
        The combined page text, or the fixed message
        ``"Error occurred during text extraction."`` if reading or
        extraction raises (the exception is logged, not propagated).
    """
    try:
        reader = PyPDF2.PdfReader(file)
        page_texts = [page.extract_text() for page in reader.pages]
        return "".join(page_texts)
    except Exception as pdf_error:
        logging.error(f"Error during PDF text extraction: {pdf_error}")
    # Only reached when reading or extracting above raised.
    return "Error occurred during text extraction."
# Main function
def main():
    """Render the Lab Report Analyzer page and run one analysis per click.

    Flow: collect an uploaded image/PDF or pasted text, show the extracted
    text, summarize it in English, then translate the summary to Hindi and
    Urdu. Each stage reports its own failure in the UI and later stages are
    skipped when their input is missing.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")

    # Input widgets
    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
    text_input = st.text_area("Or paste your text here:")

    if not st.button("Analyze"):
        return

    if not uploaded_file and not text_input:
        st.warning("Please upload a file or enter text.")
        return

    extracted_text = _extract_text(uploaded_file, text_input)

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("Extracted Text:", extracted_text, height=200)

    # Do not feed the failure message or blank text to the summarizer.
    if uploaded_file and extracted_text == _EXTRACTION_ERROR_TEXT:
        st.error("Text extraction failed, so there is nothing to summarize.")
        return
    if not extracted_text or not extracted_text.strip():
        st.warning("No text was found in the input.")
        return

    # Streamlit re-executes this script on every interaction; cache_resource
    # keeps the models in memory instead of rebuilding them on each rerun.
    summarizer, translator_hi, translator_ur = st.cache_resource(_load_pipelines)()

    # Summarize the text
    try:
        st.info("Summarizing text...")
        # truncation=True clips over-long input to the model's limit instead
        # of letting the call fail on long reports.
        summary = summarizer(
            extracted_text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
    except Exception as error:
        logging.exception("Error during summarization: %s", error)
        st.error("An error occurred during summarization.")
        return  # without a summary there is nothing to translate
    st.subheader("Summary (English)")
    st.write(summary)

    # Translate the summary; a failure in one language does not block the other.
    _show_translation(translator_hi, summary, "Hindi")
    _show_translation(translator_ur, summary, "Urdu")


# Message the extract_text_from_* helpers return when extraction fails.
_EXTRACTION_ERROR_TEXT = "Error occurred during text extraction."


def _load_pipelines():
    """Build the summarization, English-to-Hindi and English-to-Urdu pipelines."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    return summarizer, translator_hi, translator_ur


def _extract_text(uploaded_file, text_input):
    """Return the text to analyze: from the upload if there is one, else the pasted text."""
    if not uploaded_file:
        return text_input
    # Case-insensitive so that e.g. "REPORT.PDF" is not routed to the image path.
    if uploaded_file.name.lower().endswith(".pdf"):
        st.info("Extracting text from PDF...")
        return extract_text_from_pdf(uploaded_file)
    st.info("Extracting text from image...")
    try:
        image = Image.open(uploaded_file)
    except (OSError, ValueError) as error:
        # An unreadable upload should produce a message, not crash the page.
        logging.exception("Error opening uploaded image: %s", error)
        return _EXTRACTION_ERROR_TEXT
    return extract_text_from_image(image)


def _show_translation(translator, summary, language):
    """Translate *summary* with *translator* and render it under "Summary (<language>)"."""
    try:
        st.info(f"Translating summary to {language}...")
        translated = translator(summary)[0]["translation_text"]
    except Exception as error:
        logging.exception("Error during %s translation: %s", language, error)
        st.error(f"An error occurred during {language} translation.")
        return
    st.subheader(f"Summary ({language})")
    st.write(translated)
# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|