# Hugging Face Space - status: Running
# File size: 3,552 Bytes
import streamlit as st
from transformers import pipeline
import pytesseract
from PIL import Image
import fitz # PyMuPDF for PDF processing
import logging
from concurrent.futures import ThreadPoolExecutor
# Setup logging
def setup_logging():
    """Configure the root logger for the app.

    Records at INFO level and above are emitted, each formatted as
    ``<timestamp> - <level name> - <message>``.
    """
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
# Load models globally for faster performance
@st.cache_resource
def load_models():
    """Build the Hugging Face pipelines used by the app.

    Wrapped in ``st.cache_resource`` so the (expensive) pipelines are meant
    to be created once and reused rather than rebuilt on every call.

    Returns:
        A 3-tuple ``(translator_hi, translator_ur, summarizer)``: the
        English->Hindi translation pipeline, the English->Urdu translation
        pipeline, and the summarization pipeline, in that order.
    """
    logging.info("Loading Hugging Face models...")
    translation_checkpoints = (
        "Helsinki-NLP/opus-mt-en-hi",
        "Helsinki-NLP/opus-mt-en-ur",
    )
    # Unpacking consumes the generator left to right, so Hindi loads first.
    translator_hi, translator_ur = (
        pipeline("translation", model=checkpoint)
        for checkpoint in translation_checkpoints
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return translator_hi, translator_ur, summarizer
# Function to extract text from images
def extract_text_from_image(image):
    """Run OCR on *image* and return the recognized text.

    Args:
        image: The image to read; it is handed unchanged to
            ``pytesseract.image_to_string`` (the caller in this file passes
            a PIL image).

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    logging.info("Extracting text from image...")
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: One of
            * raw PDF content as ``bytes`` / ``bytearray``,
            * a binary file-like object exposing ``read()`` (for example the
              object returned by ``st.file_uploader``), or
            * a filesystem path accepted by ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order.
    """
    logging.info("Extracting text from PDF...")
    # The first positional argument of ``fitz.open`` is treated as a
    # filename, so in-memory content has to go through ``stream=`` together
    # with an explicit ``filetype``.
    if isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=pdf_file, filetype="pdf")
    elif hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    try:
        # join() avoids repeated string reallocation on large documents.
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document even if text extraction fails part-way.
        doc.close()
# Function to process text in chunks for better performance
def _split_into_chunks(text, chunk_size):
    """Split *text* into stripped pieces of at most *chunk_size* characters.

    A cut is moved back to the nearest preceding whitespace so that words are
    not broken in half; a single word longer than *chunk_size* is still cut
    at the hard limit. Pieces that contain only whitespace are dropped.
    """
    chunks = []
    start = 0
    length = len(text)
    while start < length:
        end = min(start + chunk_size, length)
        if end < length and not text[end].isspace():
            # The hard limit falls inside a word: back up to just after the
            # last whitespace character in this window, if there is one.
            boundary = end
            while boundary > start and not text[boundary - 1].isspace():
                boundary -= 1
            if boundary > start:
                end = boundary
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        start = end
    return chunks


def process_chunks(text, model, chunk_size=500):
    """Run *model* over *text* in bounded-size chunks and join the outputs.

    Args:
        text: The input string to process.
        model: A callable invoked as ``model(chunk, max_length=200)`` that
            returns a sequence whose first item is a mapping with a
            ``"translation_text"`` key.
        chunk_size: Maximum number of characters per chunk (default 500).

    Returns:
        The ``"translation_text"`` of every chunk, in input order, joined
        with single spaces. An empty or whitespace-only *text* yields ``""``
        without calling *model*.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    chunks = _split_into_chunks(text, chunk_size)
    if not chunks:
        return ""

    # NOTE(review): *model* is called concurrently from worker threads;
    # confirm the callable passed in is safe to share across threads.
    # executor.map yields results in the same order as *chunks*.
    with ThreadPoolExecutor() as executor:
        results = list(
            executor.map(lambda chunk: model(chunk, max_length=200), chunks)
        )
    return " ".join(result[0]["translation_text"] for result in results)
# Main app logic
def main():
    """Render the Streamlit page and process an uploaded lab report.

    Flow: configure logging, load the cached pipelines, let the user upload
    an image, PDF, or text file, extract its text, then show an English
    summary plus Hindi and Urdu translations. Any failure while processing
    the upload is logged with its traceback and reported to the user as a
    generic error message.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
    translator_hi, translator_ur, summarizer = load_models()
    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])

    if not file:
        st.info("Please upload a file to begin.")
        return

    text = ""
    try:
        if file.type in ("image/jpeg", "image/png", "image/jpg"):
            image = Image.open(file)
            text = extract_text_from_image(image)
        elif file.type == "application/pdf":
            text = extract_text_from_pdf(file)
        elif file.type == "text/plain":
            text = file.read().decode("utf-8")

        # Extraction can yield only whitespace/control characters for a
        # blank page; treat that the same as "nothing extracted".
        if not text.strip():
            st.warning("No text could be extracted. Please check the file and try again.")
            return

        with st.spinner("Analyzing the report..."):
            # truncation=True keeps inputs longer than the summarization
            # model's maximum input length from raising instead of being
            # summarized.
            summary = summarizer(
                text, max_length=130, min_length=30, truncation=True
            )[0]["summary_text"]
            hindi_translation = process_chunks(text, translator_hi)
            urdu_translation = process_chunks(text, translator_ur)

        st.subheader("Analysis Summary (English):")
        st.write(summary)
        st.subheader("Hindi Translation:")
        st.write(hindi_translation)
        st.subheader("Urdu Translation:")
        st.write(urdu_translation)
    except Exception as e:
        # Top-level UI boundary: log with traceback, show a generic message.
        logging.exception("Error processing the file: %s", e)
        st.error("An error occurred while processing the file. Please try again.")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|