Spaces:
Sleeping
Sleeping
File size: 4,165 Bytes
ca69a0e 5244794 8b18b7b ed4ebee 8b18b7b ae7d660 8b18b7b fcfc162 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 ca69a0e 5244794 8b18b7b 5244794 ca69a0e 8b18b7b ca69a0e 8b18b7b ca69a0e 8b18b7b ddb299c 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b 5244794 8b18b7b ae7d660 5244794 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import streamlit as st
from transformers import pipeline
from PIL import Image
import fitz # PyMuPDF for PDF processing
import logging
from concurrent.futures import ThreadPoolExecutor
# Setup logging
def setup_logging():
    """Configure application-wide logging via ``logging.basicConfig``.

    Records at INFO level and above are enabled, and each record is laid out
    as ``<timestamp> - <level name> - <message>``.
    """
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
# Load models globally for faster performance
@st.cache_resource
def load_models():
    """Build every Hugging Face pipeline the app needs.

    The function is wrapped in ``st.cache_resource`` so the pipelines can be
    reused between calls rather than rebuilt each time.

    Returns:
        tuple: ``(image_to_text, translator_hi, translator_ur, summarizer)``,
        in that order.
    """
    logging.info("Loading Hugging Face models...")

    # OCR pipeline (TrOCR checkpoint for printed text).
    ocr = pipeline("image-to-text", model="microsoft/trocr-large-printed")

    # English -> Hindi, then English -> Urdu translators.
    to_hindi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    to_urdu = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")

    # Abstractive summariser.
    bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    return ocr, to_hindi, to_urdu, bart_summarizer
# Function to extract text from images
def extract_text_from_image(image):
    """Run the OCR pipeline on an image and return the recognised text.

    Args:
        image: The image to read; ``main`` passes the result of
            ``Image.open`` on the uploaded file.

    Returns:
        str: The ``generated_text`` of every OCR result, joined with single
        spaces.
    """
    logging.info("Extracting text from image...")

    # The OCR pipeline is the first element of the tuple from load_models().
    ocr_pipeline, *_ = load_models()
    predictions = ocr_pipeline(image)

    fragments = (prediction["generated_text"] for prediction in predictions)
    return " ".join(fragments)
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: One of
            * a binary file-like object exposing ``read()`` (for example the
              object returned by ``st.file_uploader``),
            * the raw PDF content as ``bytes`` / ``bytearray`` / ``memoryview``,
            * anything else (typically a filesystem path), which is handed to
              ``fitz.open`` unchanged.

    Returns:
        str: The text of all pages, concatenated in page order.
    """
    logging.info("Extracting text from PDF...")

    if hasattr(pdf_file, "read"):
        # An upload lives in memory, not on disk. fitz.open() interprets its
        # first positional argument as a filename, so the bytes must be passed
        # through ``stream=`` together with an explicit file type.
        if hasattr(pdf_file, "seek"):
            pdf_file.seek(0)  # read from the start even if already consumed
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray, memoryview)):
        doc = fitz.open(stream=bytes(pdf_file), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
# Function to process text in chunks for better performance
def process_chunks(text, model, chunk_size=500, max_length=200):
    """Run ``model`` over ``text`` in chunks and join the translated pieces.

    The text is cut into pieces of at most ``chunk_size`` characters. Cuts are
    made at whitespace whenever the window contains any, so words are not
    split in half; a single word longer than ``chunk_size`` is still cut hard.
    Pieces that contain only whitespace are skipped.

    Args:
        text: The text to process.
        model: Callable invoked as ``model(chunk, max_length=...)``; it must
            return a sequence whose first item is a mapping with a
            ``"translation_text"`` key.
        chunk_size: Maximum number of characters per chunk (must be >= 1).
        max_length: Forwarded to ``model`` as its ``max_length`` keyword.

    Returns:
        str: The ``translation_text`` of every chunk, in the original order,
        joined with single spaces. Empty when there is nothing to process.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    chunks = _split_on_whitespace(text, chunk_size)
    if not chunks:
        return ""

    def run_model(chunk):
        return model(chunk, max_length=max_length)

    # NOTE(review): ``model`` is invoked from several worker threads at once;
    # confirm the object passed in is safe to share across threads.
    with ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so the text order holds.
        results = list(executor.map(run_model, chunks))
    return " ".join(result[0]["translation_text"] for result in results)


def _split_on_whitespace(text, chunk_size):
    """Split ``text`` into stripped, non-empty pieces of at most ``chunk_size`` chars."""
    pieces = []
    total = len(text)
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        if end < total and not text[end].isspace():
            # The window would end mid-word: back up to just after the last
            # whitespace inside it, if there is one.
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            if cut > start:
                end = cut
        piece = text[start:end].strip()
        if piece:
            pieces.append(piece)
        start = end
    return pieces
# Main app logic
def main():
    """Render the Streamlit page and process an uploaded lab report.

    Flow: configure logging, load the pipelines, show the uploader, extract
    text from the uploaded image / PDF / text file, then display the original
    text, an English summary, and Hindi and Urdu translations. Any exception
    raised while processing the file is logged with its traceback and shown
    to the user instead of propagating.
    """
    setup_logging()
    st.title("Advanced Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")

    # The OCR pipeline is not needed here; extract_text_from_image() fetches
    # it on its own.
    _, translator_hi, translator_ur, summarizer = load_models()

    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    if not file:
        st.info("Please upload a file to begin.")
        return

    try:
        # Dispatch on the MIME type reported for the upload; an unrecognised
        # type leaves ``text`` empty and falls through to the warning below.
        text = ""
        if file.type in ("image/jpeg", "image/png", "image/jpg"):
            text = extract_text_from_image(Image.open(file))
        elif file.type == "application/pdf":
            text = extract_text_from_pdf(file)
        elif file.type == "text/plain":
            text = file.read().decode("utf-8")

        # Whitespace-only output counts as "nothing extracted": there is
        # nothing meaningful to summarise or translate.
        if not text.strip():
            st.warning("No text could be extracted. Please check the file and try again.")
            return

        with st.spinner("Analyzing the report..."):
            # truncation=True clips input that exceeds the summariser's
            # maximum input length instead of letting the call fail.
            summary = summarizer(
                text, max_length=130, min_length=30, truncation=True
            )[0]["summary_text"]
            hindi_translation = process_chunks(text, translator_hi)
            urdu_translation = process_chunks(text, translator_ur)

        # Display results
        st.subheader("Original Text:")
        st.write(text)
        st.subheader("Analysis Summary (English):")
        st.write(summary)
        st.subheader("Hindi Translation:")
        st.write(hindi_translation)
        st.subheader("Urdu Translation:")
        st.write(urdu_translation)
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the log and show a
        # readable message on the page.
        logging.exception("Error processing the file: %s", e)
        st.error(f"An error occurred while processing the file: {e}")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()