# Source: Hugging Face Space "mdasad3617" — app.py (4.17 kB, commit 5244794).
# NOTE(review): the lines above this file originally contained web-page chrome
# ("raw / history blame") captured by a copy-paste; converted to this comment
# so the module is importable.
import streamlit as st
from transformers import pipeline
from PIL import Image
import fitz # PyMuPDF for PDF processing
import logging
from concurrent.futures import ThreadPoolExecutor
# Setup logging
def setup_logging():
    """Configure the root logger with a timestamped INFO-level format."""
    fmt = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=fmt)
# Load models globally for faster performance
@st.cache_resource
def load_models():
    """Build and cache the four Hugging Face pipelines used by the app.

    Returns a 4-tuple: (image-to-text OCR, English->Hindi translator,
    English->Urdu translator, summarizer). Cached by Streamlit so the
    models are downloaded/instantiated only once per process.
    """
    logging.info("Loading Hugging Face models...")
    ocr = pipeline("image-to-text", model="microsoft/trocr-large-printed")
    to_hindi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    to_urdu = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
    bart = pipeline("summarization", model="facebook/bart-large-cnn")
    return ocr, to_hindi, to_urdu, bart
# Function to extract text from images
def extract_text_from_image(image):
    """Run TrOCR on a PIL image and return all detected text joined by spaces."""
    logging.info("Extracting text from image...")
    ocr = load_models()[0]  # first element of the cached model tuple
    detections = ocr(image)
    return " ".join(d["generated_text"] for d in detections)
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: A Streamlit ``UploadedFile`` (or any file-like object with
            ``read()``), or a filesystem path string.

    Returns:
        The concatenated plain text of all pages (may be empty for
        image-only PDFs with no text layer).
    """
    logging.info("Extracting text from PDF...")
    # Bug fix: the original called fitz.open(pdf_file) directly, but Streamlit's
    # UploadedFile is a file-like object, not a path — PyMuPDF would try to use
    # it as a filename and fail. Open from the raw bytes via stream= instead.
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)
    try:
        # join() avoids quadratic += concatenation over many pages.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()  # fix: original leaked the document handle
# Function to process text in chunks for better performance
def process_chunks(text, model, chunk_size=500):
    """Run a translation pipeline over fixed-size character chunks in parallel.

    Args:
        text: The source text to translate; an empty string short-circuits.
        model: A callable translation pipeline; called as
            ``model(chunk, max_length=200)`` and expected to return a list
            whose first element has a ``"translation_text"`` key.
        chunk_size: Maximum characters per chunk (default 500). NOTE(review):
            chunks are split by raw character count and may cut words in half —
            acceptable for the current UI but worth confirming.

    Returns:
        The per-chunk translations joined with single spaces.
    """
    # Fix: guard empty input (original still spun up a ThreadPoolExecutor
    # for zero chunks); also removed a dead `results = []` assignment that
    # was immediately overwritten.
    if not text:
        return ""
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    with ThreadPoolExecutor() as executor:
        outputs = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
    return " ".join(out[0]["translation_text"] for out in outputs)
# Main app logic
def main():
    """Streamlit entry point: upload a report, extract its text, then show
    an English summary plus Hindi and Urdu translations."""
    setup_logging()
    st.title("Advanced Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")

    # Warm/fetch the cached pipelines up front.
    image_to_text, translator_hi, translator_ur, summarizer = load_models()

    uploaded = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    if not uploaded:
        st.info("Please upload a file to begin.")
        return

    try:
        extracted = ""
        # Dispatch on the browser-reported MIME type of the upload.
        if uploaded.type in ("image/jpeg", "image/png", "image/jpg"):
            extracted = extract_text_from_image(Image.open(uploaded))
        elif uploaded.type == "application/pdf":
            extracted = extract_text_from_pdf(uploaded)
        elif uploaded.type == "text/plain":
            extracted = uploaded.read().decode("utf-8")

        if not extracted:
            st.warning("No text could be extracted. Please check the file and try again.")
            return

        with st.spinner("Analyzing the report..."):
            summary = summarizer(extracted, max_length=130, min_length=30)[0]["summary_text"]
            hindi = process_chunks(extracted, translator_hi)
            urdu = process_chunks(extracted, translator_ur)

            st.subheader("Original Text:")
            st.write(extracted)
            st.subheader("Analysis Summary (English):")
            st.write(summary)
            st.subheader("Hindi Translation:")
            st.write(hindi)
            st.subheader("Urdu Translation:")
            st.write(urdu)
    except Exception as e:
        logging.error(f"Error processing the file: {e}")
        st.error(f"An error occurred while processing the file: {e}")


if __name__ == "__main__":
    main()