Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Files Community

mdasad3617 committed on Nov 30, 2024

Commit

ca69a0e

verified ·

1 Parent(s): 51a6db9

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -79

app.py CHANGED Viewed

@@ -1,94 +1,60 @@
-import streamlit as st
-from transformers import pipeline
 import pytesseract
 from PIL import Image
-import fitz  # PyMuPDF for PDF processing
-import logging
-from concurrent.futures import ThreadPoolExecutor
-# Setup logging
-def setup_logging():
-    """Configure the root logger: INFO level, "time - level - message" format."""
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s - %(levelname)s - %(message)s",
-    )
-# Load models globally for faster performance
-@st.cache_resource
-def load_models():
-    """Build the three Hugging Face pipelines used by the app.
-
-    The function is wrapped in ``st.cache_resource`` so the returned objects
-    are reused instead of being rebuilt on every call.
-
-    Returns:
-        A tuple ``(translator_hi, translator_ur, summarizer)``: two
-        ``"translation"`` pipelines (``Helsinki-NLP/opus-mt-en-hi`` and
-        ``Helsinki-NLP/opus-mt-en-ur``) and one ``"summarization"`` pipeline
-        (``facebook/bart-large-cnn``).
-    """
-    logging.info("Loading Hugging Face models...")
-    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
-    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    return translator_hi, translator_ur, summarizer
-# Function to extract text from images
 def extract_text_from_image(image):
-    """Return the text that ``pytesseract.image_to_string`` reads from *image*."""
-    logging.info("Extracting text from image...")
-    return pytesseract.image_to_string(image)
-# Function to extract text from PDFs
-def extract_text_from_pdf(pdf_file):
-    """Concatenate the text of every page of a PDF.
-
-    Args:
-        pdf_file: Passed unchanged as the first argument of ``fitz.open``.
-
-    Returns:
-        The ``page.get_text()`` output of all pages, joined with no separator.
-    """
-    logging.info("Extracting text from PDF...")
-    # NOTE(review): main() passes the Streamlit upload object here, while the
-    # first positional argument of fitz.open is a filename -- confirm this
-    # works, or open from bytes with fitz.open(stream=..., filetype="pdf").
-    doc = fitz.open(pdf_file)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-# Function to process text in chunks for better performance
-def process_chunks(text, model, chunk_size=500):
-    """Run *model* over fixed-size slices of *text* and join the translations.
-
-    Args:
-        text: String to process.
-        model: Callable invoked as ``model(chunk, max_length=200)``; each
-            result is read as ``result[0]["translation_text"]``.
-        chunk_size: Number of characters per slice.
-
-    Returns:
-        The per-chunk translations joined with single spaces.
-    """
-    # Slices are cut purely by character count, so a chunk can end mid-word.
-    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
-    results = []
-    with ThreadPoolExecutor() as executor:
-        # executor.map yields results in input order, so chunk order is kept.
-        results = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
-    return " ".join([result[0]["translation_text"] for result in results])
-# Main app logic
 def main():
-    """Streamlit entry point: upload a file, then summarise and translate it.
-
-    Text is taken from the upload according to its MIME type (OCR for images,
-    PDF text extraction, or UTF-8 decoding for plain text). An English summary
-    is produced, and the full extracted text (not the summary) is translated
-    into Hindi and Urdu. Any exception is logged and shown to the user as a
-    generic error message.
-    """
-    setup_logging()
     st.title("Lab Report Analyzer")
-    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
-    translator_hi, translator_ur, summarizer = load_models()
-    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
-    if file:
-        text = ""
-        try:
-            # Choose the extraction route from the MIME type of the upload.
-            if file.type in ["image/jpeg", "image/png", "image/jpg"]:
-                image = Image.open(file)
-                text = extract_text_from_image(image)
-            elif file.type == "application/pdf":
-                text = extract_text_from_pdf(file)
-            elif file.type == "text/plain":
-                text = file.read().decode("utf-8")
-            if text:
-                with st.spinner("Analyzing the report..."):
-                    # Generate summary
-                    # NOTE(review): the whole text goes to the summarizer in one
-                    # call; presumably a long report can exceed the model's
-                    # input limit -- confirm.
-                    summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]
-                    # Generate translations
-                    hindi_translation = process_chunks(text, translator_hi)
-                    urdu_translation = process_chunks(text, translator_ur)
-                    # Display results
-                    st.subheader("Analysis Summary (English):")
-                    st.write(summary)
-                    st.subheader("Hindi Translation:")
-                    st.write(hindi_translation)
-                    st.subheader("Urdu Translation:")
-                    st.write(urdu_translation)
-            else:
-                st.warning("No text could be extracted. Please check the file and try again.")
-        except Exception as e:
-            # Catch-all: details go to the log, the user sees a generic message.
-            logging.error(f"Error processing the file: {e}")
-            st.error("An error occurred while processing the file. Please try again.")
-    else:
-        st.info("Please upload a file to begin.")
 if __name__ == "__main__":
     main()

+import logging
 import pytesseract
 from PIL import Image
+import os
+import streamlit as st
+# Configure logging to display debug information
+logging.basicConfig(level=logging.DEBUG)
+# Function to extract text from an image
 def extract_text_from_image(image):
+    """Run Tesseract OCR on a PIL image and return the recognised text.
+
+    Args:
+        image: An opened ``PIL.Image.Image``.
+
+    Returns:
+        The text produced by ``pytesseract.image_to_string``. If any step
+        fails, the string ``"Error: <message>"`` is returned instead of
+        raising, so the caller can display it directly.
+    """
+    try:
+        logging.info("Starting text extraction from image...")
+        # Decode the pixel data now so a truncated or corrupt file fails here.
+        # Image.verify() is not used: Pillow requires the file to be reopened
+        # after verify(), so the resize below would always raise.
+        image.load()
+        logging.info("Image opened and verified successfully.")
+        # Halve each dimension to cut OCR time; clamp to 1 px so a very small
+        # image does not produce a zero-sized resize.
+        half_size = (max(1, image.width // 2), max(1, image.height // 2))
+        image = image.resize(half_size)
+        text = pytesseract.image_to_string(image)
+        logging.info("Text extraction completed successfully.")
+        return text
+    except Exception as e:
+        # Deliberate catch-all: OCR failures are reported in the UI as text.
+        logging.exception("An error occurred while processing the image: %s", e)
+        return f"Error: {str(e)}"
+# Streamlit web application
 def main():
+    """Render the Streamlit page: upload an image and show its OCR text."""
     st.title("Lab Report Analyzer")
+    st.markdown("Upload an image file to extract text from it.")
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
+    if uploaded_file is None:
+        return
+    # Read the upload straight from memory. A fixed "temp_image.jpg" on disk
+    # would be shared by every concurrent session and could be left behind
+    # (or still be open) when processing fails.
+    try:
+        with Image.open(uploaded_file) as image:
+            extracted_text = extract_text_from_image(image)
+    except OSError as e:
+        # Image.open raises an OSError subclass when the upload is not a
+        # readable image.
+        st.error(f"Could not open the uploaded image: {e}")
+        return
+    st.subheader("Extracted Text")
+    st.text(extracted_text)
 if __name__ == "__main__":
     main()