Update app.py
app.py
CHANGED
@@ -1,60 +1,107 @@
-import logging
-import pytesseract
-from PIL import Image
-import os
 import streamlit as st

 def main():
     st.title("Lab Report Analyzer")

-        f.write(uploaded_file.getbuffer())
-        # Open the image file
-        image = Image.open("temp_image.jpg")
-        # Extract text from the uploaded image
-        extracted_text = extract_text_from_image(image)
-        st.text(extracted_text)

 if __name__ == "__main__":
     main()
 import streamlit as st
+from transformers import pipeline, VisionEncoderDecoderModel, ViTImageProcessor
+from PIL import Image
+import fitz
+import logging
+from concurrent.futures import ThreadPoolExecutor
+import torch

+# Setup logging
+def setup_logging():
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(levelname)s - %(message)s",
+    )

+# Load models globally for faster performance
+@st.cache_resource
+def load_models():
+    logging.info("Loading Hugging Face models...")
+    # Load image-to-text model from Hugging Face
+    processor = ViTImageProcessor.from_pretrained("microsoft/vision-transformation-transformer")
+    model = VisionEncoderDecoderModel.from_pretrained("microsoft/vision-transformation-transformer")
+
+    # Load translation models
+    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
+    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
+
+    # Summarization model
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+
+    return processor, model, translator_hi, translator_ur, summarizer

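One caveat on this hunk: "microsoft/vision-transformation-transformer" does not look like a published checkpoint id on the Hugging Face Hub, so from_pretrained would most likely fail the first time load_models() runs. A commonly used checkpoint that pairs ViTImageProcessor with VisionEncoderDecoderModel is nlpconnect/vit-gpt2-image-captioning; the lines below are a sketch of that substitution, an assumption rather than the model this commit intends.

# Hypothetical substitution (assumption): a known image-captioning checkpoint
# compatible with ViTImageProcessor + VisionEncoderDecoderModel.
processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
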
+# Function to extract text from images using Hugging Face model
+def extract_text_from_image(image, processor, model):
+    logging.info("Extracting text from image...")
+    # Preprocess image
+    inputs = processor(images=image, return_tensors="pt")
+    # Use the model to generate captions
+    out = model.generate(**inputs)
+    return processor.decode(out[0], skip_special_tokens=True)

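Two assumptions behind the sketch below: ViTImageProcessor only prepares pixel inputs and has no decode() method, so the return line above would need a tokenizer (or a processor that bundles one); and a ViT+GPT-2 captioning model describes the picture rather than reading the text printed on it. If genuine OCR of a report image is the goal, a TrOCR checkpoint is the usual route, keeping in mind it is trained on single text lines rather than full pages.

# Sketch, assuming OCR (reading printed text) is what the app actually needs.
# TrOCRProcessor wraps both the image processor and the tokenizer, so it can
# decode the generated token ids back into text.
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

def extract_text_with_trocr(image):
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
    pixel_values = processor(images=image.convert("RGB"), return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
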
+# Function to extract text from PDFs
+def extract_text_from_pdf(pdf_file):
+    logging.info("Extracting text from PDF...")
+    doc = fitz.open(pdf_file)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text

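A likely runtime issue here: fitz.open() expects a filesystem path or raw bytes, while Streamlit's uploader hands back an in-memory file object, so passing it straight through tends to fail. A minimal sketch of the usual PyMuPDF pattern for in-memory data:

# Minimal sketch: open the uploaded PDF from bytes via PyMuPDF's stream argument.
def extract_text_from_pdf(pdf_file):
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    return "".join(page.get_text() for page in doc)
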
+# Function to process text in chunks for better performance
+def process_chunks(text, model, chunk_size=500):
+    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+    results = []
+    with ThreadPoolExecutor() as executor:
+        results = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
+    return " ".join([result[0]["translation_text"] for result in results])

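Fixed 500-character slices can cut sentences (and even words) in half, which usually hurts translation quality. The helper below is a sketch of sentence-aware chunking under that assumption; it is not part of this commit.

import re

# Sketch (assumption): split on sentence boundaries and pack sentences into
# chunks of at most max_chars characters before handing them to a translator.
def sentence_chunks(text, max_chars=500):
    sentences = re.split(r"(?<=[.!?])\s+", text)
    chunks, current = [], ""
    for sentence in sentences:
        if current and len(current) + len(sentence) + 1 > max_chars:
            chunks.append(current.strip())
            current = ""
        current += sentence + " "
    if current.strip():
        chunks.append(current.strip())
    return chunks
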
+# Main app logic
 def main():
+    setup_logging()
     st.title("Lab Report Analyzer")
+    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
+
+    # Load models
+    processor, model, translator_hi, translator_ur, summarizer = load_models()
+
+    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
+    if file:
+        text = ""
+        try:
+            if file.type in ["image/jpeg", "image/png", "image/jpg"]:
+                image = Image.open(file)
+                text = extract_text_from_image(image, processor, model)
+            elif file.type == "application/pdf":
+                text = extract_text_from_pdf(file)
+            elif file.type == "text/plain":
+                text = file.read().decode("utf-8")

+            if text:
+                with st.spinner("Analyzing the report..."):
+                    # Generate summary
+                    summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]

+                    # Generate translations
+                    hindi_translation = process_chunks(text, translator_hi)
+                    urdu_translation = process_chunks(text, translator_ur)

+                    # Display results
+                    st.subheader("Analysis Summary (English):")
+                    st.write(summary)

+                    st.subheader("Hindi Translation:")
+                    st.write(hindi_translation)

+                    st.subheader("Urdu Translation:")
+                    st.write(urdu_translation)
+            else:
+                st.warning("No text could be extracted. Please check the file and try again.")
+        except Exception as e:
+            logging.error(f"Error processing the file: {e}")
+            st.error("An error occurred while processing the file. Please try again.")
+    else:
+        st.info("Please upload a file to begin.")

 if __name__ == "__main__":
     main()
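Two closing notes for anyone running the revised app. The summarizer call passes the whole extracted text to facebook/bart-large-cnn, which has a limited input window (on the order of 1024 tokens), so very long reports may be truncated or rejected; a chunk-then-join pass, mirroring process_chunks, is one way around that (sketch below, an assumption rather than part of the commit). The imports also imply an environment along the lines of streamlit, transformers, torch, Pillow and PyMuPDF (which provides the fitz module); the Helsinki-NLP translators typically need sentencepiece as well.

# Sketch (assumption): summarize long reports piecewise so each call stays
# within the model's input window, then join the partial summaries.
def summarize_long(text, summarizer, chunk_size=2000):
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    partials = [summarizer(chunk, max_length=130, min_length=30)[0]["summary_text"] for chunk in chunks]
    return " ".join(partials)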