Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Community

mdasad3617 committed on Nov 30, 2024

Commit

fcfc162

verified ·

1 Parent(s): 82535dc

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -78

app.py CHANGED Viewed

@@ -1,105 +1,94 @@
 import streamlit as st
 from transformers import pipeline
-from PIL import Image
 import pytesseract
 import logging
-import PyPDF2
 # Setup logging
 def setup_logging():
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(levelname)s - %(message)s",
-        handlers=[logging.StreamHandler()],
     )
-# Text extraction from image
 def extract_text_from_image(image):
-    try:
-        text = pytesseract.image_to_string(image)
-        return text
-    except Exception as e:
-        logging.error(f"Error during OCR: {e}")
-        return "Error occurred during text extraction."
-# Text extraction from PDF
-def extract_text_from_pdf(file):
-    try:
-        pdf_reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-        return text
-    except Exception as e:
-        logging.error(f"Error during PDF text extraction: {e}")
-        return "Error occurred during text extraction."
-# Main function
 def main():
     setup_logging()
     st.title("Lab Report Analyzer")
-    st.write("Analyze lab reports from images, PDFs, or text and get summaries in English, Hindi, and Urdu.")
-    # Hugging Face pipelines
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # Summarization model
-    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")  # English to Hindi
-    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")  # English to Urdu
-    # File upload section
-    uploaded_file = st.file_uploader("Upload a file (Image or PDF):", type=["png", "jpg", "jpeg", "pdf"])
-    text_input = st.text_area("Or paste your text here:")
-    if st.button("Analyze"):
-        extracted_text = ""
-        # Extract text based on file type
-        if uploaded_file:
-            if uploaded_file.name.endswith(".pdf"):
-                st.info("Extracting text from PDF...")
-                extracted_text = extract_text_from_pdf(uploaded_file)
-            else:
-                st.info("Extracting text from image...")
-                image = Image.open(uploaded_file)
-                extracted_text = extract_text_from_image(image)
-        elif text_input:
-            extracted_text = text_input
-        else:
-            st.warning("Please upload a file or enter text.")
-            return
-        # Display extracted text
-        st.subheader("Extracted Text")
-        st.text_area("Extracted Text:", extracted_text, height=200)
-        # Summarize the text
-        try:
-            st.info("Summarizing text...")
-            summary = summarizer(extracted_text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
-            st.subheader("Summary (English)")
-            st.write(summary)
-        except Exception as e:
-            logging.error(f"Error during summarization: {e}")
-            st.error("An error occurred during summarization.")
-        # Translate summary to Hindi
-        try:
-            st.info("Translating summary to Hindi...")
-            summary_hi = translator_hi(summary)[0]['translation_text']
-            st.subheader("Summary (Hindi)")
-            st.write(summary_hi)
-        except Exception as e:
-            logging.error(f"Error during Hindi translation: {e}")
-            st.error("An error occurred during Hindi translation.")
-        # Translate summary to Urdu
-        try:
-            st.info("Translating summary to Urdu...")
-            summary_ur = translator_ur(summary)[0]['translation_text']
-            st.subheader("Summary (Urdu)")
-            st.write(summary_ur)
         except Exception as e:
-            logging.error(f"Error during Urdu translation: {e}")
-            st.error("An error occurred during Urdu translation.")
 if __name__ == "__main__":
     main()

 import streamlit as st
 from transformers import pipeline
 import pytesseract
+from PIL import Image
+import fitz  # PyMuPDF for PDF processing
 import logging
+from concurrent.futures import ThreadPoolExecutor
 # Setup logging
 def setup_logging():
+    """Configure the root logger: INFO level, "time - level - message" format."""
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(levelname)s - %(message)s",
     )
+# Load models globally for faster performance
+@st.cache_resource
+def load_models():
+    """Create the Hugging Face pipelines the app relies on.
+
+    Wrapped in ``st.cache_resource`` so the pipelines are built once and
+    reused rather than reloaded on every Streamlit rerun.
+
+    Returns:
+        A 3-tuple ``(english->hindi translator, english->urdu translator,
+        summarizer)``, in that order.
+    """
+    logging.info("Loading Hugging Face models...")
+    en_to_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
+    en_to_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
+    bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    return en_to_hi, en_to_ur, bart_summarizer
+# Function to extract text from images
 def extract_text_from_image(image):
+    """Run Tesseract OCR on ``image`` and return the recognised text.
+
+    Errors from pytesseract are not caught here; they propagate to the caller.
+    """
+    logging.info("Extracting text from image...")
+    ocr_text = pytesseract.image_to_string(image)
+    return ocr_text
+# Function to extract text from PDFs
+def extract_text_from_pdf(pdf_file):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    Args:
+        pdf_file: Either a filesystem path, or a binary file-like object
+            exposing ``read()`` (``main()`` passes the object returned by
+            ``st.file_uploader``).
+
+    Returns:
+        The extracted text of all pages as a single string.
+    """
+    logging.info("Extracting text from PDF...")
+    if hasattr(pdf_file, "read"):
+        # fitz.open() interprets its first positional argument as a file
+        # name, so an in-memory upload must be handed over as raw bytes via
+        # ``stream`` together with an explicit file type.
+        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+    else:
+        doc = fitz.open(pdf_file)
+    # Using the document as a context manager closes it even when a page
+    # fails to parse.
+    with doc:
+        return "".join(page.get_text() for page in doc)
+# Function to process text in chunks for better performance
+def process_chunks(text, model, chunk_size=500):
+    """Run ``model`` over ``text`` chunk by chunk and join the outputs.
+
+    Args:
+        text: The text to process.
+        model: Callable invoked as ``model(chunk, max_length=200)``. Its
+            result must be a sequence whose first item is a mapping with a
+            ``"translation_text"`` key.
+        chunk_size: Maximum number of characters per chunk.
+
+    Returns:
+        The per-chunk outputs, in input order, joined by single spaces.
+        An empty string when ``text`` contains no non-whitespace characters.
+    """
+    import textwrap
+
+    # Break on whitespace so a word is never cut in half between two chunks
+    # (fixed-offset slicing did exactly that). Only a single word longer
+    # than chunk_size is split; hyphenated terms are kept whole.
+    chunks = textwrap.wrap(text, width=chunk_size, break_on_hyphens=False)
+    with ThreadPoolExecutor() as executor:
+        # executor.map yields results in submission order, so the chunk
+        # order of the input is preserved in the output.
+        results = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
+    return " ".join(result[0]["translation_text"] for result in results)
+# Main app logic
 def main():
+    """Render the Streamlit page.
+
+    Lets the user upload an image, PDF or plain-text file, extracts its
+    text, and shows an English summary together with Hindi and Urdu
+    translations of the extracted text. Any failure while processing the
+    upload is logged and reported in the UI instead of crashing the app.
+    """
     setup_logging()
     st.title("Lab Report Analyzer")
+    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
+    translator_hi, translator_ur, summarizer = load_models()
+    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
+    if file:
+        text = ""
+        try:
+            # Dispatch on the MIME type reported for the upload.
+            if file.type in ["image/jpeg", "image/png", "image/jpg"]:
+                image = Image.open(file)
+                text = extract_text_from_image(image)
+            elif file.type == "application/pdf":
+                text = extract_text_from_pdf(file)
+            elif file.type == "text/plain":
+                text = file.read().decode("utf-8")
+            # Whitespace-only output (e.g. OCR of a blank page) counts as
+            # "nothing extracted" rather than being sent to the models.
+            if text.strip():
+                with st.spinner("Analyzing the report..."):
+                    # Generate summary. truncation=True clips reports that
+                    # exceed the summarization model's maximum input length,
+                    # which would otherwise raise inside the pipeline.
+                    summary = summarizer(text, max_length=130, min_length=30, truncation=True)[0]["summary_text"]
+                    # Generate translations
+                    hindi_translation = process_chunks(text, translator_hi)
+                    urdu_translation = process_chunks(text, translator_ur)
+                    # Display results
+                    st.subheader("Analysis Summary (English):")
+                    st.write(summary)
+                    st.subheader("Hindi Translation:")
+                    st.write(hindi_translation)
+                    st.subheader("Urdu Translation:")
+                    st.write(urdu_translation)
+            else:
+                st.warning("No text could be extracted. Please check the file and try again.")
         except Exception as e:
+            # logging.exception records the traceback as well as the message.
+            logging.exception("Error processing the file: %s", e)
+            st.error("An error occurred while processing the file. Please try again.")
+    else:
+        st.info("Please upload a file to begin.")
+# Entry point: build the page when this file is executed as a script.
 if __name__ == "__main__":
     main()