Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Community

mdasad3617 committed on Dec 1, 2024

Commit

8cf2395

verified ·

1 Parent(s): 4f48910

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -59

app.py CHANGED Viewed

@@ -1,65 +1,116 @@
 import streamlit as st
-from models import initialize_models
-from models.pdf_handler import parse_pdf
-from models.image_handler import analyze_image
-from models.summarizer import summarize_text
-from models.translator import translate_text
-from models.problem_checker import flag_lab_problems
 from PIL import Image
 # Initialize Models
-models = initialize_models()
 # Streamlit App
-st.title("Lab Test Analyzer Dashboard")
-# File Upload
-uploaded_file = st.file_uploader("Upload a Lab Report (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])
-if uploaded_file:
-    file_type = uploaded_file.name.split(".")[-1].lower()
-    # Extract Text Based on File Type
-    extracted_text = ""
-    if file_type == "pdf":
-        st.write("Processing PDF file...")
-        extracted_text = parse_pdf(uploaded_file)
-    elif file_type in ["png", "jpg", "jpeg"]:
-        st.write("Processing Image file...")
-        image = Image.open(uploaded_file)
-        extracted_text = analyze_image(image, models["image_model"])
-    elif file_type == "txt":
-        st.write("Processing Text file...")
-        extracted_text = uploaded_file.read().decode("utf-8")
-    else:
-        st.error("Unsupported file type.")
-    # Display Extracted Text
-    if extracted_text:
-        st.subheader("Extracted Content")
-        st.text_area("Extracted Text", extracted_text, height=200)
-        # Summarization
-        summary = summarize_text(extracted_text, models["summarize_model"])
-        st.subheader("Summary of the Report")
-        st.text_area("Summary", summary, height=150)
-        # Sentiment Analysis
-        st.subheader("Sentiment Analysis")
-        sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
-        st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
-        # Problem Detection
-        problems = flag_lab_problems(summary)
-        st.subheader("Detected Problems")
-        st.write(problems)
-        # Translation
-        st.subheader("Translations")
-        translations = translate_content(summary, models["translation_model"])
-        st.write("**English**: ", translations["English"])
-        st.write("**Hindi**: ", translations["Hindi"])
-        st.write("**Urdu**: ", translations["Urdu"])
-    else:
-        st.error("Could not extract text from the uploaded file.")

 import streamlit as st
+from transformers import pipeline
+import pdfplumber
 from PIL import Image
+import pytesseract
+from langdetect import detect
 # Initialize Models
+@st.cache_resource
+def initialize_models():
+    """Build every Hugging Face pipeline the app uses and return them in a dict.
+
+    Decorated with ``st.cache_resource`` so the pipelines are constructed once
+    and reused rather than rebuilt on each call.
+
+    Returns:
+        dict with keys ``"report_check_model"``, ``"sentiment_model"``,
+        ``"summarize_model"`` and ``"translation_model"`` (itself a dict of
+        pipelines keyed by ``"en"``, ``"hi"`` and ``"ur"``).
+    """
+    return {
+        # is_lab_report() calls this pipeline with candidate_labels=... and reads
+        # result["labels"], which is the zero-shot-classification contract; the
+        # plain "text-classification" task does not return that structure.
+        "report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
+        "sentiment_model": pipeline("sentiment-analysis"),
+        "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
+        "translation_model": {
+            # NOTE(review): the "en" pipeline is not used by translate_content();
+            # kept so the returned structure is unchanged for any other caller.
+            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
+            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
+            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
+        },
+    }
+# Extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    """Return the text of every page in *pdf_file*, one page per line group.
+
+    Args:
+        pdf_file: a path or file-like object accepted by ``pdfplumber.open``.
+
+    Returns:
+        The page texts joined with newlines and stripped of surrounding
+        whitespace; an empty string when no page yields any text.
+    """
+    with pdfplumber.open(pdf_file) as pdf:
+        # A page without a text layer (e.g. a scanned image) may yield None or
+        # "" depending on the pdfplumber version; "or ''" keeps the join safe.
+        page_texts = [page.extract_text() or "" for page in pdf.pages]
+    # Join with a newline so the last word of one page does not fuse with the
+    # first word of the next.
+    return "\n".join(page_texts).strip()
+# OCR an uploaded image
+def extract_text_from_image(image_file):
+    """Open *image_file* with PIL, run Tesseract OCR on it, and return the stripped text."""
+    ocr_output = pytesseract.image_to_string(Image.open(image_file))
+    return ocr_output.strip()
+# Decide whether the text looks like a lab report
+def is_lab_report(text, model):
+    """Return True when *model* ranks "lab report" as the top label for *text*.
+
+    *model* is called as ``model(text, candidate_labels=[...])`` and must return
+    a mapping whose ``"labels"`` entry lists the labels best-first.
+    """
+    candidate_labels = ["lab report", "not lab report"]
+    prediction = model(text, candidate_labels=candidate_labels)
+    top_label = prediction["labels"][0]
+    return top_label == "lab report"
+# Analyze sentiment
+def analyze_sentiment(text, sentiment_model):
+    """Classify *text* as "Positive" or "Negative" and return the model's score.
+
+    Args:
+        text: the text to classify.
+        sentiment_model: a callable (a ``sentiment-analysis`` pipeline) that
+            returns a list whose first item has ``"label"`` and ``"score"``.
+
+    Returns:
+        ``(sentiment, score)`` where sentiment is "Positive" only when the
+        label is exactly "POSITIVE"; any other label maps to "Negative".
+    """
+    # The full extracted report is passed in, which can exceed the model's
+    # maximum input length; truncation=True makes the tokenizer clip it instead
+    # of failing at inference time.
+    result = sentiment_model(text, truncation=True)[0]
+    sentiment = "Positive" if result["label"] == "POSITIVE" else "Negative"
+    return sentiment, result["score"]
+# Summarize content
+def summarize_content(text, summarize_model):
+    """Return a summary of *text* produced by *summarize_model*.
+
+    Args:
+        text: the text to summarize.
+        summarize_model: a callable (a ``summarization`` pipeline) that returns
+            a list whose first item has a ``"summary_text"`` entry.
+
+    Returns:
+        The ``"summary_text"`` of the first result.
+    """
+    summary = summarize_model(
+        text,
+        max_length=130,
+        min_length=30,
+        do_sample=False,
+        # Multi-page reports can exceed the summarizer's input limit; clip the
+        # input instead of letting the model fail on over-length sequences.
+        truncation=True,
+    )
+    return summary[0]["summary_text"]
+# Produce the English / Hindi / Urdu versions of a text
+def translate_content(text, translation_models):
+    """Return *text* alongside its Hindi and Urdu translations.
+
+    *translation_models* maps the codes ``"hi"`` and ``"ur"`` to callables that
+    return a list whose first item has a ``"translation_text"`` entry. The
+    English entry is the input text itself, untouched.
+    """
+    translated = {"English": text}
+    for language, code in (("Hindi", "hi"), ("Urdu", "ur")):
+        translator = translation_models[code]
+        translated[language] = translator(text)[0]["translation_text"]
+    return translated
 # Streamlit App
+def main():
+    """Render the page: accept an upload, extract its text, then analyze it.
+
+    The flow is upload -> text extraction by file extension -> lab-report check
+    -> sentiment -> summary -> translations of the summary. Each failure path
+    shows an error in the UI and stops.
+    """
+    st.title("Lab Test Analyzer")
+    models = initialize_models()
+    uploaded_file = st.file_uploader("Upload a Lab Report (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])
+    # Nothing to do until the user has picked a file.
+    if not uploaded_file:
+        return
+    # Choose the extractor from the lower-cased filename extension.
+    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
+    extracted_text = ""
+    if extension == "pdf":
+        st.write("Processing PDF file...")
+        extracted_text = extract_text_from_pdf(uploaded_file)
+    elif extension in ("png", "jpg", "jpeg"):
+        st.write("Processing Image file...")
+        extracted_text = extract_text_from_image(uploaded_file)
+    elif extension == "txt":
+        st.write("Processing Text file...")
+        raw_bytes = uploaded_file.read()
+        extracted_text = raw_bytes.decode("utf-8")
+    else:
+        st.error("Unsupported file type.")
+    # An empty extraction (including the unsupported-type case) ends here.
+    if not extracted_text:
+        st.error("Could not extract text from the uploaded file.")
+        return
+    st.subheader("Extracted Content")
+    st.text_area("Extracted Text", extracted_text, height=200)
+    # Refuse to analyze documents the classifier does not consider lab reports.
+    if not is_lab_report(extracted_text, models["report_check_model"]):
+        st.error("The uploaded file does not appear to be a lab report.")
+        return
+    st.success("The uploaded file is a valid lab report.")
+    # Sentiment of the full extracted text.
+    sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
+    st.subheader("Sentiment Analysis")
+    st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
+    # Summary of the full extracted text.
+    summary = summarize_content(extracted_text, models["summarize_model"])
+    st.subheader("Summary")
+    st.text_area("Summary", summary, height=150)
+    # Translations are made from the summary, not the full text.
+    translations = translate_content(summary, models["translation_model"])
+    st.subheader("Translations")
+    for language in ("English", "Hindi", "Urdu"):
+        st.write(f"**{language}**: ", translations[language])
+if __name__ == "__main__":
+    main()