Spaces:

mdasad3617
/

lab-report-analyzer

Running

App Files Community

mdasad3617 committed on Dec 1, 2024

Commit

b8905fc

verified ·

1 Parent(s): f9aa9e9

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -6

app.py CHANGED Viewed

@@ -3,13 +3,12 @@ from transformers import pipeline
 import pdfplumber
 from PIL import Image
 import easyocr
-from langdetect import detect
 # Initialize Models
 @st.cache_resource
 def initialize_models():
     return {
-        "report_check_model": pipeline("text-classification", model="facebook/bart-large-mnli"),
         "sentiment_model": pipeline("sentiment-analysis"),
         "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
         "translation_model": {
@@ -34,6 +33,10 @@ def extract_text_from_image(image_file):
     result = reader.readtext(image, detail=0)  # `detail=0` returns only the text
     return " ".join(result).strip()
 # Check if content is a lab report
 def is_lab_report(text, model):
     result = model(text, candidate_labels=["lab report", "not lab report"])
@@ -81,25 +84,29 @@ def main():
             extracted_text = uploaded_file.read().decode("utf-8")
         else:
             st.error("Unsupported file type.")
         if extracted_text:
             st.subheader("Extracted Content")
             st.text_area("Extracted Text", extracted_text, height=200)
             # Check if it's a lab report
-            if not is_lab_report(extracted_text, models["report_check_model"]):
                 st.error("The uploaded file does not appear to be a lab report.")
                 return
             st.success("The uploaded file is a valid lab report.")
             # Sentiment Analysis
-            sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
             st.subheader("Sentiment Analysis")
             st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
             # Summarization
-            summary = summarize_content(extracted_text, models["summarize_model"])
             st.subheader("Summary")
             st.text_area("Summary", summary, height=150)
@@ -114,4 +121,4 @@ def main():
             st.error("Could not extract text from the uploaded file.")
 if __name__ == "__main__":
-    main()

 import pdfplumber
 from PIL import Image
 import easyocr
 # Initialize Models
 @st.cache_resource
 def initialize_models():
     return {
+        "report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
         "sentiment_model": pipeline("sentiment-analysis"),
         "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
         "translation_model": {
     result = reader.readtext(image, detail=0)  # `detail=0` returns only the text
     return " ".join(result).strip()
+# Preprocess text for model input
+def preprocess_text(text, max_length=1024):
+    return text[:max_length] if len(text) > max_length else text
 # Check if content is a lab report
 def is_lab_report(text, model):
     result = model(text, candidate_labels=["lab report", "not lab report"])
             extracted_text = uploaded_file.read().decode("utf-8")
         else:
             st.error("Unsupported file type.")
+            return
         if extracted_text:
             st.subheader("Extracted Content")
             st.text_area("Extracted Text", extracted_text, height=200)
+            # Preprocess text
+            preprocessed_text = preprocess_text(extracted_text)
             # Check if it's a lab report
+            if not is_lab_report(preprocessed_text, models["report_check_model"]):
                 st.error("The uploaded file does not appear to be a lab report.")
                 return
             st.success("The uploaded file is a valid lab report.")
             # Sentiment Analysis
+            sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
             st.subheader("Sentiment Analysis")
             st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
             # Summarization
+            summary = summarize_content(preprocessed_text, models["summarize_model"])
             st.subheader("Summary")
             st.text_area("Summary", summary, height=150)
             st.error("Could not extract text from the uploaded file.")
 if __name__ == "__main__":
+    main()