mdasad3617 committed on
Commit
b8905fc
·
verified ·
1 Parent(s): f9aa9e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -3,13 +3,12 @@ from transformers import pipeline
3
  import pdfplumber
4
  from PIL import Image
5
  import easyocr
6
- from langdetect import detect
7
 
8
  # Initialize Models
9
  @st.cache_resource
10
  def initialize_models():
11
  return {
12
- "report_check_model": pipeline("text-classification", model="facebook/bart-large-mnli"),
13
  "sentiment_model": pipeline("sentiment-analysis"),
14
  "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
15
  "translation_model": {
@@ -34,6 +33,10 @@ def extract_text_from_image(image_file):
34
  result = reader.readtext(image, detail=0) # `detail=0` returns only the text
35
  return " ".join(result).strip()
36
 
 
 
 
 
37
  # Check if content is a lab report
38
  def is_lab_report(text, model):
39
  result = model(text, candidate_labels=["lab report", "not lab report"])
@@ -81,25 +84,29 @@ def main():
81
  extracted_text = uploaded_file.read().decode("utf-8")
82
  else:
83
  st.error("Unsupported file type.")
 
84
 
85
  if extracted_text:
86
  st.subheader("Extracted Content")
87
  st.text_area("Extracted Text", extracted_text, height=200)
88
 
 
 
 
89
  # Check if it's a lab report
90
- if not is_lab_report(extracted_text, models["report_check_model"]):
91
  st.error("The uploaded file does not appear to be a lab report.")
92
  return
93
 
94
  st.success("The uploaded file is a valid lab report.")
95
 
96
  # Sentiment Analysis
97
- sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
98
  st.subheader("Sentiment Analysis")
99
  st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
100
 
101
  # Summarization
102
- summary = summarize_content(extracted_text, models["summarize_model"])
103
  st.subheader("Summary")
104
  st.text_area("Summary", summary, height=150)
105
 
@@ -114,4 +121,4 @@ def main():
114
  st.error("Could not extract text from the uploaded file.")
115
 
116
  if __name__ == "__main__":
117
- main()
 
3
  import pdfplumber
4
  from PIL import Image
5
  import easyocr
 
6
 
7
  # Initialize Models
8
  @st.cache_resource
9
  def initialize_models():
10
  return {
11
+ "report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
12
  "sentiment_model": pipeline("sentiment-analysis"),
13
  "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
14
  "translation_model": {
 
33
  result = reader.readtext(image, detail=0) # `detail=0` returns only the text
34
  return " ".join(result).strip()
35
 
36
+ # Preprocess text for model input
37
def preprocess_text(text, max_length=1024):
    """Truncate *text* to at most ``max_length`` characters for model input.

    Note that the limit counts characters of the string, not model tokens.

    Args:
        text: The extracted document text.
        max_length: Maximum number of characters to keep (default 1024).

    Returns:
        ``text`` unchanged when it already fits, otherwise its first
        ``max_length`` characters.
    """
    # Slicing already clamps to the string length, so an explicit
    # ``len(text) > max_length`` comparison is unnecessary.
    return text[:max_length]
39
+
40
  # Check if content is a lab report
41
  def is_lab_report(text, model):
42
  result = model(text, candidate_labels=["lab report", "not lab report"])
 
84
  extracted_text = uploaded_file.read().decode("utf-8")
85
  else:
86
  st.error("Unsupported file type.")
87
+ return
88
 
89
  if extracted_text:
90
  st.subheader("Extracted Content")
91
  st.text_area("Extracted Text", extracted_text, height=200)
92
 
93
+ # Preprocess text
94
+ preprocessed_text = preprocess_text(extracted_text)
95
+
96
  # Check if it's a lab report
97
+ if not is_lab_report(preprocessed_text, models["report_check_model"]):
98
  st.error("The uploaded file does not appear to be a lab report.")
99
  return
100
 
101
  st.success("The uploaded file is a valid lab report.")
102
 
103
  # Sentiment Analysis
104
+ sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
105
  st.subheader("Sentiment Analysis")
106
  st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
107
 
108
  # Summarization
109
+ summary = summarize_content(preprocessed_text, models["summarize_model"])
110
  st.subheader("Summary")
111
  st.text_area("Summary", summary, height=150)
112
 
 
121
  st.error("Could not extract text from the uploaded file.")
122
 
123
  if __name__ == "__main__":
124
+ main()