Spaces:
Running
Running
mdasad3617
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,13 +3,12 @@ from transformers import pipeline
|
|
3 |
import pdfplumber
|
4 |
from PIL import Image
|
5 |
import easyocr
|
6 |
-
from langdetect import detect
|
7 |
|
8 |
# Initialize Models
|
9 |
@st.cache_resource
|
10 |
def initialize_models():
|
11 |
return {
|
12 |
-
"report_check_model": pipeline("
|
13 |
"sentiment_model": pipeline("sentiment-analysis"),
|
14 |
"summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
|
15 |
"translation_model": {
|
@@ -34,6 +33,10 @@ def extract_text_from_image(image_file):
|
|
34 |
result = reader.readtext(image, detail=0) # `detail=0` returns only the text
|
35 |
return " ".join(result).strip()
|
36 |
|
|
|
|
|
|
|
|
|
37 |
# Check if content is a lab report
|
38 |
def is_lab_report(text, model):
|
39 |
result = model(text, candidate_labels=["lab report", "not lab report"])
|
@@ -81,25 +84,29 @@ def main():
|
|
81 |
extracted_text = uploaded_file.read().decode("utf-8")
|
82 |
else:
|
83 |
st.error("Unsupported file type.")
|
|
|
84 |
|
85 |
if extracted_text:
|
86 |
st.subheader("Extracted Content")
|
87 |
st.text_area("Extracted Text", extracted_text, height=200)
|
88 |
|
|
|
|
|
|
|
89 |
# Check if it's a lab report
|
90 |
-
if not is_lab_report(
|
91 |
st.error("The uploaded file does not appear to be a lab report.")
|
92 |
return
|
93 |
|
94 |
st.success("The uploaded file is a valid lab report.")
|
95 |
|
96 |
# Sentiment Analysis
|
97 |
-
sentiment, confidence = analyze_sentiment(
|
98 |
st.subheader("Sentiment Analysis")
|
99 |
st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
|
100 |
|
101 |
# Summarization
|
102 |
-
summary = summarize_content(
|
103 |
st.subheader("Summary")
|
104 |
st.text_area("Summary", summary, height=150)
|
105 |
|
@@ -114,4 +121,4 @@ def main():
|
|
114 |
st.error("Could not extract text from the uploaded file.")
|
115 |
|
116 |
if __name__ == "__main__":
|
117 |
-
main()
|
|
|
3 |
import pdfplumber
|
4 |
from PIL import Image
|
5 |
import easyocr
|
|
|
6 |
|
7 |
# Initialize Models
|
8 |
@st.cache_resource
|
9 |
def initialize_models():
|
10 |
return {
|
11 |
+
"report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
|
12 |
"sentiment_model": pipeline("sentiment-analysis"),
|
13 |
"summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
|
14 |
"translation_model": {
|
|
|
33 |
result = reader.readtext(image, detail=0) # `detail=0` returns only the text
|
34 |
return " ".join(result).strip()
|
35 |
|
36 |
+
# Preprocess text for model input
|
37 |
+
def preprocess_text(text, max_length=1024):
    """Truncate *text* to at most *max_length* characters.

    The limit is counted in characters, not model tokens, so it only puts a
    rough upper bound on the size of the input.

    Args:
        text: The text to shorten. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters to keep.

    Returns:
        The leading ``max_length`` characters of ``text``, or all of ``text``
        when it is already short enough.
    """
    if not text:
        return ""
    # Slicing already returns the whole string when it is shorter than the
    # bound, so no explicit length comparison is needed.
    return text[:max_length]
|
39 |
+
|
40 |
# Check if content is a lab report
|
41 |
def is_lab_report(text, model):
|
42 |
result = model(text, candidate_labels=["lab report", "not lab report"])
|
|
|
84 |
extracted_text = uploaded_file.read().decode("utf-8")
|
85 |
else:
|
86 |
st.error("Unsupported file type.")
|
87 |
+
return
|
88 |
|
89 |
if extracted_text:
|
90 |
st.subheader("Extracted Content")
|
91 |
st.text_area("Extracted Text", extracted_text, height=200)
|
92 |
|
93 |
+
# Preprocess text
|
94 |
+
preprocessed_text = preprocess_text(extracted_text)
|
95 |
+
|
96 |
# Check if it's a lab report
|
97 |
+
if not is_lab_report(preprocessed_text, models["report_check_model"]):
|
98 |
st.error("The uploaded file does not appear to be a lab report.")
|
99 |
return
|
100 |
|
101 |
st.success("The uploaded file is a valid lab report.")
|
102 |
|
103 |
# Sentiment Analysis
|
104 |
+
sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
|
105 |
st.subheader("Sentiment Analysis")
|
106 |
st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
|
107 |
|
108 |
# Summarization
|
109 |
+
summary = summarize_content(preprocessed_text, models["summarize_model"])
|
110 |
st.subheader("Summary")
|
111 |
st.text_area("Summary", summary, height=150)
|
112 |
|
|
|
121 |
st.error("Could not extract text from the uploaded file.")
|
122 |
|
123 |
if __name__ == "__main__":
|
124 |
+
main()
|