mdasad3617 committed on
Commit
8cf2395
·
verified ·
1 Parent(s): 4f48910

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -59
app.py CHANGED
@@ -1,65 +1,116 @@
1
  import streamlit as st
2
- from models import initialize_models
3
- from models.pdf_handler import parse_pdf
4
- from models.image_handler import analyze_image
5
- from models.summarizer import summarize_text
6
- from models.translator import translate_text
7
- from models.problem_checker import flag_lab_problems
8
  from PIL import Image
 
 
9
 
10
  # Initialize Models
11
- models = initialize_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Streamlit App
14
- st.title("Lab Test Analyzer Dashboard")
15
-
16
- # File Upload
17
- uploaded_file = st.file_uploader("Upload a Lab Report (PDF, Image, or Text)", type=["pdf", "png", "jpg", "jpeg", "txt"])
18
-
19
- if uploaded_file:
20
- file_type = uploaded_file.name.split(".")[-1].lower()
21
-
22
- # Extract Text Based on File Type
23
- extracted_text = ""
24
- if file_type == "pdf":
25
- st.write("Processing PDF file...")
26
- extracted_text = parse_pdf(uploaded_file)
27
- elif file_type in ["png", "jpg", "jpeg"]:
28
- st.write("Processing Image file...")
29
- image = Image.open(uploaded_file)
30
- extracted_text = analyze_image(image, models["image_model"])
31
- elif file_type == "txt":
32
- st.write("Processing Text file...")
33
- extracted_text = uploaded_file.read().decode("utf-8")
34
- else:
35
- st.error("Unsupported file type.")
36
-
37
- # Display Extracted Text
38
- if extracted_text:
39
- st.subheader("Extracted Content")
40
- st.text_area("Extracted Text", extracted_text, height=200)
41
-
42
- # Summarization
43
- summary = summarize_text(extracted_text, models["summarize_model"])
44
- st.subheader("Summary of the Report")
45
- st.text_area("Summary", summary, height=150)
46
-
47
- # Sentiment Analysis
48
- st.subheader("Sentiment Analysis")
49
- sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
50
- st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
51
-
52
- # Problem Detection
53
- problems = flag_lab_problems(summary)
54
- st.subheader("Detected Problems")
55
- st.write(problems)
56
-
57
- # Translation
58
- st.subheader("Translations")
59
- translations = translate_content(summary, models["translation_model"])
60
- st.write("**English**: ", translations["English"])
61
- st.write("**Hindi**: ", translations["Hindi"])
62
- st.write("**Urdu**: ", translations["Urdu"])
63
-
64
- else:
65
- st.error("Could not extract text from the uploaded file.")
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
+ import pdfplumber
 
 
 
 
4
  from PIL import Image
5
+ import pytesseract
6
+ from langdetect import detect
7
 
8
  # Initialize Models
9
@st.cache_resource
def initialize_models():
    """Build every Hugging Face pipeline the app needs and return them in a dict.

    Wrapped in ``st.cache_resource`` so the pipelines are created once and
    reused instead of being rebuilt on every Streamlit rerun.

    Returns:
        dict with the keys:
            "report_check_model": zero-shot classifier (facebook/bart-large-mnli).
            "sentiment_model": default sentiment-analysis pipeline.
            "summarize_model": summarizer (facebook/bart-large-cnn).
            "translation_model": dict of translation pipelines keyed by
                language code ("en", "hi", "ur").
    """
    return {
        # Must be a zero-shot pipeline: is_lab_report() calls this model with
        # candidate_labels=... and reads result["labels"], which a plain
        # "text-classification" pipeline does not support.
        "report_check_model": pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        ),
        "sentiment_model": pipeline("sentiment-analysis"),
        "summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
        "translation_model": {
            "en": pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en"),
            "hi": pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi"),
            "ur": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur"),
        },
    }
21
+
22
+ # Extract text from PDF
23
def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF, one page per line group.

    Args:
        pdf_file: Path or binary file-like object accepted by
            ``pdfplumber.open``.

    Returns:
        The text of every page that produced text, joined with newlines and
        stripped of surrounding whitespace. Empty string when no page yields
        any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() can come back as None/empty for a page without a text
        # layer (e.g. a scanned image); coerce to "" so one such page does not
        # abort the whole extraction with a TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
29
+
30
+ # Extract text from Image
31
def extract_text_from_image(image_file):
    """OCR an image and return the recognized text.

    Args:
        image_file: Path or file-like object that ``PIL.Image.open`` can read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` with leading
        and trailing whitespace removed.
    """
    ocr_output = pytesseract.image_to_string(Image.open(image_file))
    return ocr_output.strip()
35
+
36
+ # Check if content is a lab report
37
def is_lab_report(text, model):
    """Tell whether *text* is classified as a lab report.

    Args:
        text: Content to classify.
        model: Callable invoked as ``model(text, candidate_labels=[...])`` that
            returns a mapping with a "labels" sequence.

    Returns:
        True when the first entry of the returned "labels" is "lab report".
    """
    candidate_labels = ["lab report", "not lab report"]
    classification = model(text, candidate_labels=candidate_labels)
    top_label = classification["labels"][0]
    return top_label == "lab report"
40
+
41
+ # Analyze sentiment
42
def analyze_sentiment(text, sentiment_model):
    """Classify *text* as positive or negative.

    Args:
        text: Content to analyze.
        sentiment_model: Callable (in this app, a transformers
            sentiment-analysis pipeline) returning a list of dicts that carry
            "label" and "score".

    Returns:
        A ``(sentiment, score)`` tuple: ``sentiment`` is "Positive" when the
        first result's label is "POSITIVE" and "Negative" otherwise; ``score``
        is that result's "score" value.
    """
    # Whole lab reports are routinely longer than the model's maximum input
    # length; ask the pipeline to truncate instead of letting it raise.
    top_result = sentiment_model(text, truncation=True)[0]
    sentiment = "Positive" if top_result["label"] == "POSITIVE" else "Negative"
    return sentiment, top_result["score"]
46
+
47
+ # Summarize content
48
def summarize_content(text, summarize_model):
    """Produce a short summary of *text*.

    Args:
        text: Content to summarize.
        summarize_model: Callable (in this app, a transformers summarization
            pipeline) returning a list of dicts that carry "summary_text".

    Returns:
        The "summary_text" of the first result.
    """
    summary = summarize_model(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Reports longer than the model's input window would otherwise make
        # the pipeline fail; truncate the input instead.
        truncation=True,
    )
    return summary[0]["summary_text"]
51
+
52
+ # Translate content
53
def translate_content(text, translation_models):
    """Return *text* alongside its Hindi and Urdu translations.

    Args:
        text: Source text; returned unchanged under the "English" key.
        translation_models: Mapping with "hi" and "ur" entries, each a callable
            returning a list of dicts that carry "translation_text".

    Returns:
        dict with the keys "English", "Hindi" and "Urdu".
    """

    def _translate(lang_code):
        # First result of the pipeline registered for this language code.
        return translation_models[lang_code](text)[0]["translation_text"]

    hindi = _translate("hi")
    urdu = _translate("ur")
    return {"English": text, "Hindi": hindi, "Urdu": urdu}
59
 
60
  # Streamlit App
61
def main():
    """Render the Lab Test Analyzer page and process an uploaded report.

    Flow: load the models, accept an upload (PDF, image or text), extract its
    text, stop with an error if the text is not classified as a lab report,
    then show sentiment, a summary, and the summary's translations.
    """
    st.title("Lab Test Analyzer")

    models = initialize_models()

    uploaded_file = st.file_uploader(
        "Upload a Lab Report (PDF, Image, or Text)",
        type=["pdf", "png", "jpg", "jpeg", "txt"],
    )

    if uploaded_file:
        # The handler is chosen from the file name's extension.
        file_type = uploaded_file.name.split(".")[-1].lower()
        extracted_text = ""

        if file_type == "pdf":
            st.write("Processing PDF file...")
            extracted_text = extract_text_from_pdf(uploaded_file)
        elif file_type in ["png", "jpg", "jpeg"]:
            st.write("Processing Image file...")
            extracted_text = extract_text_from_image(uploaded_file)
        elif file_type == "txt":
            st.write("Processing Text file...")
            # errors="replace": a text file that is not valid UTF-8 should
            # still be shown and analyzed rather than crash the app with a
            # UnicodeDecodeError.
            extracted_text = uploaded_file.read().decode("utf-8", errors="replace")
        else:
            st.error("Unsupported file type.")

        if extracted_text:
            st.subheader("Extracted Content")
            st.text_area("Extracted Text", extracted_text, height=200)

            # Check if it's a lab report; nothing else is rendered if not.
            if not is_lab_report(extracted_text, models["report_check_model"]):
                st.error("The uploaded file does not appear to be a lab report.")
                return

            st.success("The uploaded file is a valid lab report.")

            # Sentiment Analysis
            sentiment, confidence = analyze_sentiment(extracted_text, models["sentiment_model"])
            st.subheader("Sentiment Analysis")
            st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")

            # Summarization
            summary = summarize_content(extracted_text, models["summarize_model"])
            st.subheader("Summary")
            st.text_area("Summary", summary, height=150)

            # Translation (of the summary, not of the full extracted text)
            translations = translate_content(summary, models["translation_model"])
            st.subheader("Translations")
            st.write("**English**: ", translations["English"])
            st.write("**Hindi**: ", translations["Hindi"])
            st.write("**Urdu**: ", translations["Urdu"])

        else:
            st.error("Could not extract text from the uploaded file.")


if __name__ == "__main__":
    main()