Spaces:
Sleeping
Sleeping
Delete app.py
Browse files
app.py
DELETED
@@ -1,133 +0,0 @@
|
|
1 |
-
from transformers import AutoTokenizer, TFAutoModel
|
2 |
-
import tensorflow as tf
|
3 |
-
#from keras.preprocessing.sequence import pad_sequences
|
4 |
-
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
5 |
-
import pickle
|
6 |
-
import numpy as np
|
7 |
-
from keras.models import load_model
|
8 |
-
import streamlit as st
|
9 |
-
import io
|
10 |
-
import PyPDF2
|
11 |
-
import re
|
12 |
-
|
13 |
-
|
14 |
-
def read_uploaded_file(uploaded_file):
    """Return the text content of *uploaded_file*, or None.

    Dispatches on the Streamlit UploadedFile MIME type: PDFs go through
    read_pdf_file, plain text through read_text_file.  A missing file or
    an unsupported content type yields None.
    """
    if uploaded_file is None:
        return None
    readers = {
        'application/pdf': read_pdf_file,
        'text/plain': read_text_file,
    }
    reader = readers.get(uploaded_file.type)
    return reader(uploaded_file) if reader is not None else None
|
23 |
-
|
24 |
-
def read_pdf_file(uploaded_file):
    """Extract and concatenate the text of every page of an uploaded PDF.

    Parameters
    ----------
    uploaded_file : Streamlit UploadedFile
        The raw upload; its bytes are wrapped in a BytesIO so PyPDF2 can
        seek within them.

    Returns
    -------
    str
        The extracted text of all pages joined together (empty string for
        a PDF with no extractable text).
    """
    with io.BytesIO(uploaded_file.read()) as f:
        pdf_reader = PyPDF2.PdfReader(f)
        # Iterate pages directly instead of indexing via range(len(...)).
        # extract_text() can return None for image-only pages in some
        # PyPDF2 versions, so coerce to '' to keep the result a str.
        return ''.join((page.extract_text() or '') for page in pdf_reader.pages)
|
32 |
-
|
33 |
-
def read_text_file(uploaded_file):
    """Decode the bytes of an uploaded plain-text file and return them as str."""
    decoded = uploaded_file.read().decode()
    # Round-trip through a StringIO buffer, mirroring the PDF reader's
    # in-memory-buffer pattern.
    with io.StringIO(decoded) as buffer:
        return buffer.read()
|
37 |
-
|
38 |
-
|
39 |
-
def preprocess(text):
    """Scrub URLs, user mentions and disallowed characters from *text*.

    Each match — a URL, a single character outside 0-9 / A-Z / a-z /
    apostrophe / space / 't', or an @username — is replaced by a single
    space.

    Parameters
    ----------
    text : str

    Returns
    -------
    str
        The cleaned text.
    """
    # NOTE(review): the character class allows the literal letter 't';
    # '\t' (tab) was probably intended — confirm before changing the regex.
    pattern = re.compile(r'https?://\S+|[^0-9A-Za-z\' t]|@\w+')
    # re.sub replaces each match in place.  The original findall +
    # str.replace loop replaced *every* occurrence of each matched
    # substring anywhere in the text, which could mangle untouched spans,
    # and rescanned the whole string once per match.
    return pattern.sub(' ', text)
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
def predict(new_data):
    """Run the BioBERT + RNN NER pipeline over *new_data*, word by word.

    Relies on module-level globals loaded at startup: ``tokenizer`` and
    ``biobert_model`` (BioBERT), ``model`` (the Keras classifier head) and
    ``le`` (the fitted LabelEncoder).

    Returns a pair ``(data, labels)`` where consecutive words that share
    the same entity label are merged into a single phrase string.
    """
    # Tokenise each whitespace-separated word independently (one token
    # sequence per word, not one per sentence).
    X_tokens = [tokenizer.encode(text, add_special_tokens=True) for text in new_data.split()]
    # Pad/truncate every token list to the fixed length (22) the
    # classifier was trained on.
    X_padded = pad_sequences(X_tokens, maxlen=22, dtype='long', truncating='post', padding='post')
    X_tensor = tf.convert_to_tensor(X_padded)
    # Element [0] of the BioBERT output is the last hidden state tensor.
    X_embeddings = biobert_model(X_tensor)[0]
    pred=model.predict(X_embeddings)
    # argmax over classes, then map indices back to label strings.
    predicted_labels = list(le.inverse_transform(np.argmax(pred, axis=1)))
    text=new_data.split()
    prev_label=" "
    data=[]
    labels=[]
    for i,(word,label) in enumerate(zip(text,predicted_labels)):
        if label!="Other":
            # Strip the BIO prefix, e.g. "B-Symptom" -> "Symptom".
            label=label.split('-')[1]
            if prev_label==label:
                # Same entity as the previous kept word: extend its phrase.
                data[-1]=data[-1]+" "+word
            else:
                data.append(word)
                labels.append(label)
                prev_label=label
    # NOTE(review): prev_label is never reset when a word is labelled
    # "Other", so two same-label entities separated by "Other" words get
    # merged into one phrase — confirm this is intended.
    return(data,labels)
|
74 |
-
|
75 |
-
def highlight(sentence):
    """Render *sentence* in Streamlit with recognised entities highlighted.

    Calls predict() to label each word, then builds an HTML string in
    which "Symptom" / "Medical Condition" words get a coloured <mark>
    with a small superscript label, and writes it with st.markdown.
    """
    highlighted_text = ""
    entity_colors = {"Symptom": "#87cefa", "Medical Condition": "#ffb6c1"}
    words, labels = predict(sentence)
    # Fixed: the loop variable used to shadow the `words` list itself.
    for word, label in zip(words, labels):
        # NOTE(review): the `word != "a"` exclusion looks like a
        # dataset-specific hack — confirm it is still wanted.
        if label != "Other" and word != "a":
            if label in ["Medical Condition", "Symptom"]:
                word_color = entity_colors.get(label, "yellow")
                # Fixed: the fallback used to be "<b>black</b>", which is
                # not a valid CSS colour value; plain "black" renders.
                # (The "<label>-label" key never exists in entity_colors,
                # so the fallback is always what gets used.)
                label_color = entity_colors.get(label + '-label', "black")
                highlighted_text += f'<mark style="background-color: {word_color}; color: {label_color}; padding: 0 0.25rem; border-radius: 0.25rem; border: 2px solid {word_color}; border-bottom-width: 1px">{word}<sup style="background-color: white; color: black; border: 1px solid black; border-radius: 2px; padding: 0 0.15rem; font-size: 70%; margin-left: 0.15rem; font-weight: bold;">{label}</sup></mark> '
            else:
                highlighted_text += f'{word} '
        else:
            highlighted_text += f'{word} '
    st.markdown(highlighted_text, unsafe_allow_html=True)
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
# ---- Model and encoder loading (module-level side effects, run once at
# ---- app startup; order-dependent, do not reorder) ----

# Create a LabelEncoder object
# Load the fitted LabelEncoder that maps class indices back to label strings.
with open("labelencoder1.pkl", 'rb') as f:
    le = pickle.load(f)
# Keras classifier head trained on top of BioBERT embeddings.
model= tf.keras.models.load_model("biobert-rnn1.h5")
# Pretrained BioBERT tokenizer and encoder; from_pt=True converts the
# published PyTorch weights to TensorFlow.
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")
biobert_model = TFAutoModel.from_pretrained("dmis-lab/biobert-base-cased-v1.1", from_pt=True)

# ---- Streamlit UI: the order of these calls defines the page layout ----
st.title('Named Entity Recognition')
sentence = st.text_input('Enter a sentence:')

st.write("OR")
uploaded_file = st.file_uploader("Upload a file")

if uploaded_file is not None:
    # Do something with the file
    st.write("File uploaded!")

st.write("OR")
selected_options = st.selectbox(
'Choose a text from dropdown: ',
(" ",
'Anemia and gingival bleeding are connected in that anemia can be a contributing cause to the occurrence of gingival bleeding . Anemia is a condition characterized by a shortage in the number or quality of red blood cells, which can lead to a reduced ability of the blood to carry oxygen throughout the body.',
'Hemophilia is a genetic illness that mainly affects the blood ability to clot properly. Individuals with significant hemophilia are at an elevated possibility of experiencing unforeseen bleeding episodes, which can occur in various parts of the body, including the mouth. Gingival bleeding can be a sign of hemophilia and can present as gum bleeding or mouth sores.',
"Von Willebrand disease VWD is a genetic condition that impairs the blood's ability to clot properly. One of the symptoms of VWD is spontaneous gingival bleeding , which can occur without any apparent cause or trauma")) # set default to None


# Define the colors for each label

if st.button('Analyze'):
    # Input priority: typed sentence > uploaded file > dropdown selection.
    if sentence:
        highlight(sentence)
    elif uploaded_file:
        text=read_uploaded_file(uploaded_file)
        text=preprocess(text)
        highlight(text)
    elif selected_options:
        highlight(selected_options)
    else:
        # NOTE(review): the selectbox default " " is truthy, so this
        # branch appears unreachable — confirm whether the default should
        # be None/"" instead.
        st.write('Please enter a sentence or select an option from the dropdown or upload a file.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|