ANLPRL committed on
Commit
01c4e0e
1 Parent(s): 69f7505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py CHANGED
@@ -6,6 +6,9 @@ from transformers import AutoTokenizer,AutoModel
6
  import torch
7
  import tensorflow as tf
8
  from keras.models import load_model
 
 
 
9
 
10
 
11
  def predict(new_data):
@@ -46,6 +49,44 @@ def highlight(sentence):
46
  else:
47
  highlighted_text += f'{words} '
48
  st.markdown(highlighted_text, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  #Load the trained model
51
  with open("biobert_rf.pkl", 'rb') as f:
@@ -59,6 +100,13 @@ st.subheader('Named Entity Recoginition System For Oral Medicine ')
59
  sentence = st.text_area('Enter a sentence:')
60
 
61
  st.write("OR")
 
 
 
 
 
 
 
62
  selected_options = st.selectbox(
63
  'Choose a text from dropdown: ',
64
  (" ",
@@ -71,6 +119,10 @@ selected_options = st.selectbox(
71
  if st.button('Analyze'):
72
  if sentence:
73
  highlight(sentence)
 
 
 
 
74
  elif selected_options:
75
  highlight(selected_options)
76
  else:
 
6
  import torch
7
  import tensorflow as tf
8
  from keras.models import load_model
9
+ import re
10
+ import io
11
+ import PyPDF2
12
 
13
 
14
  def predict(new_data):
 
49
  else:
50
  highlighted_text += f'{words} '
51
  st.markdown(highlighted_text, unsafe_allow_html=True)
52
+
53
+
54
+
55
def read_uploaded_file(uploaded_file):
    """Extract the text of an uploaded file, dispatching on its MIME type.

    Args:
        uploaded_file: Upload object exposing a ``type`` attribute holding
            its MIME type, or ``None`` when nothing was uploaded.

    Returns:
        The result of ``read_pdf_file`` for ``application/pdf`` uploads, the
        result of ``read_text_file`` for ``text/plain`` uploads, and ``None``
        when there is no upload or the MIME type is anything else.
    """
    if uploaded_file is None:
        return None

    mime_type = uploaded_file.type
    if mime_type == 'application/pdf':
        return read_pdf_file(uploaded_file)
    if mime_type == 'text/plain':
        return read_text_file(uploaded_file)

    # Any other MIME type yields no extracted content.
    return None
64
+
65
def read_pdf_file(uploaded_file):
    """Return the text of every page of an uploaded PDF, concatenated in order.

    Args:
        uploaded_file: Object whose ``read()`` returns the raw PDF bytes.

    Returns:
        One string made of each page's ``extract_text()`` output joined
        back-to-back with no separator (empty when the PDF has no pages).
    """
    # Copy the upload into an in-memory buffer that PdfReader can parse.
    with io.BytesIO(uploaded_file.read()) as pdf_buffer:
        reader = PyPDF2.PdfReader(pdf_buffer)
        return ''.join(page.extract_text() for page in reader.pages)
73
+
74
def read_text_file(uploaded_file):
    """Return the contents of an uploaded plain-text file as a string.

    The bytes are decoded as UTF-8. Bytes that are not valid UTF-8 are
    replaced with U+FFFD instead of raising ``UnicodeDecodeError``, so a file
    saved in another encoding no longer aborts the request.

    Args:
        uploaded_file: Object whose ``read()`` returns the raw file bytes.

    Returns:
        The decoded text, with line endings left exactly as they were.
    """
    # Decoding directly is equivalent to the former StringIO round trip,
    # which only read back the string it had just been given.
    return uploaded_file.read().decode('utf-8', errors='replace')
78
+
79
+
80
def preprocess(text):
    """Blank out URLs, @user names and non-alphanumeric characters in *text*.

    Each match is replaced by a single space (not removed), so the words on
    either side of a stripped token stay separated.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; an empty input yields an empty string.
    """
    # Alternation order matters: URLs and @names must be tried before the
    # single-character class, otherwise the class consumes the leading '@'
    # and the user name itself is left behind.
    # NOTE(review): the trailing 't' in the class is redundant with 'a-z';
    # it may have been meant as '\t' - confirm against the original intent.
    pattern = re.compile(r'https?://\S+|@\w+|[^0-9A-Za-z t]')
    # One left-to-right substitution pass. Replacing each findall() result
    # with str.replace() was order-dependent: blanking an early punctuation
    # match everywhere could alter a later URL so it was never removed.
    return pattern.sub(' ', text)
89
+
90
 
91
  #Load the trained model
92
  with open("biobert_rf.pkl", 'rb') as f:
 
100
  sentence = st.text_area('Enter a sentence:')
101
 
102
  st.write("OR")
103
# Only PDF and plain-text uploads are handled by read_uploaded_file, so
# restrict the widget to those extensions instead of accepting any file.
uploaded_file = st.file_uploader("Upload a file", type=["pdf", "txt"])

if uploaded_file is not None:
    # Acknowledge the upload; the file is read when "Analyze" is pressed.
    st.write("File uploaded!")

st.write("OR")
110
  selected_options = st.selectbox(
111
  'Choose a text from dropdown: ',
112
  (" ",
 
119
  if st.button('Analyze'):
120
  if sentence:
121
  highlight(sentence)
122
+ elif uploaded_file:
123
+ text=read_uploaded_file(uploaded_file)
124
+ text=preprocess(text)
125
+ highlight(text)
126
  elif selected_options:
127
  highlight(selected_options)
128
  else: