Spaces:

ANLPRL
/

NER_On_Oral_Medicine

Sleeping

App Files Files Community

ANLPRL committed on Apr 22, 2023

Commit

01c4e0e

1 Parent(s): 69f7505

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -0

app.py CHANGED Viewed

@@ -6,6 +6,9 @@ from transformers import AutoTokenizer,AutoModel
 import torch
 import tensorflow as tf
 from keras.models import load_model
 def predict(new_data):
@@ -46,6 +49,44 @@ def highlight(sentence):
         else:
             highlighted_text += f'{words} '
     st.markdown(highlighted_text, unsafe_allow_html=True)
 #Load the trained model
 with open("biobert_rf.pkl", 'rb') as f:
@@ -59,6 +100,13 @@ st.subheader('Named Entity Recoginition System For Oral Medicine ')
 sentence = st.text_area('Enter a sentence:')
 st.write("OR")
 selected_options = st.selectbox(
 'Choose a text from dropdown: ',
 (" ",
@@ -71,6 +119,10 @@ selected_options = st.selectbox(
 if st.button('Analyze'):
     if sentence:
         highlight(sentence)
     elif selected_options:
         highlight(selected_options)
     else:

 import torch
 import tensorflow as tf
 from keras.models import load_model
+import re
+import io
+import PyPDF2
 def predict(new_data):
         else:
             highlighted_text += f'{words} '
     st.markdown(highlighted_text, unsafe_allow_html=True)
+def read_uploaded_file(uploaded_file):
+    """Return the text content of an uploaded file, or None.
+
+    Dispatches on ``uploaded_file.type``: 'application/pdf' is handed to
+    ``read_pdf_file`` and 'text/plain' to ``read_text_file``.  ``None`` is
+    returned when no file was supplied or the type is neither of those.
+    """
+    if uploaded_file is None:
+        return None
+    mime_type = uploaded_file.type
+    if mime_type == 'application/pdf':
+        return read_pdf_file(uploaded_file)
+    if mime_type == 'text/plain':
+        return read_text_file(uploaded_file)
+    # Any other type is not handled by this function.
+    return None
+def read_pdf_file(uploaded_file):
+    """Return the extracted text of every page of an uploaded PDF.
+
+    The upload is read fully into memory and parsed with
+    ``PyPDF2.PdfReader``; the per-page ``extract_text()`` results are
+    concatenated in page order with no separator.
+    """
+    with io.BytesIO(uploaded_file.read()) as buffer:
+        reader = PyPDF2.PdfReader(buffer)
+        # Extract while the buffer is still open: the reader pulls page
+        # data from it on demand.
+        page_texts = [page.extract_text() for page in reader.pages]
+    return ''.join(page_texts)
+def read_text_file(uploaded_file):
+    """Return the uploaded file's bytes decoded to ``str``.
+
+    Uses ``bytes.decode()`` with its default codec (UTF-8, strict), so
+    undecodable input raises ``UnicodeDecodeError``.
+    """
+    raw_bytes = uploaded_file.read()
+    return raw_bytes.decode()
+def preprocess(text):
+    """Replace URLs, @user names and non-alphanumeric characters with a space.
+
+    Every match of the cleanup pattern is replaced by one space in a single
+    left-to-right pass, so the result never depends on the order in which
+    matches occur in the input.
+
+    Args:
+        text: The string to clean.
+
+    Returns:
+        The cleaned string; ASCII letters, digits and spaces are kept as-is.
+    """
+    # Alternatives are tried left to right, so the multi-character ones
+    # (URL, @user) must come before the single-character class; otherwise
+    # the class consumes the '@' and the user-name branch can never match.
+    # NOTE(review): the ' t' inside the class looks like a lost '\t'; as
+    # written, tabs and newlines are also replaced by a space -- confirm.
+    pattern = re.compile(r'https?://\S+|@\w+|[^0-9A-Za-z t]')
+    # A single sub() pass replaces each match where it was found.  Replacing
+    # match strings one at a time with str.replace could break a URL apart
+    # (e.g. an earlier '.' rewrite) before its own replacement ran.
+    return pattern.sub(' ', text)
 #Load the trained model
 with open("biobert_rf.pkl", 'rb') as f:
 sentence = st.text_area('Enter a sentence:')
 st.write("OR")
+uploaded_file = st.file_uploader("Upload a file")
+if uploaded_file is not None:
+    # Do something with the file
+    st.write("File uploaded!")
+st.write("OR")
 selected_options = st.selectbox(
 'Choose a text from dropdown: ',
 (" ",
 if st.button('Analyze'):
     if sentence:
         highlight(sentence)
+    elif uploaded_file:
+        text=read_uploaded_file(uploaded_file)
+        text=preprocess(text)
+        highlight(text)
     elif selected_options:
         highlight(selected_options)
     else: