Update app.py
app.py
@@ -52,7 +52,7 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
@@ -86,7 +86,6 @@ def read_pdf(file):
     # all_page_text += text + " " #page.extractText()
     # return all_page_text
 st.title("NLP APPLICATION")
-@st.experimental_singleton
 #@st.cache_resource(experimental_allow_widgets=True)
 def text_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
@@ -94,12 +93,13 @@ def text_analyzer(my_text):
     # tokens = [ token.text for token in docx]
     allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
     return allData
-@st.experimental_singleton
 #@st.cache_resource(experimental_allow_widgets=True)
 def load_models():
     tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
     model = GPT2LMHeadModel.from_pretrained('gpt2-large')
     return tokenizer, model
+tokenizer = AutoTokenizer.from_pretrained('t5-base')
+model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
 # Function For Extracting Entities
 @st.experimental_singleton
 #@st.cache_resource(experimental_allow_widgets=True)
@@ -111,8 +111,6 @@ def entity_analyzer(my_text):
     allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
     return allData
 def main():
-    tokenizer = AutoTokenizer.from_pretrained('t5-base')
-    model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
     """ NLP Based Application with Streamlit """
     st.markdown("""
     #### Description
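The change removes the deprecated `@st.experimental_singleton` decorators while leaving the `#@st.cache_resource(experimental_allow_widgets=True)` comments in place, which points at a migration to Streamlit's current caching API: `st.experimental_singleton` is deprecated in favor of `st.cache_resource`. The commit itself only strips the old decorator; a minimal sketch of the fully migrated loader, reusing the gpt2-large checkpoint from the diff, might look like this:

```python
import streamlit as st
from transformers import AutoTokenizer, GPT2LMHeadModel

# st.cache_resource replaces the deprecated @st.experimental_singleton:
# the function body runs once, and the returned objects are shared across
# reruns and sessions instead of being reloaded on every interaction.
@st.cache_resource
def load_models():
    tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
    model = GPT2LMHeadModel.from_pretrained('gpt2-large')
    return tokenizer, model
```

Without a caching decorator, every Streamlit rerun would reload the full gpt2-large weights, which is the cost this pattern is meant to avoid.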
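The other change moves the t5-base tokenizer and model out of `main()` to module level. Two caveats apply. First, Streamlit re-executes the whole script on every interaction, so module-level loading by itself does not prevent repeated loads; a cached loader like the sketch above does. Second, `AutoModelWithLMHead` is deprecated in current transformers releases; for an encoder-decoder model such as T5 the documented replacement is `AutoModelForSeq2SeqLM`. A sketch with the newer class follows; the `summarize` helper is a hypothetical illustration of T5's text-to-text usage, not code from this commit:

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Module-level loading, as in the commit. AutoModelForSeq2SeqLM is the
# non-deprecated equivalent of AutoModelWithLMHead for seq2seq models like
# T5; return_dict=True makes forward passes return ModelOutput objects.
tokenizer = AutoTokenizer.from_pretrained('t5-base')
model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', return_dict=True)

# Hypothetical helper: T5 frames every task as text-to-text via a prefix.
def summarize(text: str, max_length: int = 60) -> str:
    inputs = tokenizer("summarize: " + text, return_tensors="pt",
                       truncation=True, max_length=512)
    output_ids = model.generate(inputs["input_ids"], max_length=max_length)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
```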