Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,10 @@
|
|
1 |
"""
|
2 |
#App: NLP App with Streamlit
|
3 |
-
Credits: Streamlit Team, Marc Skov Madsen(For Awesome-streamlit gallery)
|
4 |
Description
|
5 |
This is a Natural Language Processing (NLP) based application that is useful for basic NLP tasks such as the following:
|
6 |
|
7 |
-
+
|
8 |
|
9 |
-
+ Named Entity Recognition(NER)/Trigger word detection using SpaCy
|
10 |
-
|
11 |
-
+ Sentiment Analysis using TextBlob
|
12 |
-
|
13 |
-
+ Document/Text Summarization using Gensim/T5, both for Bangla Extractive and English Abstractive.
|
14 |
-
|
15 |
-
This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
|
16 |
-
Purpose
|
17 |
-
To perform basic and useful NLP tasks with Streamlit, spaCy, TextBlob, and Gensim
|
18 |
"""
|
19 |
# Core Pkgs
|
20 |
import os
|
@@ -28,9 +18,8 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
28 |
#os.system('pip install -q pytesseract')
|
29 |
#os.system('conda install -c conda-forge poppler')
|
30 |
import streamlit as st
|
31 |
-
st.set_page_config(page_title="
|
32 |
import torch
|
33 |
-
#from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
|
34 |
import docx2txt
|
35 |
from PIL import Image
|
36 |
from PyPDF2 import PdfFileReader
|
@@ -40,10 +29,10 @@ import pdfplumber
|
|
40 |
import pdf2image
|
41 |
|
42 |
|
43 |
-
# NLP Pkgs
|
44 |
-
from textblob import TextBlob
|
45 |
-
import spacy
|
46 |
-
#from gensim.summarization import summarize
|
47 |
import requests
|
48 |
import cv2
|
49 |
import numpy as np
|
@@ -61,9 +50,6 @@ def query(payload):
|
|
61 |
response = requests.post(API_URL, headers=headers, json=payload)
|
62 |
return response.json()
|
63 |
|
64 |
-
output = query({
|
65 |
-
"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
|
66 |
-
})
|
67 |
def read_pdf(file):
|
68 |
# images=pdf2image.convert_from_path(file)
|
69 |
# # print(type(images))
|
@@ -98,29 +84,6 @@ def read_pdf(file):
|
|
98 |
# return all_page_text
|
99 |
st.title("NLP APPLICATION")
|
100 |
#@st.cache_resource(experimental_allow_widgets=True)
|
101 |
-
@st.experimental_singleton
|
102 |
-
def text_analyzer(my_text):
|
103 |
-
nlp = spacy.load('en_core_web_sm')
|
104 |
-
docx = nlp(my_text)
|
105 |
-
# tokens = [ token.text for token in docx]
|
106 |
-
allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
|
107 |
-
return allData
|
108 |
-
#@st.cache_resource(experimental_allow_widgets=True)
|
109 |
-
# def load_models():
|
110 |
-
# tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
|
111 |
-
# model = GPT2LMHeadModel.from_pretrained('gpt2-large')
|
112 |
-
# return tokenizer, model
|
113 |
-
|
114 |
-
# Function For Extracting Entities
|
115 |
-
@st.experimental_singleton
|
116 |
-
#@st.cache_resource(experimental_allow_widgets=True)
|
117 |
-
def entity_analyzer(my_text):
|
118 |
-
nlp = spacy.load('en_core_web_sm')
|
119 |
-
docx = nlp(my_text)
|
120 |
-
tokens = [ token.text for token in docx]
|
121 |
-
entities = [(entity.text,entity.label_)for entity in docx.ents]
|
122 |
-
allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
|
123 |
-
return allData
|
124 |
def main():
|
125 |
#global tokenizer, model
|
126 |
#tokenizer = AutoTokenizer.from_pretrained('t5-base')
|
@@ -129,14 +92,11 @@ def main():
|
|
129 |
st.markdown("""
|
130 |
#### Description
|
131 |
##This is a Natural Language Processing(NLP) base Application that is useful for basic NLP tasks such as follows:
|
132 |
-
|
133 |
-
+ Named Entity Recognition(NER)/Trigger word detection using SpaCy
|
134 |
-
+ Sentiment Analysis using TextBlob
|
135 |
-
+ Document/Text Summarization using T5 for English Abstractive.
|
136 |
""")
|
137 |
def change_photo_state():
|
138 |
st.session_state["photo"]="done"
|
139 |
-
st.subheader("Please, feed your
|
140 |
message = st.text_input("Type your text here!")
|
141 |
uploaded_photo = st.file_uploader("Upload your PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
|
142 |
camera_photo = st.camera_input("Take a photo, Containing English texts", on_change=change_photo_state)
|
@@ -181,16 +141,7 @@ def main():
|
|
181 |
#our_image=load_image("image.jpg")
|
182 |
#img = cv2.imread("scholarly_text.jpg")
|
183 |
text = message
|
184 |
-
|
185 |
-
entity_result = entity_analyzer(text)
|
186 |
-
st.json(entity_result)
|
187 |
-
|
188 |
-
if st.checkbox("Show Sentiment Analysis for English"):
|
189 |
-
blob = TextBlob(text)
|
190 |
-
result_sentiment = blob.sentiment
|
191 |
-
st.success(result_sentiment)
|
192 |
-
if st.checkbox("Spell Corrections for English"):
|
193 |
-
st.success(TextBlob(text).correct())
|
194 |
if st.checkbox("Text Generation"):
|
195 |
def query(payload):
|
196 |
response = requests.post(API_URL, headers=headers, json=payload)
|
|
|
1 |
"""
|
2 |
#App: NLP App with Streamlit
|
|
|
3 |
Description
|
4 |
This is a Natural Language Processing (NLP) based application that is useful for basic NLP tasks such as the following:
|
5 |
|
6 |
+
+ Document/Text Summarization from Bangla and English Images and PDF files.
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"""
|
9 |
# Core Pkgs
|
10 |
import os
|
|
|
18 |
#os.system('pip install -q pytesseract')
|
19 |
#os.system('conda install -c conda-forge poppler')
|
20 |
import streamlit as st
|
21 |
+
st.set_page_config(page_title="Summarization Tool", layout="wide", initial_sidebar_state="expanded")
|
22 |
import torch
|
|
|
23 |
import docx2txt
|
24 |
from PIL import Image
|
25 |
from PyPDF2 import PdfFileReader
|
|
|
29 |
import pdf2image
|
30 |
|
31 |
|
32 |
+
# # NLP Pkgs
|
33 |
+
# from textblob import TextBlob
|
34 |
+
# import spacy
|
35 |
+
# #from gensim.summarization import summarize
|
36 |
import requests
|
37 |
import cv2
|
38 |
import numpy as np
|
|
|
50 |
response = requests.post(API_URL, headers=headers, json=payload)
|
51 |
return response.json()
|
52 |
|
|
|
|
|
|
|
53 |
def read_pdf(file):
|
54 |
# images=pdf2image.convert_from_path(file)
|
55 |
# # print(type(images))
|
|
|
84 |
# return all_page_text
|
85 |
st.title("NLP APPLICATION")
|
86 |
#@st.cache_resource(experimental_allow_widgets=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
def main():
|
88 |
#global tokenizer, model
|
89 |
#tokenizer = AutoTokenizer.from_pretrained('t5-base')
|
|
|
92 |
st.markdown("""
|
93 |
#### Description
|
94 |
##This is a Natural Language Processing(NLP) base Application that is useful for basic NLP tasks such as follows:
|
95 |
+
PDF Document/Image's Text Summarization.
|
|
|
|
|
|
|
96 |
""")
|
97 |
def change_photo_state():
|
98 |
st.session_state["photo"]="done"
|
99 |
+
st.subheader("Please, feed your pdf/images/text, features/services will appear automatically!")
|
100 |
message = st.text_input("Type your text here!")
|
101 |
uploaded_photo = st.file_uploader("Upload your PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
|
102 |
camera_photo = st.camera_input("Take a photo, Containing English texts", on_change=change_photo_state)
|
|
|
141 |
#our_image=load_image("image.jpg")
|
142 |
#img = cv2.imread("scholarly_text.jpg")
|
143 |
text = message
|
144 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
if st.checkbox("Text Generation"):
|
146 |
def query(payload):
|
147 |
response = requests.post(API_URL, headers=headers, json=payload)
|