Soumen commited on
Commit
3d7adba
·
1 Parent(s): 6c1c515

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -58
app.py CHANGED
@@ -1,20 +1,10 @@
1
  """
2
  #App: NLP App with Streamlit
3
- Credits: Streamlit Team, Marc Skov Madsen(For Awesome-streamlit gallery)
4
  Description
5
  This is a Natural Language Processing(NLP) based Application that is useful for basic NLP tasks such as follows;
6
 
7
- + Tokenization(POS tagging) & Lemmatization(root mean) using Spacy
8
 
9
- + Named Entity Recognition(NER)/Trigger word detection using SpaCy
10
-
11
- + Sentiment Analysis using TextBlob
12
-
13
- + Document/Text Summarization using Gensim/T5 both for Bangla Extractive and English Abstractive.
14
-
15
- This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
16
- Purpose
17
- To perform basic and useful NLP tasks with Streamlit, Spacy, Textblob, and Gensim
18
  """
19
  # Core Pkgs
20
  import os
@@ -28,9 +18,8 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
28
  #os.system('pip install -q pytesseract')
29
  #os.system('conda install -c conda-forge poppler')
30
  import streamlit as st
31
- st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
32
  import torch
33
- #from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
34
  import docx2txt
35
  from PIL import Image
36
  from PyPDF2 import PdfFileReader
@@ -40,10 +29,10 @@ import pdfplumber
40
  import pdf2image
41
 
42
 
43
- # NLP Pkgs
44
- from textblob import TextBlob
45
- import spacy
46
- #from gensim.summarization import summarize
47
  import requests
48
  import cv2
49
  import numpy as np
@@ -61,9 +50,6 @@ def query(payload):
61
  response = requests.post(API_URL, headers=headers, json=payload)
62
  return response.json()
63
 
64
- output = query({
65
- "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
66
- })
67
  def read_pdf(file):
68
  # images=pdf2image.convert_from_path(file)
69
  # # print(type(images))
@@ -98,29 +84,6 @@ def read_pdf(file):
98
  # return all_page_text
99
  st.title("NLP APPLICATION")
100
  #@st.cache_resource(experimental_allow_widgets=True)
101
- @st.experimental_singleton
102
- def text_analyzer(my_text):
103
- nlp = spacy.load('en_core_web_sm')
104
- docx = nlp(my_text)
105
- # tokens = [ token.text for token in docx]
106
- allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
107
- return allData
108
- #@st.cache_resource(experimental_allow_widgets=True)
109
- # def load_models():
110
- # tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
111
- # model = GPT2LMHeadModel.from_pretrained('gpt2-large')
112
- # return tokenizer, model
113
-
114
- # Function For Extracting Entities
115
- @st.experimental_singleton
116
- #@st.cache_resource(experimental_allow_widgets=True)
117
- def entity_analyzer(my_text):
118
- nlp = spacy.load('en_core_web_sm')
119
- docx = nlp(my_text)
120
- tokens = [ token.text for token in docx]
121
- entities = [(entity.text,entity.label_)for entity in docx.ents]
122
- allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
123
- return allData
124
  def main():
125
  #global tokenizer, model
126
  #tokenizer = AutoTokenizer.from_pretrained('t5-base')
@@ -129,14 +92,11 @@ def main():
129
  st.markdown("""
130
  #### Description
131
  ##This is a Natural Language Processing(NLP) based Application that is useful for basic NLP tasks such as follows:
132
- + Tokenization(POS tagging) & Lemmatization(root mean) using Spacy
133
- + Named Entity Recognition(NER)/Trigger word detection using SpaCy
134
- + Sentiment Analysis using TextBlob
135
- + Document/Text Summarization using T5 for English Abstractive.
136
  """)
137
  def change_photo_state():
138
  st.session_state["photo"]="done"
139
- st.subheader("Please, feed your image/text, features/services will appear automatically!")
140
  message = st.text_input("Type your text here!")
141
  uploaded_photo = st.file_uploader("Upload your PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
142
  camera_photo = st.camera_input("Take a photo, Containing English texts", on_change=change_photo_state)
@@ -181,16 +141,7 @@ def main():
181
  #our_image=load_image("image.jpg")
182
  #img = cv2.imread("scholarly_text.jpg")
183
  text = message
184
- if st.checkbox("Show Named Entities English/Bangla"):
185
- entity_result = entity_analyzer(text)
186
- st.json(entity_result)
187
-
188
- if st.checkbox("Show Sentiment Analysis for English"):
189
- blob = TextBlob(text)
190
- result_sentiment = blob.sentiment
191
- st.success(result_sentiment)
192
- if st.checkbox("Spell Corrections for English"):
193
- st.success(TextBlob(text).correct())
194
  if st.checkbox("Text Generation"):
195
  def query(payload):
196
  response = requests.post(API_URL, headers=headers, json=payload)
 
1
  """
2
  #App: NLP App with Streamlit
 
3
  Description
4
  This is a Natural Language Processing(NLP) based Application that is useful for basic NLP tasks such as follows;
5
 
6
+ + Document/Text Summarization from Bangla and English Images and PDF files.
7
 
 
 
 
 
 
 
 
 
 
8
  """
9
  # Core Pkgs
10
  import os
 
18
  #os.system('pip install -q pytesseract')
19
  #os.system('conda install -c conda-forge poppler')
20
  import streamlit as st
21
+ st.set_page_config(page_title="Summarization Tool", layout="wide", initial_sidebar_state="expanded")
22
  import torch
 
23
  import docx2txt
24
  from PIL import Image
25
  from PyPDF2 import PdfFileReader
 
29
  import pdf2image
30
 
31
 
32
+ # # NLP Pkgs
33
+ # from textblob import TextBlob
34
+ # import spacy
35
+ # #from gensim.summarization import summarize
36
  import requests
37
  import cv2
38
  import numpy as np
 
50
  response = requests.post(API_URL, headers=headers, json=payload)
51
  return response.json()
52
 
 
 
 
53
  def read_pdf(file):
54
  # images=pdf2image.convert_from_path(file)
55
  # # print(type(images))
 
84
  # return all_page_text
85
  st.title("NLP APPLICATION")
86
  #@st.cache_resource(experimental_allow_widgets=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def main():
88
  #global tokenizer, model
89
  #tokenizer = AutoTokenizer.from_pretrained('t5-base')
 
92
  st.markdown("""
93
  #### Description
94
  ##This is a Natural Language Processing(NLP) based Application that is useful for basic NLP tasks such as follows:
95
+ PDF Document/Image's Text Summarization.
 
 
 
96
  """)
97
  def change_photo_state():
98
  st.session_state["photo"]="done"
99
+ st.subheader("Please, feed your pdf/images/text, features/services will appear automatically!")
100
  message = st.text_input("Type your text here!")
101
  uploaded_photo = st.file_uploader("Upload your PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
102
  camera_photo = st.camera_input("Take a photo, Containing English texts", on_change=change_photo_state)
 
141
  #our_image=load_image("image.jpg")
142
  #img = cv2.imread("scholarly_text.jpg")
143
  text = message
144
+
 
 
 
 
 
 
 
 
 
145
  if st.checkbox("Text Generation"):
146
  def query(payload):
147
  response = requests.post(API_URL, headers=headers, json=payload)