aemin committed on
Commit
f3522da
·
1 Parent(s): ed23144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -1
app.py CHANGED
@@ -61,7 +61,7 @@ st.sidebar.markdown(logo_html, unsafe_allow_html=True)
61
 
62
 
63
  #sidebar info
64
- model_name= ["nerdl_fewnerd_100d", "ner_conll_elmo", "ner_mit_movie_complex_distilbert_base_cased", "ner_conll_albert_large_uncased"]
65
  st.sidebar.title("Pretrained model to test")
66
  selected_model = st.sidebar.selectbox("", model_name)
67
 
@@ -95,6 +95,14 @@ elif selected_model=="ner_conll_albert_large_uncased":
95
  st.title(app_title)
96
  st.markdown("<h2>"+app_description+"</h2>" , unsafe_allow_html=True)
97
  st.markdown("**`PER`** **,** **`LOC`** **,** **`ORG`** **,** **`MISC` **", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
98
 
99
  st.subheader("")
100
 
@@ -213,6 +221,28 @@ def get_pipeline(text):
213
  ner_converter
214
  ])
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
 
218
  empty_df = spark.createDataFrame([[""]]).toDF("text")
@@ -234,6 +264,9 @@ elif selected_model=="ner_mit_movie_complex_distilbert_base_cased":
234
  elif selected_model=="ner_conll_elmo":
235
  text= st.text_input("Type here your text and press enter to run: ", value="Tottenham Hotspur Football Club, commonly referred to as Tottenham or Spurs, is an English professional football club based in Tottenham, London, that competes in the Premier League, the top flight of English football.")
236
 
 
 
 
237
  else:
238
  text= st.text_input("Type here your text and press enter to run:", value="12 Corazones ('12 Hearts') is Spanish-language dating game show produced in the United States for the television network Telemundo since January 2005, based on its namesake Argentine TV show format. The show is filmed in Los Angeles and revolves around the twelve Zodiac signs that identify each contestant. In 2008, Ho filmed a cameo in the Steven Spielberg feature film The Cloverfield Paradox, as a news pundit.")
239
 
 
61
 
62
 
63
  #sidebar info
64
+ model_name= ["nerdl_fewnerd_100d", "ner_conll_elmo", "ner_mit_movie_complex_distilbert_base_cased", "ner_conll_albert_large_uncased", "onto_100"]
65
  st.sidebar.title("Pretrained model to test")
66
  selected_model = st.sidebar.selectbox("", model_name)
67
 
 
95
  st.title(app_title)
96
  st.markdown("<h2>"+app_description+"</h2>" , unsafe_allow_html=True)
97
  st.markdown("**`PER`** **,** **`LOC`** **,** **`ORG`** **,** **`MISC` **", unsafe_allow_html=True)
98
+
99
+ elif selected_model=="onto_100":
100
+ app_title= "Detect up to 18 entity types in general domain texts"
101
+ app_description= "Named Entity Recognition model aimed to detect up to 18 entity types from general domain texts. This model was trained with GloVe 100d word embeddings using Spark NLP, so be sure to use same embeddings in the pipeline. It is available in Spark NLP Models hub (https://nlp.johnsnowlabs.com/models)"
102
+ st.title(app_title)
103
+ st.markdown("<h2>"+app_description+"</h2>" , unsafe_allow_html=True)
104
+ st.markdown("""**`CARDINAL`** **,** **`EVENT`** **,** **`WORK_OF_ART`** **,** **`ORG`** **,** **`DATE`** **,** **`GPE`** **,** **`PERSON`** **,** **`PRODUCT`**,
105
+ **`NORP`** **,** **`ORDINAL`** **,** **`MONEY`** **,** **`LOC` **, **`FAC`** **,** **`LAW`** **,** **`TIME`** **,** **`PERCENT`** **,** **`QUANTITY`** **,** **`LANGUAGE` **""", unsafe_allow_html=True)
106
 
107
  st.subheader("")
108
 
 
221
  ner_converter
222
  ])
223
 
224
+ elif selected_model=="onto_100":
225
+ embeddings = WordEmbeddingsModel.pretrained('glove_100d') \
226
+ .setInputCols(['document', 'token']) \
227
+ .setOutputCol('embeddings')
228
+
229
+ ner = NerDLModel.pretrained("onto_100", "en") \
230
+ .setInputCols(["document", "token", "embeddings"]) \
231
+ .setOutputCol("ner")
232
+
233
+ ner_converter = NerConverter()\
234
+ .setInputCols(["document","token","ner"])\
235
+ .setOutputCol("ner_chunk")
236
+
237
+ pipeline = Pipeline(
238
+ stages = [
239
+ documentAssembler,
240
+ sentenceDetector,
241
+ tokenizer,
242
+ embeddings,
243
+ ner,
244
+ ner_converter
245
+ ])
246
 
247
 
248
  empty_df = spark.createDataFrame([[""]]).toDF("text")
 
264
  elif selected_model=="ner_conll_elmo":
265
  text= st.text_input("Type here your text and press enter to run: ", value="Tottenham Hotspur Football Club, commonly referred to as Tottenham or Spurs, is an English professional football club based in Tottenham, London, that competes in the Premier League, the top flight of English football.")
266
 
267
+ elif selected_model=="onto_100":
268
+ text= st.text_input("Type here your text and press enter to run: ", value="William Henry Gates III (born October 28, 1955) is an American business magnate, software developer, investor, and philanthropist. He is best known as the co-founder of Microsoft Corporation. During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president and chief software architect, while also being the largest individual shareholder until May 2014. He is one of the best-known entrepreneurs and pioneers of the microcomputer revolution of the 1970s and 1980s. Born and raised in Seattle, Washington, Gates co-founded Microsoft with childhood friend Paul Allen in 1975, in Albuquerque, New Mexico; it went on to become the world's largest personal computer software company. Gates led the company as chairman and CEO until stepping down as CEO in January 2000, but he remained chairman and became chief software architect.")
269
+
270
  else:
271
  text= st.text_input("Type here your text and press enter to run:", value="12 Corazones ('12 Hearts') is Spanish-language dating game show produced in the United States for the television network Telemundo since January 2005, based on its namesake Argentine TV show format. The show is filmed in Los Angeles and revolves around the twelve Zodiac signs that identify each contestant. In 2008, Ho filmed a cameo in the Steven Spielberg feature film The Cloverfield Paradox, as a news pundit.")
272