abdullahmubeen10 committed
Commit 2e62a3e · verified
1 Parent(s): e0b1416

Update Demo.py

Files changed (1)
  1. Demo.py +15 -24
Demo.py CHANGED
@@ -42,31 +42,27 @@ def init_spark():
 
 @st.cache_resource
 def create_pipeline(model):
-    document_assembler = DocumentAssembler() \
-        .setInputCol("text") \
-        .setOutputCol("document")
+    documentAssembler = DocumentAssembler() \
+        .setInputCol("text") \
+        .setOutputCol("document")
 
-    sentence_detector = SentenceDetector() \
-        .setInputCols(["document"]) \
-        .setOutputCol("sentence")
+    sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+        .setInputCols(["document"]) \
+        .setOutputCol("sentence")
 
-    word_segmenter = WordSegmenterModel.pretrained("wordseg_large", "zh") \
+    tokenizer = WordSegmenterModel.pretrained("wordseg_large", "zh") \
         .setInputCols(["sentence"]) \
         .setOutputCol("token")
 
-    embeddings = BertEmbeddings.pretrained(name='bert_base_chinese', lang='zh') \
-        .setInputCols(["document", "token"]) \
-        .setOutputCol("embeddings")
+    tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_large_token_classifier_hrl", "xx") \
+        .setInputCols(["sentence", "token"]) \
+        .setOutputCol("ner")
 
-    ner = NerDLModel.pretrained(model, "zh") \
-        .setInputCols(["document", "token", "embeddings"]) \
-        .setOutputCol("ner")
-
-    ner_converter = NerConverter() \
-        .setInputCols(["sentence", "token", "ner"]) \
-        .setOutputCol("entities")
-
-    pipeline = Pipeline(stages=[document_assembler, sentence_detector, word_segmenter, embeddings, ner, ner_converter])
+    ner_converter = NerConverter() \
+        .setInputCols(["sentence", "token", "ner"]) \
+        .setOutputCol("ner_chunk")
+
+    pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, tokenClassifier, ner_converter])
     return pipeline
 
 def fit_data(pipeline, data):
@@ -91,16 +87,11 @@ def annotate(data):
 
 # Set up the page layout
 st.markdown('<div class="main-title">Recognize entities in Chinese text</div>', unsafe_allow_html=True)
-st.markdown("""
-<div class="section">
-    <p>This demo utilizes embeddings-based NER model for Urdu texts, using the urduvec_140M_300d word embeddings</p>
-</div>
-""", unsafe_allow_html=True)
 
 # Sidebar content
 model = st.sidebar.selectbox(
     "Choose the pretrained model",
-    ["ner_msra_bert_768d", "ner_weibo_bert_768d"],
+    ["xlm_roberta_large_token_classifier_hrl"],
     help="For more info about the models visit: https://sparknlp.org/models"
 )
 
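
For reviewers: a minimal sketch of how the updated create_pipeline could be exercised end to end. This is an illustration, not part of the commit; it assumes Spark NLP is installed, that sparknlp.start() can fetch the pretrained models, and the Chinese sample sentence is made up.

    import sparknlp
    from sparknlp.base import LightPipeline

    spark = sparknlp.start()  # starts a SparkSession with Spark NLP loaded
    pipeline = create_pipeline("xlm_roberta_large_token_classifier_hrl")

    # Every stage is pretrained, so fitting on an empty DataFrame
    # only wires the stages together; no training happens.
    empty_df = spark.createDataFrame([[""]]).toDF("text")
    model = pipeline.fit(empty_df)

    # LightPipeline annotates a plain string without building a DataFrame per request,
    # which is why the Streamlit demo can respond interactively.
    light = LightPipeline(model)
    result = light.fullAnnotate("马云在1999年创立了阿里巴巴。")[0]

    # Each chunk from the "ner_chunk" column carries the detected span
    # and its entity label (e.g. PER, ORG) in the annotation metadata.
    for chunk in result["ner_chunk"]:
        print(chunk.result, chunk.metadata["entity"])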