edithram23 committed on
Commit
8ccde14
·
verified ·
1 Parent(s): df678f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -181,11 +181,14 @@ if uploaded_file is not None:
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
- # x = mask_generation(sent)
185
- # t5_words = list(set(sent.split(" ")).difference(set(x.split(" "))))
186
-
 
 
 
187
  entities,words_out = extract_entities(sent)
188
- # words_out+=t5_words
189
  bert_words = words_red_bert(sent)
190
  new=[]
191
  for w in words_out:
 
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
+ x = mask_generation(sent)
185
+ t5=[]
186
+ t5_words = list(set(sent.split("\n")))
187
+ for i in t5_words:
188
+ t5+=i.split(" ")
189
+ t5_words=set(t5).difference(set(x.split(" "))))
190
  entities,words_out = extract_entities(sent)
191
+ words_out+=t5_words
192
  bert_words = words_red_bert(sent)
193
  new=[]
194
  for w in words_out: