edithram23 committed on
Commit
eb1cea3
·
verified ·
1 Parent(s): 39f0789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -21
app.py CHANGED
@@ -182,27 +182,28 @@ if uploaded_file is not None:
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
  x = mask_generation(sent)
185
- sent_out = re.sub("\n","|",sent)
186
- sent_out = re.sub(".","|",sent_out)
187
- sent_out = re.sub(",","|",sent_out)
188
- sent_out = re.sub(" ","|",sent_out)
189
- sent_n_q_c=sent_out.split("|")
190
- # sent_n_q_c=[]
191
- # sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
192
- # for i in sent_n:
193
- # for j in i.split(" "):
194
- # sent_n_q_c+=j.split(',')
195
- # x_q = x.lower().replace('.',' ').split(' ')
196
- # e=[]
197
- # for i in x_q:
198
- # e+=i.split(',')
199
- sent_out = re.sub("\n","|",x)
200
- sent_out = re.sub(".","|",sent_out)
201
- sent_out = re.sub(",","|",sent_out)
202
- sent_out = re.sub(" ","|",sent_out)
203
- e = sent_out.split("|")
204
- t5_words=set(sent_n_q_c).difference(set(e))
205
  entities,words_out = extract_entities(sent)
 
206
  # print("\nwords_out:",words_out)
207
  # print("\nT5",t5_words)
208
  # print("X:",x,"\nsent:",sent,"\nx_q:",x_q,"\nsent_n:",sent_n,"\ne:",e,"\nsent_n_q_c:",sent_n_q_c,'\nt5_words',t5_words)
@@ -211,7 +212,7 @@ if uploaded_file is not None:
211
  new=[]
212
  for w in words_out:
213
  new+=w.split('\n')
214
- words_out+=t5_words
215
  new+=bert_words
216
  words_out = [i for i in new if len(i)>3]
217
  # print("\nfinal:",words_out)
 
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
  x = mask_generation(sent)
185
+ # sent_out = re.sub("\n","|",sent)
186
+ # sent_out = re.sub(".","|",sent_out)
187
+ # sent_out = re.sub(",","|",sent_out)
188
+ # sent_out = re.sub(" ","|",sent_out)
189
+ # sent_n_q_c=sent_out.split("|")
190
+ sent_n_q_c=[]
191
+ sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
192
+ for i in sent_n:
193
+ for j in i.split(" "):
194
+ sent_n_q_c+=j.split(',')
195
+ x_q = x.lower().replace('.',' ').split(' ')
196
+ e=[]
197
+ for i in x_q:
198
+ e+=i.split(',')
199
+ # sent_out = re.sub("\n","|",x)
200
+ # sent_out = re.sub(".","|",sent_out)
201
+ # sent_out = re.sub(",","|",sent_out)
202
+ # sent_out = re.sub(" ","|",sent_out)
203
+ # e = sent_out.split("|")
204
+ t5_words=list(set(sent_n_q_c).difference(set(e)))
205
  entities,words_out = extract_entities(sent)
206
+ words_out+=t5_words
207
  # print("\nwords_out:",words_out)
208
  # print("\nT5",t5_words)
209
  # print("X:",x,"\nsent:",sent,"\nx_q:",x_q,"\nsent_n:",sent_n,"\ne:",e,"\nsent_n_q_c:",sent_n_q_c,'\nt5_words',t5_words)
 
212
  new=[]
213
  for w in words_out:
214
  new+=w.split('\n')
215
+ # words_out+=t5_words
216
  new+=bert_words
217
  words_out = [i for i in new if len(i)>3]
218
  # print("\nfinal:",words_out)