Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -71,6 +71,16 @@ from tqdm.autonotebook import tqdm
|
|
71 |
import numpy as np
|
72 |
import re
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
# We lower case our text and remove stop-words from indexing
|
76 |
def bm25_tokenizer(text):
|
@@ -88,6 +98,9 @@ for passage in tqdm(passages):
|
|
88 |
|
89 |
bm25 = BM25Okapi(tokenized_corpus)
|
90 |
|
|
|
|
|
|
|
91 |
# This function will search all wikipedia articles for passages that
|
92 |
# answer the query
|
93 |
def search(query):
|
@@ -163,7 +176,24 @@ def search(query):
|
|
163 |
#st.write(rs_final.strip())
|
164 |
res.append(rs_final.strip())
|
165 |
|
166 |
-
st.write(res[0:maxtags_sidebar])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
st.write("## Results:")
|
169 |
if st.button('Generated Expansion'):
|
|
|
71 |
import numpy as np
|
72 |
import re
|
73 |
|
74 |
+
import yake
|
75 |
+
|
76 |
+
language = "en"
|
77 |
+
max_ngram_size = 3
|
78 |
+
deduplication_threshold = 0.9
|
79 |
+
deduplication_algo = 'seqm'
|
80 |
+
windowSize = 3
|
81 |
+
numOfKeywords = 3
|
82 |
+
|
83 |
+
custom_kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_threshold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None)
|
84 |
|
85 |
# We lower case our text and remove stop-words from indexing
|
86 |
def bm25_tokenizer(text):
|
|
|
98 |
|
99 |
bm25 = BM25Okapi(tokenized_corpus)
|
100 |
|
101 |
+
def word_len(s):
    """Return the number of words in the string *s*.

    Words are maximal runs of non-whitespace characters. Splitting with
    no separator argument treats any whitespace (spaces, tabs, newlines)
    as a delimiter and ignores leading, trailing, and repeated
    whitespace, so no empty tokens need to be filtered out.

    Args:
        s: The text to count words in.

    Returns:
        The word count as an int; 0 for an empty or whitespace-only string.
    """
    # str.split() without arguments never yields empty strings, unlike
    # split(' '), which also fails to separate on tabs or newlines.
    return len(s.split())
|
103 |
+
|
104 |
# This function will search all wikipedia articles for passages that
|
105 |
# answer the query
|
106 |
def search(query):
|
|
|
176 |
#st.write(rs_final.strip())
|
177 |
res.append(rs_final.strip())
|
178 |
|
179 |
+
#st.write(res[0:maxtags_sidebar])
|
180 |
+
|
181 |
+
res_clean = []
|
182 |
+
for out in result:
|
183 |
+
if len(out) > 20:
|
184 |
+
keywords = custom_kw_extractor.extract_keywords(out)
|
185 |
+
for key in keywords:
|
186 |
+
res_clean.append(key[0])
|
187 |
+
else:
|
188 |
+
res_clean.append(out)
|
189 |
+
|
190 |
+
show_out = []
|
191 |
+
for i in res_clean:
|
192 |
+
num = word_len(i)
|
193 |
+
if num > 1:
|
194 |
+
show_out.append(i)
|
195 |
+
|
196 |
+
st.write(show_out[0:maxtags_sidebar])
|
197 |
|
198 |
st.write("## Results:")
|
199 |
if st.button('Generated Expansion'):
|