Jonas Leeb committed · Commit 20ac67a · Parent(s): 23585ec
more outputs
app.py CHANGED
@@ -50,7 +50,7 @@ class ArxivSearch:
         self.search_button = gr.Button("Search")
 
         self.output_md = gr.Markdown()
-
+
         self.query_box.submit(
             self.search_function,
             inputs=[self.query_box, self.embedding_dropdown],
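For context, here is a minimal, self-contained sketch of the Gradio Blocks wiring this hunk touches. The component names mirror the diff; the dropdown choices and the body of search_function are assumptions, since neither appears in the commit.

import gradio as gr

def search_function(query, embedding):
    # Placeholder for ArxivSearch.search_function: the real method dispatches
    # to the keyword/word2vec/BERT rankers shown further down in this diff.
    return f"Searching for '{query}' using {embedding} embeddings..."

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="Query")
    embedding_dropdown = gr.Dropdown(
        choices=["TF-IDF", "Word2Vec", "BERT"], value="TF-IDF", label="Embedding"
    )
    search_button = gr.Button("Search")
    output_md = gr.Markdown()

    # Submitting the textbox (Enter) and clicking the button both trigger a
    # search, mirroring the query_box.submit(...) call in the hunk above.
    query_box.submit(search_function, inputs=[query_box, embedding_dropdown], outputs=output_md)
    search_button.click(search_function, inputs=[query_box, embedding_dropdown], outputs=output_md)

if __name__ == "__main__":
    demo.launch()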
@@ -201,7 +201,7 @@ class ArxivSearch:
         fig = go.Figure(data=[trace], layout=layout)
         return fig
 
-    def keyword_match_ranking(self, query, top_n=
+    def keyword_match_ranking(self, query, top_n=10):
         query_terms = query.lower().split()
         query_indices = [i for i, term in enumerate(self.feature_names) if term in query_terms]
         if not query_indices:
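The hunk above only shows the head and tail of keyword_match_ranking, so the sketch below fills in a plausible TF-IDF scoring step as a standalone function: documents are scored by summing the TF-IDF weights of the query terms found in the vocabulary. The column-sum scoring and the tfidf_matrix / feature_names arguments are assumptions; only the term lookup and the final sort/slice come from the diff.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def keyword_match_ranking(query, tfidf_matrix, feature_names, top_n=10):
    query_terms = query.lower().split()
    query_indices = [i for i, term in enumerate(feature_names) if term in query_terms]
    if not query_indices:
        return []
    # Assumed scoring: sum the TF-IDF weights of the matched vocabulary columns
    # for each document.
    doc_scores = np.asarray(tfidf_matrix[:, query_indices].sum(axis=1)).ravel()
    scores = [(i, s) for i, s in enumerate(doc_scores) if s > 0]
    scores.sort(key=lambda x: x[1], reverse=True)
    return scores[:top_n]

# Example usage with a toy corpus:
docs = ["graph neural networks for physics", "transformers for language modeling"]
vectorizer = TfidfVectorizer()
matrix = vectorizer.fit_transform(docs)
print(keyword_match_ranking("graph networks", matrix, vectorizer.get_feature_names_out()))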
@@ -215,7 +215,7 @@ class ArxivSearch:
         scores.sort(key=lambda x: x[1], reverse=True)
         return scores[:top_n]
 
-    def word2vec_search(self, query, top_n=
+    def word2vec_search(self, query, top_n=10):
         tokens = [word for word in query.split() if word in self.wv_model.key_to_index]
         if not tokens:
             return []
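word2vec_search is likewise only partially visible. A hedged standalone sketch: filter the query to in-vocabulary words (as in the diff), average their vectors, and rank precomputed document embeddings by cosine similarity. The averaging step and the doc_embeddings argument are assumptions; wv_model is a gensim KeyedVectors-style object, as implied by key_to_index.

import numpy as np

def word2vec_search(query, wv_model, doc_embeddings, top_n=10):
    # wv_model: gensim KeyedVectors; doc_embeddings: (num_docs, dim) array of
    # precomputed, averaged abstract vectors (assumed).
    tokens = [word for word in query.split() if word in wv_model.key_to_index]
    if not tokens:
        return []
    # Assumed: represent the query as the mean of its word vectors.
    query_vec = np.mean([wv_model[t] for t in tokens], axis=0)
    # Cosine similarity between the query vector and every document vector.
    sims = doc_embeddings @ query_vec / (
        np.linalg.norm(doc_embeddings, axis=1) * np.linalg.norm(query_vec) + 1e-10
    )
    top_indices = sims.argsort()[::-1][:top_n]
    return [(i, sims[i]) for i in top_indices]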
@@ -226,7 +226,7 @@ class ArxivSearch:
         top_indices = sims.argsort()[::-1][:top_n]
         return [(i, sims[i]) for i in top_indices]
 
-    def bert_search(self, query, top_n=
+    def bert_search(self, query, top_n=10):
         with torch.no_grad():
             inputs = self.tokenizer(query, return_tensors="pt", truncation=True, padding=True)
             outputs = self.model(**inputs)
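The BERT path follows the same pattern: encode the query with a Hugging Face tokenizer/model pair under torch.no_grad() (as in the diff), pool the hidden states into a single vector, and rank precomputed document embeddings by cosine similarity. The pooling step and the checkpoint name below are assumptions; the hunk only shows the encoding call.

import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

# Illustrative checkpoint; the app's actual model is not visible in this hunk.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

def bert_search(query, doc_embeddings, top_n=10):
    # doc_embeddings: (num_docs, hidden_size) array of precomputed abstract
    # embeddings (assumed).
    with torch.no_grad():
        inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
        outputs = model(**inputs)
        # Assumed pooling: take the final hidden state of the [CLS] token.
        query_vec = outputs.last_hidden_state[:, 0, :].squeeze(0).numpy()
    sims = doc_embeddings @ query_vec / (
        np.linalg.norm(doc_embeddings, axis=1) * np.linalg.norm(query_vec) + 1e-10
    )
    top_indices = sims.argsort()[::-1][:top_n]
    return [(i, sims[i]) for i in top_indices]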
@@ -236,7 +236,7 @@ class ArxivSearch:
         top_indices = sims.argsort()[::-1][:top_n]
         return [(i, sims[i]) for i in top_indices]
 
-    def bert_search_2(self, query, top_n=
+    def bert_search_2(self, query, top_n=10):
         with torch.no_grad():
             inputs = self.tokenizer(query, return_tensors="pt", truncation=True, padding=True)
             outputs = self.model(**inputs)
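bert_search_2 appears to be a second BERT variant (presumably differing in pooling), and after this commit all four rankers share the same signature and return shape: a list of (document index, score) pairs capped at top_n=10. That uniformity is what lets a single callback serve the dropdown. A hypothetical dispatcher, not taken from the commit:

def search_function(query, embedding, rankers, top_n=10):
    # rankers: mapping from dropdown label to a ranking callable with signature
    # (query, top_n) -> list of (index, score) pairs; in the app these would be
    # the ArxivSearch methods changed above. Labels are illustrative.
    results = rankers[embedding](query, top_n=top_n)
    if not results:
        return "No results found."
    # Format as Markdown for the gr.Markdown output component.
    return "\n".join(
        f"{rank + 1}. document {idx} (score: {score:.3f})"
        for rank, (idx, score) in enumerate(results)
    )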