Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
ddc3a5a
1
Parent(s):
9cee2a5
Shuffle around contents of execute_request, format_response, and search
Browse files
app.py
CHANGED
@@ -70,7 +70,7 @@ class Work:
|
|
70 |
)
|
71 |
|
72 |
@staticmethod
|
73 |
-
def
|
74 |
return [
|
75 |
"title",
|
76 |
"abstract_inverted_index",
|
@@ -124,18 +124,24 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
|
|
124 |
return index
|
125 |
|
126 |
|
127 |
-
def execute_request(
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
|
|
|
|
|
|
131 |
|
132 |
-
def format_response(neighbors, response):
|
133 |
-
response = {doc['id']: doc for doc in response['results']}
|
134 |
-
|
135 |
-
result_string = ''
|
136 |
-
for distance, openalex_id in neighbors:
|
137 |
-
work = Work.from_dict(response[openalex_id])
|
138 |
|
|
|
|
|
|
|
139 |
# edge cases: no title
|
140 |
abstract = work.abstract if work.abstract is not None else "No abstract"
|
141 |
|
@@ -196,21 +202,18 @@ model = get_model(MODEL_NAME, "cpu")
|
|
196 |
index = get_index(DIR, SEARCH_TIME_S)
|
197 |
|
198 |
|
199 |
-
def search(query):
|
200 |
global model, index
|
201 |
|
202 |
-
# TODO: pass in param string directly?
|
203 |
query_embedding = model.encode(query)
|
204 |
distances, faiss_ids = index.search("embeddings", query_embedding, 20)
|
205 |
-
|
206 |
openalex_ids = index[faiss_ids]["idxs"]
|
207 |
-
search_filter = f'openalex_id:{"|".join(openalex_ids)}'
|
208 |
-
search_select = 'id,title,abstract_inverted_index,authorships,primary_location,publication_year,cited_by_count,doi'
|
209 |
|
210 |
-
|
211 |
-
|
|
|
|
|
212 |
|
213 |
-
return neighbors, request_str
|
214 |
|
215 |
with gr.Blocks() as demo:
|
216 |
gr.Markdown('# abstracts-index')
|
@@ -232,14 +235,10 @@ with gr.Blocks() as demo:
|
|
232 |
results = gr.Markdown()
|
233 |
|
234 |
md = MarkdownIt('js-default', {'linkify': True, 'typographer': True}) # don't render html or latex!
|
235 |
-
results.md = md
|
236 |
|
237 |
-
query.submit(search, inputs=[query], outputs=[
|
238 |
-
|
239 |
-
.success(format_response, inputs=[neighbors_var, response_var], outputs=[results])
|
240 |
-
btn.click(search, inputs=[query], outputs=[neighbors_var, request_str_var]) \
|
241 |
-
.success(execute_request, inputs=[request_str_var], outputs=[response_var]) \
|
242 |
-
.success(format_response, inputs=[neighbors_var, response_var], outputs=[results])
|
243 |
|
244 |
demo.queue(2)
|
245 |
demo.launch()
|
|
|
70 |
)
|
71 |
|
72 |
@staticmethod
|
73 |
+
def get_raw_fields() -> list[str]:
|
74 |
return [
|
75 |
"title",
|
76 |
"abstract_inverted_index",
|
|
|
124 |
return index
|
125 |
|
126 |
|
127 |
+
def execute_request(ids: list[str]) -> list[Work]:
|
128 |
+
# query with the /works endpoint with a specific list of IDs and fields
|
129 |
+
search_filter = f'openalex_id:{"|".join(ids)}'
|
130 |
+
search_select = ",".join(["id"] + Work.get_raw_fields())
|
131 |
+
response = requests.get(
|
132 |
+
"https://api.openalex.org/works",
|
133 |
+
{"filter": search_filter, "select": search_select}
|
134 |
+
)
|
135 |
+
response.raise_for_status()
|
136 |
|
137 |
+
# the response is not necessarily ordered, so order them
|
138 |
+
response = {d["id"]: Work.from_dict(d) for d in response.json()["results"]}
|
139 |
+
return [response[id_] for id_ in ids]
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
+
def format_response(neighbors: list[Work], distances: list[float]) -> str:
|
143 |
+
result_string = ''
|
144 |
+
for work, distance in zip(neighbors, distances):
|
145 |
# edge cases: no title
|
146 |
abstract = work.abstract if work.abstract is not None else "No abstract"
|
147 |
|
|
|
202 |
index = get_index(DIR, SEARCH_TIME_S)
|
203 |
|
204 |
|
205 |
+
def search(query: str) -> str:
|
206 |
global model, index
|
207 |
|
|
|
208 |
query_embedding = model.encode(query)
|
209 |
distances, faiss_ids = index.search("embeddings", query_embedding, 20)
|
|
|
210 |
openalex_ids = index[faiss_ids]["idxs"]
|
|
|
|
|
211 |
|
212 |
+
works = execute_request(openalex_ids)
|
213 |
+
result_string = format_response(works, distances)
|
214 |
+
|
215 |
+
return result_string
|
216 |
|
|
|
217 |
|
218 |
with gr.Blocks() as demo:
|
219 |
gr.Markdown('# abstracts-index')
|
|
|
235 |
results = gr.Markdown()
|
236 |
|
237 |
md = MarkdownIt('js-default', {'linkify': True, 'typographer': True}) # don't render html or latex!
|
238 |
+
results.md = md # TODO: does this not work anymore?
|
239 |
|
240 |
+
query.submit(search, inputs=[query], outputs=[results])
|
241 |
+
btn.click(search, inputs=[query], outputs=[results])
|
|
|
|
|
|
|
|
|
242 |
|
243 |
demo.queue(2)
|
244 |
demo.launch()
|