colonelwatch committed on
Commit
ddc3a5a
·
1 Parent(s): 9cee2a5

Shuffle around contents of execute_request, format_response, and search

Browse files
Files changed (1) hide show
  1. app.py +24 -25
app.py CHANGED
@@ -70,7 +70,7 @@ class Work:
70
  )
71
 
72
  @staticmethod
73
- def raw_fields() -> list[str]:
74
  return [
75
  "title",
76
  "abstract_inverted_index",
@@ -124,18 +124,24 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
124
  return index
125
 
126
 
127
- def execute_request(request_str):
128
- response = requests.get(request_str).json()
129
- return response
 
 
 
 
 
 
130
 
 
 
 
131
 
132
- def format_response(neighbors, response):
133
- response = {doc['id']: doc for doc in response['results']}
134
-
135
- result_string = ''
136
- for distance, openalex_id in neighbors:
137
- work = Work.from_dict(response[openalex_id])
138
 
 
 
 
139
  # edge cases: no title
140
  abstract = work.abstract if work.abstract is not None else "No abstract"
141
 
@@ -196,21 +202,18 @@ model = get_model(MODEL_NAME, "cpu")
196
  index = get_index(DIR, SEARCH_TIME_S)
197
 
198
 
199
- def search(query):
200
  global model, index
201
 
202
- # TODO: pass in param string directly?
203
  query_embedding = model.encode(query)
204
  distances, faiss_ids = index.search("embeddings", query_embedding, 20)
205
-
206
  openalex_ids = index[faiss_ids]["idxs"]
207
- search_filter = f'openalex_id:{"|".join(openalex_ids)}'
208
- search_select = 'id,title,abstract_inverted_index,authorships,primary_location,publication_year,cited_by_count,doi'
209
 
210
- neighbors = [(distance, openalex_id) for distance, openalex_id in zip(distances, openalex_ids)]
211
- request_str = f'https://api.openalex.org/works?filter={search_filter}&select={search_select}'
 
 
212
 
213
- return neighbors, request_str
214
 
215
  with gr.Blocks() as demo:
216
  gr.Markdown('# abstracts-index')
@@ -232,14 +235,10 @@ with gr.Blocks() as demo:
232
  results = gr.Markdown()
233
 
234
  md = MarkdownIt('js-default', {'linkify': True, 'typographer': True}) # don't render html or latex!
235
- results.md = md
236
 
237
- query.submit(search, inputs=[query], outputs=[neighbors_var, request_str_var]) \
238
- .success(execute_request, inputs=[request_str_var], outputs=[response_var]) \
239
- .success(format_response, inputs=[neighbors_var, response_var], outputs=[results])
240
- btn.click(search, inputs=[query], outputs=[neighbors_var, request_str_var]) \
241
- .success(execute_request, inputs=[request_str_var], outputs=[response_var]) \
242
- .success(format_response, inputs=[neighbors_var, response_var], outputs=[results])
243
 
244
  demo.queue(2)
245
  demo.launch()
 
70
  )
71
 
72
  @staticmethod
73
+ def get_raw_fields() -> list[str]:
74
  return [
75
  "title",
76
  "abstract_inverted_index",
 
124
  return index
125
 
126
 
127
+ def execute_request(ids: list[str]) -> list[Work]:
128
+ # query with the /works endpoint with a specific list of IDs and fields
129
+ search_filter = f'openalex_id:{"|".join(ids)}'
130
+ search_select = ",".join(["id"] + Work.get_raw_fields())
131
+ response = requests.get(
132
+ "https://api.openalex.org/works",
133
+ {"filter": search_filter, "select": search_select}
134
+ )
135
+ response.raise_for_status()
136
 
137
+ # the response is not necessarily ordered, so order them
138
+ response = {d["id"]: Work.from_dict(d) for d in response.json()["results"]}
139
+ return [response[id_] for id_ in ids]
140
 
 
 
 
 
 
 
141
 
142
+ def format_response(neighbors: list[Work], distances: list[float]) -> str:
143
+ result_string = ''
144
+ for work, distance in zip(neighbors, distances):
145
  # edge cases: no title
146
  abstract = work.abstract if work.abstract is not None else "No abstract"
147
 
 
202
  index = get_index(DIR, SEARCH_TIME_S)
203
 
204
 
205
+ def search(query: str) -> str:
206
  global model, index
207
 
 
208
  query_embedding = model.encode(query)
209
  distances, faiss_ids = index.search("embeddings", query_embedding, 20)
 
210
  openalex_ids = index[faiss_ids]["idxs"]
 
 
211
 
212
+ works = execute_request(openalex_ids)
213
+ result_string = format_response(works, distances)
214
+
215
+ return result_string
216
 
 
217
 
218
  with gr.Blocks() as demo:
219
  gr.Markdown('# abstracts-index')
 
235
  results = gr.Markdown()
236
 
237
  md = MarkdownIt('js-default', {'linkify': True, 'typographer': True}) # don't render html or latex!
238
+ results.md = md # TODO: does this not work anymore?
239
 
240
+ query.submit(search, inputs=[query], outputs=[results])
241
+ btn.click(search, inputs=[query], outputs=[results])
 
 
 
 
242
 
243
  demo.queue(2)
244
  demo.launch()