colonelwatch committed on
Commit
cd209de
·
1 Parent(s): 030ddc3

Add mailto option

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -128,17 +128,17 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
128
  return index
129
 
130
 
131
- def execute_request(ids: list[str]) -> list[Work]:
132
  if len(ids) > 100:
133
  raise ValueError("querying /works endpoint with more than 100 works")
134
 
135
  # query with the /works endpoint with a specific list of IDs and fields
136
  search_filter = f"openalex_id:{"|".join(ids)}"
137
  search_select = ",".join(["id"] + Work.get_raw_fields())
138
- response = requests.get(
139
- "https://api.openalex.org/works",
140
- {"filter": search_filter, "select": search_select, "per-page": 100}
141
- )
142
  response.raise_for_status()
143
 
144
  # the response is not necessarily ordered, so order them
@@ -210,7 +210,8 @@ def main():
210
  model_name = get_env_var("MODEL_NAME", default="all-MiniLM-L6-v2")
211
  dir = get_env_var("DIR", Path, default=Path("index"))
212
  search_time_s = get_env_var("SEARCH_TIME_S", int, default=1)
213
- k = get_env_var("K", int, default=20)
 
214
 
215
  model = get_model(model_name, "cpu")
216
  index = get_index(dir, search_time_s)
@@ -222,7 +223,7 @@ def main():
222
 
223
  faiss_ids_flat = list(chain(*faiss_ids))
224
  openalex_ids_flat = index[faiss_ids_flat]["idxs"]
225
- works_flat = execute_request(openalex_ids_flat)
226
  works = [list(batch) for batch in batched(works_flat, k)]
227
 
228
  result_strings = [format_response(w, d) for w, d in zip(works, distances)]
 
128
  return index
129
 
130
 
131
+ def execute_request(ids: list[str], mailto: str | None) -> list[Work]:
132
  if len(ids) > 100:
133
  raise ValueError("querying /works endpoint with more than 100 works")
134
 
135
  # query with the /works endpoint with a specific list of IDs and fields
136
  search_filter = f"openalex_id:{"|".join(ids)}"
137
  search_select = ",".join(["id"] + Work.get_raw_fields())
138
+ params = {"filter": search_filter, "select": search_select, "per-page": 100}
139
+ if mailto is not None:
140
+ params["mailto"] = mailto
141
+ response = requests.get("https://api.openalex.org/works", params)
142
  response.raise_for_status()
143
 
144
  # the response is not necessarily ordered, so order them
 
210
  model_name = get_env_var("MODEL_NAME", default="all-MiniLM-L6-v2")
211
  dir = get_env_var("DIR", Path, default=Path("index"))
212
  search_time_s = get_env_var("SEARCH_TIME_S", int, default=1)
213
+ k = get_env_var("K", int, default=20) # TODO: can't go higher than 20 yet
214
+ mailto = get_env_var("MAILTO", str, None)
215
 
216
  model = get_model(model_name, "cpu")
217
  index = get_index(dir, search_time_s)
 
223
 
224
  faiss_ids_flat = list(chain(*faiss_ids))
225
  openalex_ids_flat = index[faiss_ids_flat]["idxs"]
226
+ works_flat = execute_request(openalex_ids_flat, mailto)
227
  works = [list(batch) for batch in batched(works_flat, k)]
228
 
229
  result_strings = [format_response(w, d) for w, d in zip(works, distances)]