Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
cd209de
1
Parent(s):
030ddc3
Add mailto option
Browse files
app.py
CHANGED
@@ -128,17 +128,17 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
|
|
128 |
return index
|
129 |
|
130 |
|
131 |
-
def execute_request(ids: list[str]) -> list[Work]:
|
132 |
if len(ids) > 100:
|
133 |
raise ValueError("querying /works endpoint with more than 100 works")
|
134 |
|
135 |
# query with the /works endpoint with a specific list of IDs and fields
|
136 |
search_filter = f"openalex_id:{"|".join(ids)}"
|
137 |
search_select = ",".join(["id"] + Work.get_raw_fields())
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
)
|
142 |
response.raise_for_status()
|
143 |
|
144 |
# the response is not necessarily ordered, so order them
|
@@ -210,7 +210,8 @@ def main():
|
|
210 |
model_name = get_env_var("MODEL_NAME", default="all-MiniLM-L6-v2")
|
211 |
dir = get_env_var("DIR", Path, default=Path("index"))
|
212 |
search_time_s = get_env_var("SEARCH_TIME_S", int, default=1)
|
213 |
-
k = get_env_var("K", int, default=20)
|
|
|
214 |
|
215 |
model = get_model(model_name, "cpu")
|
216 |
index = get_index(dir, search_time_s)
|
@@ -222,7 +223,7 @@ def main():
|
|
222 |
|
223 |
faiss_ids_flat = list(chain(*faiss_ids))
|
224 |
openalex_ids_flat = index[faiss_ids_flat]["idxs"]
|
225 |
-
works_flat = execute_request(openalex_ids_flat)
|
226 |
works = [list(batch) for batch in batched(works_flat, k)]
|
227 |
|
228 |
result_strings = [format_response(w, d) for w, d in zip(works, distances)]
|
|
|
128 |
return index
|
129 |
|
130 |
|
131 |
+
def execute_request(ids: list[str], mailto: str | None) -> list[Work]:
|
132 |
if len(ids) > 100:
|
133 |
raise ValueError("querying /works endpoint with more than 100 works")
|
134 |
|
135 |
# query with the /works endpoint with a specific list of IDs and fields
|
136 |
search_filter = f"openalex_id:{"|".join(ids)}"
|
137 |
search_select = ",".join(["id"] + Work.get_raw_fields())
|
138 |
+
params = {"filter": search_filter, "select": search_select, "per-page": 100}
|
139 |
+
if mailto is not None:
|
140 |
+
params["mailto"] = mailto
|
141 |
+
response = requests.get("https://api.openalex.org/works", params)
|
142 |
response.raise_for_status()
|
143 |
|
144 |
# the response is not necessarily ordered, so order them
|
|
|
210 |
model_name = get_env_var("MODEL_NAME", default="all-MiniLM-L6-v2")
|
211 |
dir = get_env_var("DIR", Path, default=Path("index"))
|
212 |
search_time_s = get_env_var("SEARCH_TIME_S", int, default=1)
|
213 |
+
k = get_env_var("K", int, default=20) # TODO: can't go higher than 20 yet
|
214 |
+
mailto = get_env_var("MAILTO", str, None)
|
215 |
|
216 |
model = get_model(model_name, "cpu")
|
217 |
index = get_index(dir, search_time_s)
|
|
|
223 |
|
224 |
faiss_ids_flat = list(chain(*faiss_ids))
|
225 |
openalex_ids_flat = index[faiss_ids_flat]["idxs"]
|
226 |
+
works_flat = execute_request(openalex_ids_flat, mailto)
|
227 |
works = [list(batch) for batch in batched(works_flat, k)]
|
228 |
|
229 |
result_strings = [format_response(w, d) for w, d in zip(works, distances)]
|