Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
2a39b6d
1
Parent(s):
09fb3b0
Update strings and whitespace
Browse files
app.py
CHANGED
@@ -49,7 +49,7 @@ class Work:
|
|
49 |
abstract = cls._recover_abstract(inverted_index) if inverted_index else None
|
50 |
|
51 |
try:
|
52 |
-
journal_name = d[
|
53 |
except (TypeError, KeyError): # key didn't exist or a value was null
|
54 |
journal_name = None
|
55 |
|
@@ -93,7 +93,7 @@ class Work:
|
|
93 |
for loc in locs:
|
94 |
abstract_words[loc] = word
|
95 |
|
96 |
-
return
|
97 |
|
98 |
|
99 |
def get_model(model_name: str, device: str) -> SentenceTransformer:
|
@@ -120,7 +120,7 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
|
|
120 |
|
121 |
def execute_request(ids: list[str]) -> list[Work]:
|
122 |
# query with the /works endpoint with a specific list of IDs and fields
|
123 |
-
search_filter = f
|
124 |
search_select = ",".join(["id"] + Work.get_raw_fields())
|
125 |
response = requests.get(
|
126 |
"https://api.openalex.org/works",
|
@@ -154,9 +154,9 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
|
|
154 |
entry_string += "\n\n**"
|
155 |
|
156 |
if len(work.authors) >= 3: # truncate to 3 if necessary
|
157 |
-
entry_string +=
|
158 |
elif work.authors:
|
159 |
-
entry_string +=
|
160 |
else:
|
161 |
entry_string += "No author"
|
162 |
|
@@ -170,7 +170,7 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
|
|
170 |
if work.abstract:
|
171 |
abstract = collapse_newlines(work.abstract)
|
172 |
if len(abstract) > 2000:
|
173 |
-
abstract = abstract[:2000] +
|
174 |
entry_string += abstract
|
175 |
else:
|
176 |
entry_string += "No abstract"
|
@@ -188,7 +188,7 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
|
|
188 |
entry_string += "*\n"
|
189 |
|
190 |
result_string += entry_string
|
191 |
-
|
192 |
return result_string
|
193 |
|
194 |
|
@@ -210,18 +210,21 @@ def search(query: str) -> str:
|
|
210 |
|
211 |
|
212 |
with gr.Blocks() as demo:
|
213 |
-
gr.Markdown(
|
214 |
gr.Markdown(
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
|
|
|
|
221 |
)
|
222 |
|
223 |
-
query = gr.Textbox(lines=1, placeholder=
|
224 |
-
btn = gr.Button(
|
225 |
results = gr.Markdown(
|
226 |
latex_delimiters=[
|
227 |
{"left": "$$", "right": "$$", "display": False},
|
@@ -234,4 +237,4 @@ with gr.Blocks() as demo:
|
|
234 |
btn.click(search, inputs=[query], outputs=[results])
|
235 |
|
236 |
demo.queue(2)
|
237 |
-
demo.launch()
|
|
|
49 |
abstract = cls._recover_abstract(inverted_index) if inverted_index else None
|
50 |
|
51 |
try:
|
52 |
+
journal_name = d["primary_location"]["source"]["display_name"]
|
53 |
except (TypeError, KeyError): # key didn't exist or a value was null
|
54 |
journal_name = None
|
55 |
|
|
|
93 |
for loc in locs:
|
94 |
abstract_words[loc] = word
|
95 |
|
96 |
+
return " ".join(word for word in abstract_words if word is not None)
|
97 |
|
98 |
|
99 |
def get_model(model_name: str, device: str) -> SentenceTransformer:
|
|
|
120 |
|
121 |
def execute_request(ids: list[str]) -> list[Work]:
|
122 |
# query with the /works endpoint with a specific list of IDs and fields
|
123 |
+
search_filter = f"openalex_id:{"|".join(ids)}"
|
124 |
search_select = ",".join(["id"] + Work.get_raw_fields())
|
125 |
response = requests.get(
|
126 |
"https://api.openalex.org/works",
|
|
|
154 |
entry_string += "\n\n**"
|
155 |
|
156 |
if len(work.authors) >= 3: # truncate to 3 if necessary
|
157 |
+
entry_string += ", ".join(work.authors[:3]) + ", ..."
|
158 |
elif work.authors:
|
159 |
+
entry_string += ", ".join(work.authors)
|
160 |
else:
|
161 |
entry_string += "No author"
|
162 |
|
|
|
170 |
if work.abstract:
|
171 |
abstract = collapse_newlines(work.abstract)
|
172 |
if len(abstract) > 2000:
|
173 |
+
abstract = abstract[:2000] + "..."
|
174 |
entry_string += abstract
|
175 |
else:
|
176 |
entry_string += "No abstract"
|
|
|
188 |
entry_string += "*\n"
|
189 |
|
190 |
result_string += entry_string
|
191 |
+
|
192 |
return result_string
|
193 |
|
194 |
|
|
|
210 |
|
211 |
|
212 |
with gr.Blocks() as demo:
|
213 |
+
gr.Markdown("# abstracts-index")
|
214 |
gr.Markdown(
|
215 |
+
"Explore 95 million academic publications selected from the "
|
216 |
+
"[OpenAlex](https://openalex.org) dataset. This project is an index of the "
|
217 |
+
"embeddings generated from their titles and abstracts. The embeddings were "
|
218 |
+
"generated using the `all-MiniLM-L6-v2` model provided by the "
|
219 |
+
"[sentence-transformers](https://www.sbert.net/) module, and the index was "
|
220 |
+
"built using the [faiss](https://github.com/facebookresearch/faiss) module. "
|
221 |
+
"The build scripts and more information available at the main repo "
|
222 |
+
"[abstracts-search](https://github.com/colonelwatch/abstracts-search) on "
|
223 |
+
"Github."
|
224 |
)
|
225 |
|
226 |
+
query = gr.Textbox(lines=1, placeholder="Enter your query here", show_label=False)
|
227 |
+
btn = gr.Button("Search")
|
228 |
results = gr.Markdown(
|
229 |
latex_delimiters=[
|
230 |
{"left": "$$", "right": "$$", "display": False},
|
|
|
237 |
btn.click(search, inputs=[query], outputs=[results])
|
238 |
|
239 |
demo.queue(2)
|
240 |
+
demo.launch()
|