colonelwatch committed on
Commit
2a39b6d
·
1 Parent(s): 09fb3b0

Update strings and whitespace

Browse files
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -49,7 +49,7 @@ class Work:
49
  abstract = cls._recover_abstract(inverted_index) if inverted_index else None
50
 
51
  try:
52
- journal_name = d['primary_location']['source']['display_name']
53
  except (TypeError, KeyError): # key didn't exist or a value was null
54
  journal_name = None
55
 
@@ -93,7 +93,7 @@ class Work:
93
  for loc in locs:
94
  abstract_words[loc] = word
95
 
96
- return ' '.join(word for word in abstract_words if word is not None)
97
 
98
 
99
  def get_model(model_name: str, device: str) -> SentenceTransformer:
@@ -120,7 +120,7 @@ def get_index(dir: Path, search_time_s: float) -> Dataset:
120
 
121
  def execute_request(ids: list[str]) -> list[Work]:
122
  # query with the /works endpoint with a specific list of IDs and fields
123
- search_filter = f'openalex_id:{"|".join(ids)}'
124
  search_select = ",".join(["id"] + Work.get_raw_fields())
125
  response = requests.get(
126
  "https://api.openalex.org/works",
@@ -154,9 +154,9 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
154
  entry_string += "\n\n**"
155
 
156
  if len(work.authors) >= 3: # truncate to 3 if necessary
157
- entry_string += ', '.join(work.authors[:3]) + ', ...'
158
  elif work.authors:
159
- entry_string += ', '.join(work.authors)
160
  else:
161
  entry_string += "No author"
162
 
@@ -170,7 +170,7 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
170
  if work.abstract:
171
  abstract = collapse_newlines(work.abstract)
172
  if len(abstract) > 2000:
173
- abstract = abstract[:2000] + '...'
174
  entry_string += abstract
175
  else:
176
  entry_string += "No abstract"
@@ -188,7 +188,7 @@ def format_response(neighbors: list[Work], distances: list[float]) -> str:
188
  entry_string += "*\n"
189
 
190
  result_string += entry_string
191
-
192
  return result_string
193
 
194
 
@@ -210,18 +210,21 @@ def search(query: str) -> str:
210
 
211
 
212
  with gr.Blocks() as demo:
213
- gr.Markdown('# abstracts-index')
214
  gr.Markdown(
215
- 'Explore 95 million academic publications selected from the [OpenAlex](https://openalex.org) dataset. This '
216
- 'project is an index of the embeddings generated from their titles and abstracts. The embeddings were '
217
- 'generated using the `all-MiniLM-L6-v2` model provided by the [sentence-transformers](https://www.sbert.net/) '
218
- 'module, and the index was built using the [faiss](https://github.com/facebookresearch/faiss) module. The build '
219
- 'scripts and more information available at the main repo '
220
- '[abstracts-search](https://github.com/colonelwatch/abstracts-search) on Github.'
 
 
 
221
  )
222
 
223
- query = gr.Textbox(lines=1, placeholder='Enter your query here', show_label=False)
224
- btn = gr.Button('Search')
225
  results = gr.Markdown(
226
  latex_delimiters=[
227
  {"left": "$$", "right": "$$", "display": False},
@@ -234,4 +237,4 @@ with gr.Blocks() as demo:
234
  btn.click(search, inputs=[query], outputs=[results])
235
 
236
  demo.queue(2)
237
- demo.launch()
 
49
  abstract = cls._recover_abstract(inverted_index) if inverted_index else None
50
 
51
  try:
52
+ journal_name = d["primary_location"]["source"]["display_name"]
53
  except (TypeError, KeyError): # key didn't exist or a value was null
54
  journal_name = None
55
 
 
93
  for loc in locs:
94
  abstract_words[loc] = word
95
 
96
+ return " ".join(word for word in abstract_words if word is not None)
97
 
98
 
99
  def get_model(model_name: str, device: str) -> SentenceTransformer:
 
120
 
121
  def execute_request(ids: list[str]) -> list[Work]:
122
  # query with the /works endpoint with a specific list of IDs and fields
123
+ search_filter = f"openalex_id:{"|".join(ids)}"
124
  search_select = ",".join(["id"] + Work.get_raw_fields())
125
  response = requests.get(
126
  "https://api.openalex.org/works",
 
154
  entry_string += "\n\n**"
155
 
156
  if len(work.authors) >= 3: # truncate to 3 if necessary
157
+ entry_string += ", ".join(work.authors[:3]) + ", ..."
158
  elif work.authors:
159
+ entry_string += ", ".join(work.authors)
160
  else:
161
  entry_string += "No author"
162
 
 
170
  if work.abstract:
171
  abstract = collapse_newlines(work.abstract)
172
  if len(abstract) > 2000:
173
+ abstract = abstract[:2000] + "..."
174
  entry_string += abstract
175
  else:
176
  entry_string += "No abstract"
 
188
  entry_string += "*\n"
189
 
190
  result_string += entry_string
191
+
192
  return result_string
193
 
194
 
 
210
 
211
 
212
  with gr.Blocks() as demo:
213
+ gr.Markdown("# abstracts-index")
214
  gr.Markdown(
215
+ "Explore 95 million academic publications selected from the "
216
+ "[OpenAlex](https://openalex.org) dataset. This project is an index of the "
217
+ "embeddings generated from their titles and abstracts. The embeddings were "
218
+ "generated using the `all-MiniLM-L6-v2` model provided by the "
219
+ "[sentence-transformers](https://www.sbert.net/) module, and the index was "
220
+ "built using the [faiss](https://github.com/facebookresearch/faiss) module. "
221
+ "The build scripts and more information available at the main repo "
222
+ "[abstracts-search](https://github.com/colonelwatch/abstracts-search) on "
223
+ "Github."
224
  )
225
 
226
+ query = gr.Textbox(lines=1, placeholder="Enter your query here", show_label=False)
227
+ btn = gr.Button("Search")
228
  results = gr.Markdown(
229
  latex_delimiters=[
230
  {"left": "$$", "right": "$$", "display": False},
 
237
  btn.click(search, inputs=[query], outputs=[results])
238
 
239
  demo.queue(2)
240
+ demo.launch()