colonelwatch committed on
Commit
e2d494c
·
1 Parent(s): 92f5352

Cover missing title edge case

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -21,9 +21,6 @@ ps.set_index_parameters(index, 'nprobe=16,ht=512')
21
 
22
 
23
  def _recover_abstract(inverted_index):
24
- if inverted_index is None: # edge case: no abstract
25
- return ''
26
-
27
  abstract_size = max([max(appearances) for appearances in inverted_index.values()])+1
28
 
29
  abstract = [None]*abstract_size
@@ -67,13 +64,23 @@ def format_response(neighbors, response):
67
 
68
  # collect attributes from openalex doc for the given openalex_id
69
  title = doc['title']
70
- abstract = _recover_abstract(doc['abstract_inverted_index'])
 
71
  author_names = [authorship['author']['display_name'] for authorship in doc['authorships']]
72
  # journal_name = doc['primary_location']['source']['display_name']
73
  publication_year = doc['publication_year']
74
  citation_count = doc['cited_by_count']
75
  doi = doc['doi']
76
 
 
 
 
 
 
 
 
 
 
77
  # try to get journal name or else set it to None
78
  try:
79
  journal_name = doc['primary_location']['source']['display_name']
 
21
 
22
 
23
  def _recover_abstract(inverted_index):
 
 
 
24
  abstract_size = max([max(appearances) for appearances in inverted_index.values()])+1
25
 
26
  abstract = [None]*abstract_size
 
64
 
65
  # collect attributes from openalex doc for the given openalex_id
66
  title = doc['title']
67
+ # abstract = _recover_abstract(doc['abstract_inverted_index'])
68
+ abstract_inverted_index = doc['abstract_inverted_index']
69
  author_names = [authorship['author']['display_name'] for authorship in doc['authorships']]
70
  # journal_name = doc['primary_location']['source']['display_name']
71
  publication_year = doc['publication_year']
72
  citation_count = doc['cited_by_count']
73
  doi = doc['doi']
74
 
75
+ if title is None: # edge case: no title
76
+ title = 'No title'
77
+
78
+ if abstract is None: # edge case: no abstract
79
+ abstract = 'No abstract'
80
+ else:
81
+ abstract = _recover_abstract(abstract_inverted_index)
82
+ abstract = abstract.replace('\n', '\\n').replace('\r', '\\r')
83
+
84
  # try to get journal name or else set it to None
85
  try:
86
  journal_name = doc['primary_location']['source']['display_name']