Omar ID EL MOUMEN committed on
Commit
a039626
·
1 Parent(s): d8045d1

Modify title format

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -113,7 +113,7 @@ async def extract_text_pdf(document: DocumentID):
113
  for title in titles:
114
  if title[0] == 1:
115
  main_titles.append(title[1])
116
- return {"pub_id": document.doc_id, "titles": main_titles, "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
117
  else:
118
  print("ID: " + document.doc_id)
119
  print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")
 
113
  for title in titles:
114
  if title[0] == 1:
115
  main_titles.append(title[1])
116
+ return {"pub_id": document.doc_id, "titles": [re.sub(r"\s+", " ", remove_punctuations(remove_in_betweens(t))).strip() for t in main_titles], "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
117
  else:
118
  print("ID: " + document.doc_id)
119
  print("URL: " + f"http://arxiv.org/pdf/{document.doc_id}")