Omar ID EL MOUMEN committed on
Commit
d8045d1
·
1 Parent(s): 2e329bd

Change regex: anchor the title pattern to the end of the string with `$`

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -104,7 +104,7 @@ async def extract_text_pdf(document: DocumentID):
104
  postprocess_text = remove_punctuations(postprocess_text)
105
  postprocess_text = re.sub(r"\s+", " ", postprocess_text)
106
  postprocess_text = postprocess_text.strip()
107
- regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+"
108
  titles = doc.get_toc()
109
  main_titles = []
110
  if len(titles) <= 0:
 
104
  postprocess_text = remove_punctuations(postprocess_text)
105
  postprocess_text = re.sub(r"\s+", " ", postprocess_text)
106
  postprocess_text = postprocess_text.strip()
107
+ regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
108
  titles = doc.get_toc()
109
  main_titles = []
110
  if len(titles) <= 0: