Spaces:
Sleeping
Sleeping
Omar ID EL MOUMEN
committed on
Commit
·
db5cf0a
1
Parent(s):
a039626
Accept sub chapters
Browse files
app.py
CHANGED
@@ -111,7 +111,7 @@ async def extract_text_pdf(document: DocumentID):
|
|
111 |
main_titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE)
|
112 |
else:
|
113 |
for title in titles:
|
114 |
-
if title[0] == 1:
|
115 |
main_titles.append(title[1])
|
116 |
return {"pub_id": document.doc_id, "titles": [re.sub(r"\s+", " ", remove_punctuations(remove_in_betweens(t))).strip() for t in main_titles], "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
|
117 |
else:
|
|
|
111 |
main_titles = re.findall(regex_titles, postprocess_text, flags=re.MULTILINE)
|
112 |
else:
|
113 |
for title in titles:
|
114 |
+
if title[0] == 1 or title[0] == 2:
|
115 |
main_titles.append(title[1])
|
116 |
return {"pub_id": document.doc_id, "titles": [re.sub(r"\s+", " ", remove_punctuations(remove_in_betweens(t))).strip() for t in main_titles], "text": postprocess_text, "error": False} if len(main_titles) > 0 else {"pub_id": document.doc_id, "titles": "No titles found !", "text": postprocess_text, "error": False}
|
117 |
else:
|