Spaces:
Sleeping
Sleeping
Omar ID EL MOUMEN
committed on
Commit
·
95d420b
1
Parent(s):
2ca42fd
Fix regex issue
Browse files
app.py
CHANGED
@@ -100,7 +100,7 @@ async def extract_text_pdf(document: DocumentID):
|
|
100 |
postprocess_text = remove_punctuations(postprocess_text)
|
101 |
postprocess_text = re.sub(r"\s+", " ", postprocess_text)
|
102 |
postprocess_text = postprocess_text.strip()
|
103 |
-
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]
|
104 |
titles = doc.get_toc()
|
105 |
main_titles = []
|
106 |
if len(titles) <= 0:
|
|
|
100 |
postprocess_text = remove_punctuations(postprocess_text)
|
101 |
postprocess_text = re.sub(r"\s+", " ", postprocess_text)
|
102 |
postprocess_text = postprocess_text.strip()
|
103 |
+
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+"
|
104 |
titles = doc.get_toc()
|
105 |
main_titles = []
|
106 |
if len(titles) <= 0:
|