Spaces:
Sleeping
Sleeping
Omar ID EL MOUMEN
committed on
Commit
·
d8045d1
1
Parent(s):
2e329bd
Change regex
Browse files
app.py
CHANGED
@@ -104,7 +104,7 @@ async def extract_text_pdf(document: DocumentID):
|
|
104 |
postprocess_text = remove_punctuations(postprocess_text)
|
105 |
postprocess_text = re.sub(r"\s+", " ", postprocess_text)
|
106 |
postprocess_text = postprocess_text.strip()
|
107 |
-
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]
|
108 |
titles = doc.get_toc()
|
109 |
main_titles = []
|
110 |
if len(titles) <= 0:
|
|
|
104 |
postprocess_text = remove_punctuations(postprocess_text)
|
105 |
postprocess_text = re.sub(r"\s+", " ", postprocess_text)
|
106 |
postprocess_text = postprocess_text.strip()
|
107 |
+
regex_titles = r"(?:[IVX]+|[0-9]+)\.\s[A-Z0-9\s]+$"
|
108 |
titles = doc.get_toc()
|
109 |
main_titles = []
|
110 |
if len(titles) <= 0:
|