KevinHuSh
committed on
Commit
·
eefeab4
1
Parent(s):
defd4c5
fix bug in pdf parser (#986)
Browse files

### What problem does this PR solve?
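#963 — the PDF parser indexed the first character of `b_["text"].strip()` without first checking that the stripped text is non-empty; this change adds that check before the `[0]` lookup.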
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
deepdoc/parser/pdf_parser.py
CHANGED
@@ -392,7 +392,7 @@ class RAGFlowPdfParser:
|
|
392 |
b["text"].strip()[-1] in ",;:'\",、‘“;:-",
|
393 |
len(b["text"].strip()) > 1 and b["text"].strip(
|
394 |
)[-2] in ",;:'\",‘“、;:",
|
395 |
-
b_["text"].strip()[0] in "。;?!?”)),,、:",
|
396 |
]
|
397 |
# features for not concating
|
398 |
feats = [
|
|
|
392 |
b["text"].strip()[-1] in ",;:'\",、‘“;:-",
|
393 |
len(b["text"].strip()) > 1 and b["text"].strip(
|
394 |
)[-2] in ",;:'\",‘“、;:",
|
395 |
+
b_["text"].strip() and b_["text"].strip()[0] in "。;?!?”)),,、:",
|
396 |
]
|
397 |
# features for not concating
|
398 |
feats = [
|