Spaces:
Paused
Paused
修复pdf分解bug
Browse files
crazy_functions/crazy_utils.py
CHANGED
@@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp):
|
|
444 |
pf = 998
|
445 |
for l in t['lines']:
|
446 |
txt_line = "".join([wtf['text'] for wtf in l['spans']])
|
|
|
447 |
pf = primary_ffsize(l)
|
448 |
meta_line.append([txt_line, pf, l['bbox'], l])
|
449 |
for wtf in l['spans']: # for l in t['lines']:
|
|
|
444 |
pf = 998
|
445 |
for l in t['lines']:
|
446 |
txt_line = "".join([wtf['text'] for wtf in l['spans']])
|
447 |
+
if len(txt_line) == 0: continue
|
448 |
pf = primary_ffsize(l)
|
449 |
meta_line.append([txt_line, pf, l['bbox'], l])
|
450 |
for wtf in l['spans']: # for l in t['lines']:
|