Spaces:
No application file
No application file
File size: 490 Bytes
db68fe3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import pdfplumber
from pathlib import Path
import os
# Directory holding the source PDFs; the combined text file is written here too.
input_path = Path("./documents")

# Only regular files with a .pdf suffix are read. The output file below lives
# in this same directory, so an unfiltered listing would hand result.txt (and
# any other stray file) to pdfplumber on the next run. Sorting pins the order
# in which documents are concatenated, because os.listdir() returns entries in
# arbitrary order.
file_names = sorted(
    name
    for name in os.listdir(input_path)
    if name.lower().endswith(".pdf") and (input_path / name).is_file()
)

# Collect the text of every page and join once, rather than growing a string
# with += inside the loop.
page_texts = []
for file_name in file_names:
    # The context manager closes the PDF's underlying file handle, including
    # when extraction of a page raises.
    with pdfplumber.open(input_path / file_name) as pdf:
        for page in pdf.pages:
            # A page without extractable text may yield None rather than ""
            # (depends on the pdfplumber release); `or ""` keeps that from
            # breaking the concatenation.
            page_texts.append(page.extract_text() or "")
result = "".join(page_texts)

# Encoding to ASCII with errors="ignore" drops every non-ASCII character.
result = result.encode(encoding="ASCII", errors="ignore").decode(encoding="ASCII")

# result is pure ASCII at this point; state the encoding explicitly so the
# bytes written do not depend on the platform's default locale encoding.
with open(input_path / "result.txt", "w", encoding="ascii") as f:
    f.write(result)
|