File size: 490 Bytes
db68fe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import pdfplumber
from pathlib import Path
import os

# Concatenate the text of every PDF in ./documents, strip non-ASCII
# characters, and write the outcome to ./documents/result.txt.
input_path = Path("./documents")

# Only pick up PDFs: this directory also receives the script's own
# "result.txt" output, which pdfplumber cannot open on a later run.
# Sorting makes the concatenation order deterministic, because
# os.listdir returns entries in arbitrary order.
file_names = sorted(
    name for name in os.listdir(input_path) if name.lower().endswith(".pdf")
)

# Collect the page texts and join once instead of growing a string with +=.
page_texts = []

for file_name in file_names:
    # The context manager closes the PDF's file handle as soon as its
    # pages have been read, instead of leaking one handle per document.
    with pdfplumber.open(input_path / file_name) as pdf:
        for page in pdf.pages:
            # NOTE(review): extract_text() may return None for a page without
            # extractable text in some pdfplumber versions; treat it as empty.
            page_texts.append(page.extract_text() or "")

# Pages are joined without a separator, matching the previous output format.
result = "".join(page_texts)

# Encoding to ASCII with errors="ignore" drops every non-ASCII character.
result = result.encode(encoding="ASCII", errors="ignore").decode()

# The text is pure ASCII at this point, so state the encoding explicitly
# rather than depending on the platform default.
with open(input_path / "result.txt", "w", encoding="ascii") as f:
    f.write(result)