File size: 416 Bytes
e76edd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# extract_text.py

import fitz  # PyMuPDF
import json

def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, one string per page.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A list holding the ``page.get_text()`` result for each page, in
        the order the document yields its pages.
    """
    # Open the document as a context manager so it is closed on exit,
    # including when text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return [page.get_text() for page in doc]

if __name__ == "__main__":
    # Script entry point: pull the per-page text out of the PDF, then
    # persist the resulting list as a JSON array.
    source_pdf = "apexcustoms.pdf"
    target_json = "apexcustoms.json"

    pages = extract_text_from_pdf(source_pdf)

    # Serialize first, then write the finished string in a single call.
    serialized = json.dumps(pages)
    with open(target_json, "w") as out_file:
        out_file.write(serialized)