Spaces:
Sleeping
Sleeping
File size: 416 Bytes
e76edd3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# extract_text.py
import fitz # PyMuPDF
import json
def extract_text_from_pdf(pdf_path):
    """Return the text of each page of a PDF as a list of strings.

    Args:
        pdf_path: Location of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A list with one entry per page, in document order, where each entry
        is whatever ``page.get_text()`` returns for that page.
    """
    # Open the document in a ``with`` block so the underlying file handle is
    # released even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return [page.get_text() for page in doc]
if __name__ == "__main__":
    # Script entry point: extract the per-page text of the PDF and persist
    # the resulting list as a JSON array.
    pages = extract_text_from_pdf("apexcustoms.pdf")
    serialized = json.dumps(pages)
    with open("apexcustoms.json", "w") as out_file:
        out_file.write(serialized)
|