# extract_text.py
import json

import fitz  # PyMuPDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, one list entry per page.

    Args:
        pdf_path: Location of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        A list holding the result of ``page.get_text()`` for each page, in
        the order the pages are iterated.

    Raises:
        Whatever ``fitz.open`` or ``page.get_text`` raise (for example when
        the file is missing or cannot be parsed); nothing is caught here.
    """
    # Use the document as a context manager so it is closed even when
    # extraction fails part-way; the previous version never closed it.
    with fitz.open(pdf_path) as doc:
        return [page.get_text() for page in doc]
if __name__ == "__main__":
    # Script entry point: read the per-page text from the PDF and store it
    # as a JSON array next to it.
    pages = extract_text_from_pdf("apexcustoms.pdf")
    with open("apexcustoms.json", "w") as out_file:
        out_file.write(json.dumps(pages))