Moha782 committed on
Commit
af2f31f
·
verified ·
1 Parent(s): 3918058

Delete extract_text.py

Browse files
Files changed (1) hide show
  1. extract_text.py +0 -18
extract_text.py DELETED
@@ -1,18 +0,0 @@
1
- # extract_text.py
2
-
3
- import fitz # PyMuPDF
4
- import json
5
-
6
- def extract_text_from_pdf(pdf_path):
7
- doc = fitz.open(pdf_path)
8
- text = []
9
- for page in doc:
10
- text.append(page.get_text())
11
- return text
12
-
13
- if __name__ == "__main__":
14
- pdf_text = extract_text_from_pdf("apexcustoms.pdf")
15
-
16
- # Save the extracted text to a JSON file
17
- with open("apexcustoms.json", "w") as f:
18
- json.dump(pdf_text, f)