Spaces:
Sleeping
Sleeping
Delete extract_text.py
Browse files- extract_text.py +0 -18
extract_text.py
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
# extract_text.py
|
2 |
-
|
3 |
-
import fitz # PyMuPDF
|
4 |
-
import json
|
5 |
-
|
6 |
-
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, one list entry per page.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A list holding the result of ``page.get_text()`` for each page,
        in the order the document yields its pages.
    """
    # Open the document as a context manager so the underlying file is
    # closed once extraction finishes, even if a page raises midway.
    with fitz.open(pdf_path) as doc:
        return [page.get_text() for page in doc]
|
12 |
-
|
13 |
-
if __name__ == "__main__":
    # Script entry point: pull the per-page text out of the PDF, then
    # persist that list as a JSON array next to it.
    pages = extract_text_from_pdf("apexcustoms.pdf")

    with open("apexcustoms.json", "w") as out_file:
        json.dump(pages, out_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|