Spaces:
Sleeping
Sleeping
Commit
·
4ad1f12
1
Parent(s):
9ed0ab0
Upload 2 files
Browse files
- requirements.txt +0 -0
- scan_pdf_parser.py +10 -0
requirements.txt
ADDED
Binary file (4.65 kB). View file
|
|
scan_pdf_parser.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytesseract
|
2 |
+
from pdf2image import convert_from_path
|
3 |
+
|
4 |
+
|
5 |
+
def get_text_from_scanned_pdf(pdf_path) -> str:
    """Extract text from a scanned PDF by running OCR over its page images.

    The PDF at ``pdf_path`` is converted to images with
    ``pdf2image.convert_from_path`` and each image is passed to
    ``pytesseract.image_to_string``. The per-image results are concatenated
    in the order the images are returned, with no separator added.

    Args:
        pdf_path: Location of the PDF, forwarded unchanged to
            ``convert_from_path``.

    Returns:
        The concatenated OCR text, or an empty string when the conversion
        yields no images.
    """
    page_images = convert_from_path(pdf_path)
    # Join once instead of growing a string with ``+=`` per page, which can
    # degrade to quadratic copying on long documents.
    return ''.join(pytesseract.image_to_string(img) for img in page_images)
|