Spaces:
Running
Running
Update reader.py
Browse files
reader.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import pypdfium2 as pdfium
|
2 |
import re
|
3 |
-
|
4 |
from PIL import Image
|
5 |
from pytesseract import image_to_string
|
6 |
from utils import recover_text, get_average_line_len
|
@@ -18,9 +18,6 @@ class ResumeReader:
|
|
18 |
clean_text = re.sub(r'• ', " ", clean_text)
|
19 |
return clean_text
|
20 |
|
21 |
-
def recover_text(self, text_without_spaces):
|
22 |
-
recovered_text = " ".join(wordninja.split(text_without_spaces))
|
23 |
-
return recovered_text
|
24 |
|
25 |
def read_image(self, path_file):
|
26 |
raw_text = str(image_to_string(Image.open(path_file)))
|
|
|
1 |
import pypdfium2 as pdfium
|
2 |
import re
|
3 |
+
|
4 |
from PIL import Image
|
5 |
from pytesseract import image_to_string
|
6 |
from utils import recover_text, get_average_line_len
|
|
|
18 |
clean_text = re.sub(r'• ', " ", clean_text)
|
19 |
return clean_text
|
20 |
|
|
|
|
|
|
|
21 |
|
22 |
def read_image(self, path_file):
|
23 |
raw_text = str(image_to_string(Image.open(path_file)))
|