khrek committed on
Commit
3346330
·
1 Parent(s): 0adad71

Update reader.py

Browse files
Files changed (1) hide show
  1. reader.py +1 -4
reader.py CHANGED
@@ -1,6 +1,6 @@
1
  import pypdfium2 as pdfium
2
  import re
3
- import wordninja
4
  from PIL import Image
5
  from pytesseract import image_to_string
6
  from utils import recover_text, get_average_line_len
@@ -18,9 +18,6 @@ class ResumeReader:
18
  clean_text = re.sub(r'• ', " ", clean_text)
19
  return clean_text
20
 
21
- def recover_text(self, text_without_spaces):
22
- recovered_text = " ".join(wordninja.split(text_without_spaces))
23
- return recovered_text
24
 
25
  def read_image(self, path_file):
26
  raw_text = str(image_to_string(Image.open(path_file)))
 
1
  import pypdfium2 as pdfium
2
  import re
3
+
4
  from PIL import Image
5
  from pytesseract import image_to_string
6
  from utils import recover_text, get_average_line_len
 
18
  clean_text = re.sub(r'• ', " ", clean_text)
19
  return clean_text
20
 
 
 
 
21
 
22
  def read_image(self, path_file):
23
  raw_text = str(image_to_string(Image.open(path_file)))