Commit
·
04cc391
1
Parent(s):
3becfb1
Add basic image OCR in Indian and Western languages
Browse files- backend/image_to_tgt.py +21 -3
backend/image_to_tgt.py
CHANGED
@@ -1,4 +1,22 @@
|
|
1 |
-
from PIL import Image
|
2 |
|
3 |
-
def src_image_to_eng_translator(image_input_file):
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from PIL import Image
|
2 |
|
3 |
+
# def src_image_to_eng_translator(image_input_file):
|
4 |
+
# return "Random"
|
5 |
+
from deep_translator import GoogleTranslator
|
6 |
+
import pytesseract
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
+
# Maps a human-readable language name to the language code handed to
# Tesseract through ``pytesseract.image_to_string(..., lang=...)``.
# Each name appears exactly once (the literal previously listed 'Hindi' twice).
language_dict = {
    # Western languages
    'English': 'eng',
    'French': 'fra',
    'German': 'deu',
    'Spanish': 'spa',
    'Italian': 'ita',
    'Russian': 'rus',
    # Indian subcontinent languages
    'Odia': 'ori',
    'Hindi': 'hin',
    'Bengali': 'ben',
    'Telugu': 'tel',
    'Malayalam': 'mal',
    'Kannada': 'kan',
    'Tamil': 'tam',
    'Marathi': 'mar',
    'Gujarati': 'guj',
    'Punjabi': 'pan',
    'Sinhalese': 'sin',
    # Other languages
    'Arabic': 'ara',
    'Hebrew': 'heb',
    'Persian': 'fas',
    'Japanese': 'jpn',
    'Korean': 'kor',
    'Chinese Simplified': 'chi_sim',
    'Chinese Traditional': 'chi_tra',
}
|
17 |
+
|
18 |
+
def src_image_to_eng_translator(input_image, lang='eng'):
    """Run OCR on an image and translate the recognised text to English.

    Args:
        input_image: The image to read; it is passed unchanged to
            ``pytesseract.image_to_string``.
        lang: Language of the text in the image. Either a display name that
            is a key of ``language_dict`` (e.g. ``'Hindi'``) or one of the
            Tesseract codes stored in it (e.g. ``'hin'``). Any other value
            falls back to ``'eng'``.

    Returns:
        A ``(image_text, translated)`` tuple: the raw OCR output and the
        result of ``GoogleTranslator(source='auto', target='en')`` on it.
    """
    # The default value 'eng' is a Tesseract code rather than a display name,
    # so codes are honoured directly instead of silently becoming English.
    if lang in language_dict:
        ocr_lang = language_dict[lang]
    elif lang in language_dict.values():
        ocr_lang = lang
    else:
        ocr_lang = 'eng'

    image_text = pytesseract.image_to_string(input_image, lang=ocr_lang)

    # Translate the OCR output itself; ``text`` was never defined here and
    # made every call fail with NameError.
    translated = GoogleTranslator(source='auto', target='en').translate(image_text)
    return image_text, translated
|