DebasishDhal99 committed on
Commit
04cc391
·
1 Parent(s): 3becfb1

Add basic image OCR in Indian and Western languages

Browse files
Files changed (1) hide show
  1. backend/image_to_tgt.py +21 -3
backend/image_to_tgt.py CHANGED
@@ -1,4 +1,22 @@
1
- from PIL import Image
2
 
3
- def src_image_to_eng_translator(image_input_file):
4
- return "Random"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from PIL import Image
2
 
3
+ # def src_image_to_eng_translator(image_input_file):
4
+ # return "Random"
5
from deep_translator import GoogleTranslator
import pytesseract
# Pillow exposes its image module as ``Image`` (capital I); the lowercase
# spelling raises ImportError when this module is loaded.
from PIL import Image
8
+
9
# Maps a human-readable language name to the language code passed to
# pytesseract as ``lang``. Each name appears exactly once (the original
# literal listed 'Hindi' twice, the second entry silently overwriting the
# first).
language_dict = {
    'English': 'eng',
    'French': 'fra',
    'Odia': 'ori',
    'Hindi': 'hin',
    'Bengali': 'ben',
    'Telugu': 'tel',
    'Malayalam': 'mal',
    'Kannada': 'kan',
    'Tamil': 'tam',
    'Marathi': 'mar',
    'Gujarati': 'guj',
    'Punjabi': 'pan',
    'Sinhalese': 'sin',
    'Arabic': 'ara',
    'German': 'deu',
    'Spanish': 'spa',
    'Italian': 'ita',
    'Russian': 'rus',
    'Japanese': 'jpn',
    'Korean': 'kor',
    'Hebrew': 'heb',
    'Persian': 'fas',
    'Chinese Simplified': 'chi_sim',
    'Chinese Traditional': 'chi_tra',
}
17
+
18
def src_image_to_eng_translator(input_image, lang='eng'):
    """OCR an image and translate the recognised text into English.

    Args:
        input_image: The image handed straight to
            ``pytesseract.image_to_string``.
        lang: Language name used as a key into ``language_dict``
            (e.g. ``'Hindi'``). Any value that is not a key, including
            the default ``'eng'``, falls back to the ``'eng'`` OCR code.

    Returns:
        A ``(image_text, translated)`` tuple: the raw OCR output and its
        English translation. ``translated`` is an empty string when OCR
        produced no non-whitespace text.
    """
    ocr_lang = language_dict.get(lang, 'eng')
    image_text = pytesseract.image_to_string(input_image, lang=ocr_lang)

    # Nothing to translate: skip the request to the translation service.
    if not image_text.strip():
        return image_text, ''

    # The original passed an undefined name ``text`` here, which raised
    # NameError on every call; the OCR output is what must be translated.
    translator = GoogleTranslator(source='auto', target='en')
    translated = translator.translate(image_text)
    return image_text, translated