DebasishDhal99 committed on
Commit
7dbc3ec
·
1 Parent(s): 0fb7c05

Add target lang option for image

Browse files
Files changed (2) hide show
  1. app.py +8 -4
  2. backend/image_to_tgt.py +9 -4
app.py CHANGED
@@ -40,13 +40,17 @@ image_interface = gr.Interface(
40
  inputs=[
41
  gr.Image(label="Upload an Image", type="filepath"),
42
  gr.Dropdown(choices=sorted_languages,
43
- label="Select Language",
44
- # default='English'
45
- )
 
 
 
 
46
  ],
47
  outputs=[
48
  gr.Textbox(label="Image Text"),
49
- gr.Textbox(label="Translated Text")
50
  ],
51
  title="Image Text Extractor and Translator",
52
  description=description_image,
 
40
  inputs=[
41
  gr.Image(label="Upload an Image", type="filepath"),
42
  gr.Dropdown(choices=sorted_languages,
43
+ label="Select Image Text Language",
44
+ ),
45
+ gr.Dropdown(
46
+ choices=language_list,
47
+ label="Select Target Language",
48
+ interactive=True
49
+ )
50
  ],
51
  outputs=[
52
  gr.Textbox(label="Image Text"),
53
+ gr.Textbox(label="Translated Text"),
54
  ],
55
  title="Image Text Extractor and Translator",
56
  description=description_image,
backend/image_to_tgt.py CHANGED
@@ -5,7 +5,11 @@
5
  from deep_translator import GoogleTranslator
6
  import pytesseract
7
 
8
- language_dict = {'English': 'eng', 'French': 'fra', 'Odia': 'ori', 'Hindi': 'hin',
 
 
 
 
9
  'Bengali': 'ben', 'Telugu': 'tel', 'Hindi': 'hin', 'Malayalam': 'mal',
10
  'Kannada': 'kan', 'Tamil': 'tam', 'Marathi': 'mar', 'Gujarati': 'guj',
11
  'Punjabi': 'pan', 'Sinhalese': 'sin',
@@ -14,8 +18,9 @@ language_dict = {'English': 'eng', 'French': 'fra', 'Odia': 'ori', 'Hindi': 'hin
14
  'Persian': 'fas', 'Chinese Simplified': 'chi_sim', 'Chinese Traditional': 'chi_tra',
15
  }
16
 
17
- def src_image_to_eng_translator(input_image, lang = 'eng'):
18
- image_text = pytesseract.image_to_string(input_image, lang = language_dict.get(lang, 'eng'))
19
 
20
- translated = GoogleTranslator(source='auto', target='en').translate(image_text)
 
21
  return image_text, translated
 
5
  from deep_translator import GoogleTranslator
6
  import pytesseract
7
 
8
# Lookup tables derived from the translator's supported-language map
# (language name -> language code, as returned with as_dict=True).
available_languages = GoogleTranslator().get_supported_languages(as_dict=True)
# Title-cased language name -> translator code.
formatted_languages = {
    name.title(): code for name, code in available_languages.items()
}
# Translator code -> title-cased language name (reverse lookup).
formatted_codes = {
    code: name.title() for name, code in available_languages.items()
}
11
+
12
+ pytesseract_language_dict = {'English': 'eng', 'French': 'fra', 'Odia': 'ori', 'Hindi': 'hin',
13
  'Bengali': 'ben', 'Telugu': 'tel', 'Hindi': 'hin', 'Malayalam': 'mal',
14
  'Kannada': 'kan', 'Tamil': 'tam', 'Marathi': 'mar', 'Gujarati': 'guj',
15
  'Punjabi': 'pan', 'Sinhalese': 'sin',
 
18
  'Persian': 'fas', 'Chinese Simplified': 'chi_sim', 'Chinese Traditional': 'chi_tra',
19
  }
20
 
21
def src_image_to_eng_translator(input_image, image_lang = 'eng', target_lang = 'English'):
    """Extract the text in an image via OCR and translate it.

    Despite the historical ``_to_eng`` suffix, the output language is chosen
    by ``target_lang``; English is only the default.

    Args:
        input_image: Image handed unchanged to
            ``pytesseract.image_to_string``.
        image_lang: Name of the language printed in the image, looked up in
            ``pytesseract_language_dict``. Any value that is not a key there
            (including the default ``'eng'``, which is a Tesseract code
            rather than a display name) falls back to the ``'eng'`` model.
        target_lang: Name of the language to translate into, looked up in
            ``formatted_languages``. Unknown names fall back to ``'en'``.

    Returns:
        A tuple ``(image_text, translated)``: the raw OCR output and its
        translation. ``translated`` is ``''`` when OCR found no text.
    """
    tesseract_code = pytesseract_language_dict.get(image_lang, 'eng')
    image_text = pytesseract.image_to_string(input_image, lang=tesseract_code)

    # Blank OCR output has nothing to translate. Returning early avoids a
    # pointless translator call, which some deep_translator releases reject
    # with an exception for empty payloads.
    if not image_text.strip():
        return image_text, ''

    target_lang_code = formatted_languages.get(target_lang, 'en')
    translator = GoogleTranslator(source='auto', target=target_lang_code)
    translated = translator.translate(image_text)
    return image_text, translated