DebasishDhal99 committed on
Commit 087b372 · 1 Parent(s): dcd3020

Add target lang option to audio translation

Files changed (2)
  1. app.py +1 -1
  2. backend/audio_to_tgt.py +13 -4
app.py CHANGED
@@ -74,7 +74,7 @@ audio_interface = gr.Interface(
     title=heading_audio,
     description=description_audio,
     examples=[
-        ["examples/audios/russian_sample_audio.mp3", "turbo"]
+        ["examples/audios/russian_sample_audio.mp3", "turbo", "English"]
     ]
 )
 combined_interface = gr.TabbedInterface(
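
The three-element example row implies the audio tab now exposes a third input for the target language. The interface definition itself is outside this diff, so the block below is only a sketch of how it might be wired; the component choices, labels, and the placeholder title/description strings are assumptions, not part of the commit.

import gradio as gr
from backend.audio_to_tgt import src_audio_to_eng_translator, formatted_languages

heading_audio = "Audio translation"                     # placeholder; the real string lives elsewhere in app.py
description_audio = "Transcribe and translate audio."   # placeholder

# Sketch of an interface whose inputs line up with the 3-element example:
# [audio file, Whisper model size, target language name]
audio_interface = gr.Interface(
    fn=src_audio_to_eng_translator,
    inputs=[
        gr.Audio(type="filepath", label="Audio file"),
        gr.Dropdown(["tiny", "base", "small", "medium", "turbo"], value="turbo", label="Whisper model"),
        gr.Dropdown(sorted(formatted_languages), value="English", label="Target language"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Translation"),
        gr.Textbox(label="Detected source language"),
    ],
    title=heading_audio,
    description=description_audio,
    examples=[
        ["examples/audios/russian_sample_audio.mp3", "turbo", "English"]
    ],
)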
backend/audio_to_tgt.py CHANGED
@@ -2,7 +2,14 @@ import whisper
 import numpy as np
 from pydub import AudioSegment
 import langcodes
-from deep_translator import GoogleTranslator
+from deep_translator import GoogleTranslator, detection
+import os
+
+available_languages = GoogleTranslator().get_supported_languages(as_dict=True)
+formatted_languages = {key.title(): value for key, value in available_languages.items()}
+formatted_codes = {value: key.title() for key, value in available_languages.items()}
+
+lang_detect_key = os.getenv("detect_language_api_key")
 
 def audio_to_numpy(audio_file_input):
     audio = AudioSegment.from_file(audio_file_input)
@@ -11,14 +18,16 @@ def audio_to_numpy(audio_file_input):
 
     return samples / np.iinfo(audio.array_type).max
 
-def src_audio_to_eng_translator(audio_file_input, model_size = "turbo"):
+def src_audio_to_eng_translator(audio_file_input, model_size = "turbo", target_lang = "English"):
     audio_data = audio_to_numpy(audio_file_input)
 
     model = whisper.load_model(model_size)
     result = model.transcribe(audio_data)
     input_text = result["text"]
     language_code = result["language"]
+    src_lang_code = detection.single_detection(input_text, api_key = lang_detect_key)
+    src_lang = formatted_codes.get(src_lang_code, 'Source language not detected')
     language_name = langcodes.get(language_code).language_name()
-    translated_text = GoogleTranslator(source='auto', target='en').translate(input_text)
-    return input_text, translated_text, language_name
+    translated_text = GoogleTranslator(source='auto', target=src_lang_code).translate(input_text)
+    return input_text, translated_text, src_lang
 # return result['text']
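
For reference, a minimal usage sketch of the updated function, mirroring the example row added in app.py. Everything below is illustrative rather than part of the commit; deep-translator's single_detection needs the detect_language_api_key environment variable to be set, and the printed values are assumptions.

from backend.audio_to_tgt import src_audio_to_eng_translator

# Mirrors the example row added to app.py: audio path, Whisper model size, target language name.
# Requires the "detect_language_api_key" environment variable for detection.single_detection.
transcription, translation, detected_language = src_audio_to_eng_translator(
    "examples/audios/russian_sample_audio.mp3",
    model_size="turbo",
    target_lang="English",
)

print(detected_language)   # Title-cased name looked up via formatted_codes, e.g. "Russian"
print(translation)

The module-level formatted_languages and formatted_codes dictionaries map Title-cased language names to deep-translator codes and back (e.g. "English" to "en"), which is how the detected code is converted into the language name the function returns.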