DebasishDhal99 committed on
Commit
29b5120
·
1 Parent(s): 1d62a43

Simplifying dependencies in the audio part

Browse files
Files changed (2) hide show
  1. app.py +13 -5
  2. backend/audio_to_tgt.py +5 -7
app.py CHANGED
@@ -62,12 +62,20 @@ description_audio = "Upload an audio file to extract text and translate it to En
62
 
63
  audio_interface = gr.Interface(
64
  fn=src_audio_to_eng_translator,
65
- inputs=[gr.Audio(label="Upload an Audio file", type="filepath"),
 
 
 
 
 
 
 
 
66
  gr.Dropdown(
67
- choices=["turbo", "base", "tiny", "small", "medium", "large"],
68
- label="Select Whisper Model size",
69
- ),
70
- gr.Dropdown(choices=language_list, label="Select Target Language", interactive=True)
71
  ],
72
  outputs=[gr.Textbox(label="Original text"),
73
  gr.Textbox(label="Translated text"),
 
62
 
63
  audio_interface = gr.Interface(
64
  fn=src_audio_to_eng_translator,
65
+ inputs=[
66
+ gr.Audio(
67
+ label="Upload an Audio file",
68
+ type="filepath"
69
+ ),
70
+ gr.Dropdown(
71
+ choices=["turbo", "base", "tiny", "small", "medium", "large"],
72
+ label="Select Whisper Model size",
73
+ ),
74
  gr.Dropdown(
75
+ choices=language_list,
76
+ label="Select Target Language",
77
+ interactive=True
78
+ )
79
  ],
80
  outputs=[gr.Textbox(label="Original text"),
81
  gr.Textbox(label="Translated text"),
backend/audio_to_tgt.py CHANGED
@@ -1,7 +1,6 @@
1
  import whisper
2
  import numpy as np
3
  from pydub import AudioSegment
4
- import langcodes
5
  from deep_translator import GoogleTranslator, detection
6
  import os
7
 
@@ -24,10 +23,9 @@ def src_audio_to_eng_translator(audio_file_input, model_size = "turbo", target_l
24
  model = whisper.load_model(model_size)
25
  result = model.transcribe(audio_data)
26
  input_text = result["text"]
27
- language_code = result["language"]
28
  src_lang_code = detection.single_detection(input_text, api_key = lang_detect_key)
29
- src_lang = formatted_codes.get(src_lang_code, 'Source language not detected')
30
- language_name = langcodes.get(language_code).language_name()
31
- translated_text = GoogleTranslator(source='auto', target=src_lang_code).translate(input_text)
32
- return input_text, translated_text, src_lang
33
- # return result['text']
 
1
  import whisper
2
  import numpy as np
3
  from pydub import AudioSegment
 
4
  from deep_translator import GoogleTranslator, detection
5
  import os
6
 
 
23
  model = whisper.load_model(model_size)
24
  result = model.transcribe(audio_data)
25
  input_text = result["text"]
26
+
27
  src_lang_code = detection.single_detection(input_text, api_key = lang_detect_key)
28
+ src_lang = formatted_languages.get(src_lang_code, 'Source language not detected')
29
+ target_lang_code = formatted_languages.get(target_lang, 'en')
30
+ translated_text = GoogleTranslator(source='auto', target=target_lang_code).translate(input_text)
31
+ return input_text, translated_text, src_lang