paulbauriegel committed
Commit 3c78a64 · 1 Parent(s): bcd9622

Update app.py

Files changed (1): app.py (+10 −11)
app.py CHANGED
@@ -6,8 +6,8 @@ import psutil
 import time
 import whisperx
 
-#model = WhisperModel('large-v2', device="cuda", compute_type="float16")
-model = whisper.load_model('large-v2')
+model = WhisperModel('large-v2', device="cuda", compute_type="float16")
+#model = whisper.load_model('large-v2')
 
 def speech_to_text(mic=None, file=None, lang=None, task='transcribe'):
     if mic is not None:
@@ -19,13 +19,13 @@ def speech_to_text(mic=None, file=None, lang=None, task='transcribe'):
     print(lang, task)
 
     time_start = time.time()
-    #segments, info = model.transcribe(audio, task=task, language=lang, beam_size=5)
-    results = model.transcribe(audio, task=task, language=lang, beam_size=5)
+    segments, info = model.transcribe(audio, task=task, language=lang, beam_size=5)
+    #results = model.transcribe(audio, task=task, language=lang, beam_size=5)
     #print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
 
     # Decode audio to Text
-    #objects = [s._asdict() for s in segments]
-    objects = results["segments"]
+    objects = [s._asdict() for s in segments]
+    #objects = results["segments"]
     print(objects)
     time_end = time.time()
     time_diff = time_end - time_start
@@ -67,17 +67,16 @@ with gr.Blocks(title='Whisper Demo', theme=theme) as demo:
     ''')
     audio_in = gr.Audio(label="Record", source='microphone', type="filepath")
     file_in = gr.Audio(label="Upload", source='upload', type="filepath")
-    drop_down = gr.Dropdown(["de", "en", "es", "fr", "ru", None], value=None)
     transcribe_btn = gr.Button("Transcribe audio", variant="primary")
-    translate_btn = gr.Button("Translate audio")
+    translate_btn = gr.Button("Translate to English")
     trans_df = gr.DataFrame(label="Transcription dataframe", row_count=(0, "dynamic"), max_rows=10, wrap=True, overflow_row_behaviour='paginate')
     sys_info = gr.Markdown("")
-    transcribe_btn.click(lambda x, y: speech_to_text(x, y, None, 'transcribe'),
+    transcribe_btn.click(lambda x, y: speech_to_text(x, y, task='transcribe'),
                          [audio_in, file_in],
                          [trans_df, sys_info]
                          )
-    translate_btn.click(lambda x, y, z: speech_to_text(x, y, z, 'translate'),
-                        [audio_in, file_in, drop_down],
+    translate_btn.click(lambda x, y: speech_to_text(x, y, task='translate'),
+                        [audio_in, file_in],
                         [trans_df, sys_info])
 
 demo.launch()
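
Note: this commit switches the app from openai-whisper (whisper.load_model, which returns a dict with a "segments" key) to faster-whisper's WhisperModel, whose transcribe call returns a lazy generator of Segment named tuples plus a TranscriptionInfo object. A minimal standalone sketch of the new call path, under two assumptions not visible in this hunk: the import of WhisperModel happens above line 6 of app.py, and 'sample.wav' is a placeholder input path.

# Sketch only; assumes faster-whisper is installed and a CUDA device is available.
from faster_whisper import WhisperModel

model = WhisperModel('large-v2', device="cuda", compute_type="float16")
# segments is a generator; decoding only runs when it is iterated
segments, info = model.transcribe('sample.wav', task='transcribe', beam_size=5)
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# Each Segment is a named tuple (start, end, text, ...), hence the _asdict() call in app.py
objects = [s._asdict() for s in segments]
print(objects)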