Samuel L Meyers committed on
Commit
dc422ae
·
1 Parent(s): 0b9f399

Add whisper transcription

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py CHANGED
@@ -9,6 +9,7 @@ import glob
9
  import logging
10
  from typing import cast
11
  from threading import Lock
 
12
 
13
  import gradio as gr
14
  from balacoon_tts import TTS
@@ -31,6 +32,16 @@ for name in list_repo_files(repo_id="balacoon/tts"):
31
  local_dir=model_repo_dir,
32
  )
33
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def main():
36
  logging.basicConfig(level=logging.INFO)
@@ -96,6 +107,22 @@ def main():
96
  generate = gr.Button("Generate")
97
  with gr.Row(variant="panel"):
98
  audio = gr.Audio()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
101
  """
 
9
  import logging
10
  from typing import cast
11
  from threading import Lock
12
+ from transformers import pipeline
13
 
14
  import gradio as gr
15
  from balacoon_tts import TTS
 
32
  local_dir=model_repo_dir,
33
  )
34
 
35
+ stt_pipe = pipeline(
36
+ task="automatic-speech-recognition",
37
+ model="openai/whisper-large-v3",
38
+ )
39
+
40
+ def transcribe(audio):
41
+ if audio is None:
42
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
43
+ text = stt_pipe(audio, generate_kwargs={"language": "english", "task": "transcribe"})["text"]
44
+ return text
45
 
46
  def main():
47
  logging.basicConfig(level=logging.INFO)
 
107
  generate = gr.Button("Generate")
108
  with gr.Row(variant="panel"):
109
  audio = gr.Audio()
110
+ with gr.Row(variant="panel"):
111
+ mf_transcribe = gr.Interface(
112
+ transcribe,
113
+ gr.Audio(source="microphone", type="filepath"),
114
+ outputs="text",
115
+ title="Transcribe",
116
+ description=(
117
+ "Transcribe audio using Whisper v3 Large. "
118
+ ),
119
+ )
120
+
121
+ def transcribe(audio):
122
+ if audio is None:
123
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
124
+ text = stt_pipe(audio, generate_kwargs={"language": "english", "task": "transcribe"})["text"]
125
+ return text
126
 
127
  def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
128
  """