Spaces:

MrOvkill
/

MiniChat-3B

Sleeping

App Files Files Community

Samuel L Meyers committed on Nov 17, 2023

Commit

dc422ae

1 Parent(s): 0b9f399

Add whisper transcription

Browse files

Files changed (1) hide show

app.py +27 -0

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import glob
 import logging
 from typing import cast
 from threading import Lock
 import gradio as gr
 from balacoon_tts import TTS
@@ -31,6 +32,16 @@ for name in list_repo_files(repo_id="balacoon/tts"):
             local_dir=model_repo_dir,
         )
 def main():
     logging.basicConfig(level=logging.INFO)
@@ -96,6 +107,22 @@ def main():
             generate = gr.Button("Generate")
         with gr.Row(variant="panel"):
             audio = gr.Audio()
         def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """

 import logging
 from typing import cast
 from threading import Lock
+from transformers import pipeline
 import gradio as gr
 from balacoon_tts import TTS
             local_dir=model_repo_dir,
         )
+# Module-level speech-to-text pipeline: built once when the module is
+# imported and reused by `transcribe` for every request.
+stt_pipe = pipeline(
+    task="automatic-speech-recognition",
+    model="openai/whisper-large-v3",
+)
+def transcribe(audio):
+    """Transcribe an audio input to English text with the Whisper pipeline.
+
+    Args:
+        audio: The audio input handed to ``stt_pipe``; ``None`` means the
+            user submitted nothing.
+
+    Returns:
+        The ``"text"`` entry of the pipeline result.
+
+    Raises:
+        gr.Error: If ``audio`` is ``None``.
+    """
+    if audio is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    # Pin the language and task rather than leaving them to the pipeline.
+    whisper_options = {"language": "english", "task": "transcribe"}
+    result = stt_pipe(audio, generate_kwargs=whisper_options)
+    return result["text"]
 def main():
     logging.basicConfig(level=logging.INFO)
             generate = gr.Button("Generate")
         with gr.Row(variant="panel"):
             audio = gr.Audio()
+        # Microphone transcription panel. The handler is the module-level
+        # `transcribe`. It must not be redefined inside main(): a nested
+        # `def transcribe` makes the name local to main(), so the reference
+        # below (which runs before that def) raises UnboundLocalError.
+        with gr.Row(variant="panel"):
+            mf_transcribe = gr.Interface(
+                transcribe,
+                gr.Audio(source="microphone", type="filepath"),
+                outputs="text",
+                title="Transcribe",
+                description=(
+                    "Transcribe audio using Whisper v3 Large. "
+                ),
+            )
         def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """