Spaces:

MrOvkill
/

MiniChat-3B

Sleeping

Samuel L Meyers committed on Nov 17, 2023

Commit

4e554fb

1 Parent(s): fbfb629

Fixed transcription

Files changed (1) hide show

app.py CHANGED Viewed

@@ -108,22 +108,11 @@ def main():
         with gr.Row(variant="panel"):
             audio = gr.Audio()
         with gr.Row(variant="panel"):
-            mf_transcribe = gr.Interface(
-                transcribe_stt,
-                gr.Audio(source="microphone", type="filepath"),
-                id=10107,
-                outputs="text",
-                title="Transcribe",
-                description=(
-                    "Transcribe audio using Whisper v3 Large. "
-                ),
-            )
-        def transcribe(audio):
-            if audio is None:
-                raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-            text = stt_pipe(audio, generate_kwargs={"language": "english", "task": "transcribe"})["text"]
-            return text
         def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """
@@ -151,6 +140,7 @@ def main():
             return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
         generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio, api_name="synthesize")
     demo.queue(concurrency_count=1).launch()

         with gr.Row(variant="panel"):
             audio = gr.Audio()
         with gr.Row(variant="panel"):
+            with gr.Column(variant="panel"):
+                stt_input_mic = gr.Audio(source="microphone", type="filepath", label="Record")
+                stt_input_file = gr.Audio(source="upload", type="filepath", label="Upload")
+            with gr.Column(variant="panel"):
+                stt_transcribe_btn = gr.Button("Transcribe")
         def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
             """
             return gr.Audio.update(value=(tts.get_sampling_rate(), samples))
         generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio, api_name="synthesize")
+        stt_transcribe_btn.click(transcribe_stt, inputs=stt_input_file, outputs=text, api_name="transcribe")
     demo.queue(concurrency_count=1).launch()