Spaces:

esnagy
/

hungarian_speech_transcriber

Runtime error

Edward Nagy committed on Dec 2, 2023

Commit

f9bd2ea

unverified ·

1 Parent(s): 05d486c

Update transcribe function to handle input data

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,13 +7,16 @@ import os
 # pipe = pipeline(model="esnagy/whisper-small-hu")
 def transcribe_audio(audio_file):
-    text = "Test text"
     # text = pipe(audio_file)["text"]
     os.remove(audio_file)  # Remove temporary audio file
     return text
-def transcribe(video_url, audio=None):
-    if video_url:
         # Download the video from the URL
         video_filename = "temp_video.mp4"
         with open(video_filename, 'wb') as f:
@@ -35,24 +38,15 @@ def transcribe(video_url, audio=None):
         return text
-    elif audio:
-        return transcribe_audio(audio)
-iface_video = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone"),
-    outputs="text",
-    title="Whisper Small Hungarian - Video",
-    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio."
-)
-iface_audio = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Or record your voice"),
-    outputs="text",
-    title="Whisper Small Hungarian - Microphone",
-    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Record your voice to transcribe."
 )
-iface_video.launch(share=True)
-iface_audio.launch(share=True)

 # pipe = pipeline(model="esnagy/whisper-small-hu")
 def transcribe_audio(audio_file):
+    text = "Text text"
     # text = pipe(audio_file)["text"]
     os.remove(audio_file)  # Remove temporary audio file
     return text
+def transcribe(input_data):
+    if input_data["audio"]:
+        return transcribe_audio(input_data["audio"].name)
+    elif input_data["video_url"]:
+        video_url = input_data["video_url"]
         # Download the video from the URL
         video_filename = "temp_video.mp4"
         with open(video_filename, 'wb') as f:
         return text
+iface = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Input("text", label="Enter video URL", name="video_url", placeholder="Or leave empty to use microphone"),
+        gr.Input("audio", label="Or record your voice", name="audio", source="microphone")
+    ],
+    outputs=gr.Output("text"),
+    title="Whisper Small Hungarian",
+    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe."
 )
+iface.launch()