Spaces:

esnagy
/

hungarian_speech_transcriber

Runtime error

App Files Files Community

Edward Nagy committed on Dec 2, 2023

Commit

32ebaf4

unverified ·

1 Parent(s): 623ac6b

Add microphone functionality as well

Browse files

Files changed (1) hide show

app.py +40 -23

app.py CHANGED Viewed

@@ -6,36 +6,53 @@ import os
 # pipe = pipeline(model="esnagy/whisper-small-hu")
-def transcribe(video_url):
-    # Download the video from the URL
-    video_filename = "temp_video.mp4"
-    with open(video_filename, 'wb') as f:
-        response = requests.get(video_url)
-        f.write(response.content)
-    # Load the video using moviepy
-    video = VideoFileClip(video_filename)
-    audio = video.audio
-    audio_file = "temp_audio.wav"
-    audio.write_audiofile(audio_file, codec='pcm_s16le')
-    # Transcribe the audio
-    text = "Test text"
-    # text = pipe(audio_file)["text"]
-    # Remove temporary files
-    os.remove(video_filename)
-    os.remove(audio_file)
-    return text
-iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Textbox(label="Enter video URL"),
     outputs="text",
-    title="Whisper Small Hungarian",
-    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio.",
 )
-iface.launch()

 # pipe = pipeline(model="esnagy/whisper-small-hu")
+def transcribe_audio(audio_file):
+    """Return the transcript for the audio file at ``audio_file``.
+
+    The model call is currently stubbed out, so a fixed placeholder string
+    is returned. The input file is deleted before returning; callers must
+    not expect it to exist afterwards.
+    """
+    # text = pipe(audio_file)["text"]
+    # The audio file is treated as temporary and removed here.
+    os.remove(audio_file)
+    return "Test text"
+def transcribe(video_url, audio=None):
+    """Transcribe speech from a video URL or, failing that, an audio file.
+
+    Args:
+        video_url: URL of a video to download. Takes precedence over
+            ``audio`` whenever it is non-empty.
+        audio: Path to an audio file, used only when ``video_url`` is empty.
+
+    Returns:
+        The transcript text, or ``None`` when neither input is provided.
+
+    Raises:
+        requests.HTTPError: If the download responds with an error status.
+        ValueError: If the downloaded video has no audio track.
+    """
+    # Imported locally because the file's import block is not part of this hunk.
+    import tempfile
+
+    if video_url:
+        # A per-call scratch directory keeps concurrent requests from
+        # overwriting each other's files and guarantees cleanup on any exit.
+        # transcribe_audio() deletes the WAV itself, so no explicit
+        # os.remove() is issued here (a second remove would raise
+        # FileNotFoundError); the directory cleanup handles the rest.
+        with tempfile.TemporaryDirectory() as workdir:
+            video_filename = os.path.join(workdir, "temp_video.mp4")
+            audio_file = os.path.join(workdir, "temp_audio.wav")
+            # Stream the download so large videos are not held in memory,
+            # and fail fast on HTTP errors or an unresponsive server.
+            with requests.get(video_url, stream=True, timeout=60) as response:
+                response.raise_for_status()
+                with open(video_filename, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=1 << 20):
+                        f.write(chunk)
+            # Load the video using moviepy and extract its audio track.
+            video = VideoFileClip(video_filename)
+            try:
+                if video.audio is None:
+                    raise ValueError("The video has no audio track to transcribe.")
+                video.audio.write_audiofile(audio_file, codec='pcm_s16le')
+            finally:
+                # Release the reader before the scratch directory is removed.
+                video.close()
+            return transcribe_audio(audio_file)
+    if audio:
+        return transcribe_audio(audio)
+    return None
+# Video tab: the single textbox value is passed to transcribe() as video_url.
+iface_video = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone"),
+    outputs="text",
+    title="Whisper Small Hungarian - Video",
+    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL to transcribe its audio."
+)
+# Microphone tab: the recording must reach transcribe() as `audio`, not as
+# the first positional argument (which is the video URL). type="filepath"
+# hands the callback a path on disk, which is what transcribe_audio() expects.
+iface_audio = gr.Interface(
+    fn=lambda audio: transcribe(None, audio),
+    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Or record your voice"),
     outputs="text",
+    title="Whisper Small Hungarian - Microphone",
+    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Record your voice to transcribe."
 )
+# launch() blocks, so two consecutive launches would never serve the second
+# interface; expose both as tabs of one app and launch once.
+gr.TabbedInterface(
+    [iface_video, iface_audio],
+    ["Video", "Microphone"],
+).launch(share=True)