Spaces:
Runtime error
Runtime error
Edward Nagy
committed on
Fix audio file codec and add input placeholders
Browse files
app.py
CHANGED
@@ -6,12 +6,14 @@ import os
|
|
6 |
|
7 |
# pipe = pipeline(model="esnagy/whisper-small-hu")
|
8 |
|
|
|
9 |
def transcribe_audio(audio_file):
|
10 |
text = "Test text"
|
11 |
# text = pipe(audio_file)["text"]
|
12 |
os.remove(audio_file) # Remove temporary audio file
|
13 |
return text
|
14 |
|
|
|
15 |
def transcribe(input_data):
|
16 |
if input_data["audio"]:
|
17 |
return transcribe_audio(input_data["audio"].name)
|
@@ -19,7 +21,7 @@ def transcribe(input_data):
|
|
19 |
video_url = input_data["video_url"]
|
20 |
# Download the video from the URL
|
21 |
video_filename = "temp_video.mp4"
|
22 |
-
with open(video_filename,
|
23 |
response = requests.get(video_url)
|
24 |
f.write(response.content)
|
25 |
|
@@ -28,7 +30,7 @@ def transcribe(input_data):
|
|
28 |
audio = video.audio
|
29 |
|
30 |
audio_file = "temp_audio.wav"
|
31 |
-
audio.write_audiofile(audio_file, codec=
|
32 |
|
33 |
text = transcribe_audio(audio_file)
|
34 |
|
@@ -38,15 +40,18 @@ def transcribe(input_data):
|
|
38 |
|
39 |
return text
|
40 |
|
41 |
-
video_url_input = gr.inputs.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone")
|
42 |
-
audio_input = gr.inputs.Audio(label="Or record your voice", source="microphone")
|
43 |
|
44 |
iface = gr.Interface(
|
45 |
fn=transcribe,
|
46 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
47 |
outputs=gr.outputs.Textbox(),
|
48 |
title="Whisper Small Hungarian",
|
49 |
-
description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe."
|
50 |
)
|
51 |
|
52 |
iface.launch()
|
|
|
6 |
|
7 |
# pipe = pipeline(model="esnagy/whisper-small-hu")
|
8 |
|
9 |
+
|
10 |
def transcribe_audio(audio_file):
|
11 |
text = "Test text"
|
12 |
# text = pipe(audio_file)["text"]
|
13 |
os.remove(audio_file) # Remove temporary audio file
|
14 |
return text
|
15 |
|
16 |
+
|
17 |
def transcribe(input_data):
|
18 |
if input_data["audio"]:
|
19 |
return transcribe_audio(input_data["audio"].name)
|
|
|
21 |
video_url = input_data["video_url"]
|
22 |
# Download the video from the URL
|
23 |
video_filename = "temp_video.mp4"
|
24 |
+
with open(video_filename, "wb") as f:
|
25 |
response = requests.get(video_url)
|
26 |
f.write(response.content)
|
27 |
|
|
|
30 |
audio = video.audio
|
31 |
|
32 |
audio_file = "temp_audio.wav"
|
33 |
+
audio.write_audiofile(audio_file, codec="pcm_s16le")
|
34 |
|
35 |
text = transcribe_audio(audio_file)
|
36 |
|
|
|
40 |
|
41 |
return text
|
42 |
|
|
|
|
|
43 |
|
44 |
iface = gr.Interface(
|
45 |
fn=transcribe,
|
46 |
+
inputs=[
|
47 |
+
gr.Textbox(
|
48 |
+
label="Enter video URL", placeholder="Or leave empty to use microphone"
|
49 |
+
),
|
50 |
+
gr.Audio(sources=["microphone"], type="filepath"),
|
51 |
+
],
|
52 |
outputs=gr.outputs.Textbox(),
|
53 |
title="Whisper Small Hungarian",
|
54 |
+
description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe.",
|
55 |
)
|
56 |
|
57 |
iface.launch()
|