Spaces:
Runtime error
Runtime error
Commit
·
54fe872
1
Parent(s):
c38911e
feat: fixing audio sample bugs
Browse files
app.py
CHANGED
@@ -6,28 +6,29 @@ import torch
|
|
6 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
7 |
|
8 |
hf_token = os.getenv("hf_token")
|
9 |
-
|
10 |
if hf_token is None:
|
11 |
raise ValueError(
|
12 |
"Hugging Face token not found. Please set the 'hf_token' environment variable."
|
13 |
)
|
14 |
|
15 |
processor = WhisperProcessor.from_pretrained(
|
16 |
-
"openai/whisper-small",
|
17 |
-
language="Indonesian",
|
18 |
-
task="transcribe",
|
19 |
-
token=hf_token,
|
20 |
)
|
21 |
model = WhisperForConditionalGeneration.from_pretrained(
|
22 |
"avalonai/whisper-small-jv", token=hf_token
|
23 |
)
|
24 |
|
25 |
|
26 |
-
def transcribe(
|
27 |
-
if
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
try:
|
30 |
-
audio, sampling_rate = librosa.load(
|
31 |
except Exception as e:
|
32 |
return f"Failed to load audio: {str(e)}"
|
33 |
|
@@ -44,18 +45,19 @@ def transcribe(audio):
|
|
44 |
audio_samples = [
|
45 |
os.path.join("audio_sample", f)
|
46 |
for f in os.listdir("audio_sample")
|
47 |
-
if f.endswith((".wav", ".mp3"))
|
48 |
]
|
|
|
|
|
|
|
|
|
49 |
audio_input = gr.Audio(
|
50 |
-
sources="microphone",
|
51 |
-
type="filepath",
|
52 |
-
label="Upload Audio or Select a Sample",
|
53 |
-
choices=audio_samples,
|
54 |
)
|
55 |
|
56 |
iface = gr.Interface(
|
57 |
fn=transcribe,
|
58 |
-
inputs=audio_input,
|
59 |
outputs="text",
|
60 |
title="Speech-to-text on Javanese Language Demo",
|
61 |
description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",
|
|
|
6 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
7 |
|
8 |
hf_token = os.getenv("hf_token")
|
|
|
9 |
if hf_token is None:
|
10 |
raise ValueError(
|
11 |
"Hugging Face token not found. Please set the 'hf_token' environment variable."
|
12 |
)
|
13 |
|
14 |
processor = WhisperProcessor.from_pretrained(
|
15 |
+
"openai/whisper-small", language="Indonesian", task="transcribe", token=hf_token
|
|
|
|
|
|
|
16 |
)
|
17 |
model = WhisperForConditionalGeneration.from_pretrained(
|
18 |
"avalonai/whisper-small-jv", token=hf_token
|
19 |
)
|
20 |
|
21 |
|
22 |
+
def transcribe(audio_choice, audio_file):
|
23 |
+
if audio_file is not None:
|
24 |
+
audio_path = audio_file
|
25 |
+
elif audio_choice is not None:
|
26 |
+
audio_path = audio_choice
|
27 |
+
else:
|
28 |
+
return "No audio file provided. Please upload an audio file or select a sample."
|
29 |
+
|
30 |
try:
|
31 |
+
audio, sampling_rate = librosa.load(audio_path, sr=16000)
|
32 |
except Exception as e:
|
33 |
return f"Failed to load audio: {str(e)}"
|
34 |
|
|
|
45 |
def list_audio_samples(
    directory="audio_sample", extensions=(".wav", ".mp3", ".m4a")
):
    """Return the paths of the sample audio files found in *directory*.

    Args:
        directory: Folder that holds the bundled sample recordings.
        extensions: Lower-case file suffixes that count as audio samples.

    Returns:
        A list of ``directory/filename`` paths, sorted by file name. The list
        is empty when *directory* does not exist, so the demo can still start
        (microphone/upload only) instead of crashing at import time.
    """
    if not os.path.isdir(directory):
        return []
    return [
        os.path.join(directory, name)
        # os.listdir() returns entries in arbitrary order; sort so the
        # dropdown shows the samples in a stable order on every start.
        for name in sorted(os.listdir(directory))
        # Compare case-insensitively so "clip.WAV" / "clip.MP3" are kept too.
        if name.lower().endswith(extensions)
    ]


# Paths offered in the "Select a Sample Audio" dropdown.
audio_samples = list_audio_samples()
|
50 |
+
|
51 |
+
# Dropdown listing the bundled sample recordings. The initial selection is
# passed as `value=`: `default=` is not a Dropdown constructor argument in
# Gradio 4.x (the API this file targets, given the `sources=` argument on
# gr.Audio) and makes the constructor raise TypeError at startup.
audio_choice = gr.Dropdown(
    label="Select a Sample Audio", choices=audio_samples, value=None
)
|
54 |
# Audio widget for user-provided input. Both sources are enabled so the
# widget matches its label and the "Please upload an audio file" hint returned
# by transcribe(); with sources="microphone" alone, file upload is disabled.
# type="filepath" hands transcribe() a path on disk.
audio_input = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    label="Upload Audio or Use Microphone",
)
|
57 |
|
58 |
iface = gr.Interface(
|
59 |
fn=transcribe,
|
60 |
+
inputs=[audio_choice, audio_input],
|
61 |
outputs="text",
|
62 |
title="Speech-to-text on Javanese Language Demo",
|
63 |
description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",
|