Spaces:

avalonai
/

Whisper-Javanese-Small

Runtime error

App Files Files Community

ranggaaldosas committed on May 9, 2024

Commit

54fe872

1 Parent(s): c38911e

feat: fixing audio sample bugs

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -6,28 +6,29 @@ import torch
 from transformers import WhisperForConditionalGeneration, WhisperProcessor
 hf_token = os.getenv("hf_token")
 if hf_token is None:
     raise ValueError(
         "Hugging Face token not found. Please set the 'hf_token' environment variable."
     )
 processor = WhisperProcessor.from_pretrained(
-    "openai/whisper-small",
-    language="Indonesian",
-    task="transcribe",
-    token=hf_token,
 )
 model = WhisperForConditionalGeneration.from_pretrained(
     "avalonai/whisper-small-jv", token=hf_token
 )
-def transcribe(audio):
-    if audio is None:
-        return "No audio file provided. Please upload an audio file."
     try:
-        audio, sampling_rate = librosa.load(audio, sr=16000)
     except Exception as e:
         return f"Failed to load audio: {str(e)}"
@@ -44,18 +45,19 @@ def transcribe(audio):
 audio_samples = [
     os.path.join("audio_sample", f)
     for f in os.listdir("audio_sample")
-    if f.endswith((".wav", ".mp3"))
 ]
 audio_input = gr.Audio(
-    sources="microphone",
-    type="filepath",
-    label="Upload Audio or Select a Sample",
-    choices=audio_samples,
 )
 iface = gr.Interface(
     fn=transcribe,
-    inputs=audio_input,
     outputs="text",
     title="Speech-to-text on Javanese Language Demo",
     description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",

 from transformers import WhisperForConditionalGeneration, WhisperProcessor
 hf_token = os.getenv("hf_token")
 if hf_token is None:
     raise ValueError(
         "Hugging Face token not found. Please set the 'hf_token' environment variable."
     )
 processor = WhisperProcessor.from_pretrained(
+    "openai/whisper-small", language="Indonesian", task="transcribe", token=hf_token
 )
 model = WhisperForConditionalGeneration.from_pretrained(
     "avalonai/whisper-small-jv", token=hf_token
 )
+def transcribe(audio_choice, audio_file):
+    if audio_file is not None:
+        audio_path = audio_file
+    elif audio_choice is not None:
+        audio_path = audio_choice
+    else:
+        return "No audio file provided. Please upload an audio file or select a sample."
     try:
+        audio, sampling_rate = librosa.load(audio_path, sr=16000)
     except Exception as e:
         return f"Failed to load audio: {str(e)}"
 audio_samples = [
     os.path.join("audio_sample", f)
     for f in os.listdir("audio_sample")
+    if f.endswith((".wav", ".mp3", ".m4a"))
 ]
+audio_choice = gr.Dropdown(
+    label="Select a Sample Audio", choices=audio_samples, default=None
+)
 audio_input = gr.Audio(
+    sources="microphone", type="filepath", label="Upload Audio or Use Microphone"
 )
 iface = gr.Interface(
     fn=transcribe,
+    inputs=[audio_choice, audio_input],
     outputs="text",
     title="Speech-to-text on Javanese Language Demo",
     description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",