Update app.py
app.py CHANGED
@@ -16,11 +16,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 #model_checkpoint = "oza75/whisper-bambara-asr-002"
 #revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
 model_checkpoint = "oza75/whisper-bambara-asr-005"
-revision = "6a92cd0f19985d12739c2f6864607627115e015d"
-#revision = "fb69a5750182933868397543366dbb63747cf40c"
+#revision = "6a92cd0f19985d12739c2f6864607627115e015d"  # first good checkpoint for Bambara
+#revision = "fb69a5750182933868397543366dbb63747cf40c"  # this one only translates to English
+revision = "595f8a4cc58b5062c06e6b31a7e5575b00d46908"  # supports both transcription and translation
 # language = "bambara"
-language = "icelandic"
-task = "transcribe"
+language = "icelandic"  # we use "icelandic" because the model was trained to replace Icelandic with Bambara

 # Load the custom tokenizer designed for Bambara and the ASR model
@@ -49,7 +49,7 @@ def resample_audio(audio_path, target_sample_rate=16000):
     return waveform, target_sample_rate

 @spaces.GPU()
-def transcribe(audio):
+def transcribe(audio, task_type):
     """
     Transcribes the provided audio file into text using the configured ASR pipeline.

@@ -63,7 +63,7 @@ def transcribe(audio):
     waveform, sample_rate = resample_audio(audio)

     # Use the pipeline to perform transcription
-    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task":
+    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task": task_type, "language": language})["text"]

     return text

@@ -90,7 +90,10 @@ def main():
     # Setup Gradio interface
     iface = gr.Interface(
         fn=transcribe,
-        inputs=
+        inputs=[
+            gr.Audio(type="filepath", value=example_files[0]),
+            gr.Radio(choices=["transcribe", "translate"], label="Task Type", value="transcribe")
+        ],
         outputs="text",
         title="Bambara Automatic Speech Recognition",
         description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model.",
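For readers skimming the diff, here is a minimal sketch of how the configuration values in the first hunk plausibly feed into the ASR pipeline. The loading code is not part of this diff, so the WhisperTokenizer/pipeline wiring below is an assumption based on the "# Load the custom tokenizer designed for Bambara and the ASR model" context line; only model_checkpoint, revision, language, and the device check come from app.py itself.

# Hypothetical wiring (not shown in this diff): load the Bambara tokenizer and build the ASR pipeline.
import torch
from transformers import WhisperTokenizer, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

model_checkpoint = "oza75/whisper-bambara-asr-005"
revision = "595f8a4cc58b5062c06e6b31a7e5575b00d46908"  # pinned revision from the diff

# Assumed: the custom Bambara tokenizer lives in the same repo at the same revision.
tokenizer = WhisperTokenizer.from_pretrained(model_checkpoint, revision=revision)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model_checkpoint,
    revision=revision,
    tokenizer=tokenizer,
    device=device,
)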
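Putting the second and third hunks together, the updated transcribe() reads roughly as below. This is a sketch assembled from the diff, assuming pipe, language, and resample_audio() are defined earlier in app.py as the context lines indicate; routing task_type through generate_kwargs is what lets the same pinned revision either transcribe Bambara or translate it to English.

import spaces  # ZeroGPU helper that provides the @spaces.GPU() decorator

@spaces.GPU()
def transcribe(audio, task_type):
    """Run the ASR pipeline on an audio file, transcribing or translating it."""
    # resample_audio() (defined earlier in app.py) returns a 16 kHz waveform tensor.
    waveform, sample_rate = resample_audio(audio)

    # The task now comes from the UI instead of a module-level constant;
    # the language token stays fixed to "icelandic" (repurposed for Bambara).
    text = pipe(
        {"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate},
        generate_kwargs={"task": task_type, "language": language},
    )["text"]
    return text

# e.g. transcribe("sample.wav", "transcribe") -> Bambara text,
#      transcribe("sample.wav", "translate")  -> English translation.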
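Finally, a sketch of the resulting Gradio wiring from the last hunk. example_files is assumed to be a list of bundled sample audio paths defined elsewhere in app.py, and the iface.launch() call is the usual way such a Space starts rather than something shown in this diff; the new Radio input supplies the task_type argument added to transcribe().

import gradio as gr

iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # Audio file input, preloaded with the first bundled example.
        gr.Audio(type="filepath", value=example_files[0]),
        # Maps directly onto the new task_type parameter of transcribe().
        gr.Radio(choices=["transcribe", "translate"], label="Task Type", value="transcribe"),
    ],
    outputs="text",
    title="Bambara Automatic Speech Recognition",
    description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model.",
)
iface.launch()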