Update app.py
app.py CHANGED
@@ -16,11 +16,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 #model_checkpoint = "oza75/whisper-bambara-asr-002"
 #revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
 model_checkpoint = "oza75/whisper-bambara-asr-005"
-revision = "6a92cd0f19985d12739c2f6864607627115e015d"
-#revision = "fb69a5750182933868397543366dbb63747cf40c"
+#revision = "6a92cd0f19985d12739c2f6864607627115e015d"  # first good checkpoint for Bambara
+#revision = "fb69a5750182933868397543366dbb63747cf40c"  # this one only translates to English
+revision = "595f8a4cc58b5062c06e6b31a7e5575b00d46908"  # supports both transcription and translation
 # language = "bambara"
-language = "icelandic"
-task = "transcribe"
+language = "icelandic"  # we use "icelandic" because the model was trained to replace Icelandic with Bambara

 # Load the custom tokenizer designed for Bambara and the ASR model
@@ -49,7 +49,7 @@ def resample_audio(audio_path, target_sample_rate=16000):
     return waveform, target_sample_rate

 @spaces.GPU()
-def transcribe(audio):
+def transcribe(audio, task_type):
     """
     Transcribes the provided audio file into text using the configured ASR pipeline.

@@ -63,7 +63,7 @@ def transcribe(audio):
     waveform, sample_rate = resample_audio(audio)

     # Use the pipeline to perform transcription
-    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task":
+    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task": task_type, "language": language})["text"]

     return text

@@ -90,7 +90,10 @@ def main():
     # Setup Gradio interface
     iface = gr.Interface(
         fn=transcribe,
-        inputs=
+        inputs=[
+            gr.Audio(type="filepath", value=example_files[0]),
+            gr.Radio(choices=["transcribe", "translate"], label="Task Type", value="transcribe")
+        ],
         outputs="text",
         title="Bambara Automatic Speech Recognition",
         description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model.",
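For readers skimming the diff, here is a minimal sketch of how the configuration values in the first hunk plausibly feed into the ASR pipeline. The loading code is not part of this diff, so the WhisperTokenizer/pipeline wiring below is an assumption based on the "# Load the custom tokenizer designed for Bambara and the ASR model" context line; only model_checkpoint, revision, language, and the device check come from app.py itself.

# Hypothetical wiring (not shown in this diff): load the Bambara tokenizer and build the ASR pipeline.
import torch
from transformers import WhisperTokenizer, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

model_checkpoint = "oza75/whisper-bambara-asr-005"
revision = "595f8a4cc58b5062c06e6b31a7e5575b00d46908"  # pinned revision from the diff

# Assumed: the custom Bambara tokenizer lives in the same repo at the same revision.
tokenizer = WhisperTokenizer.from_pretrained(model_checkpoint, revision=revision)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model_checkpoint,
    revision=revision,
    tokenizer=tokenizer,
    device=device,
)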
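Putting the second and third hunks together, the updated transcribe() reads roughly as below. This is a sketch assembled from the diff, assuming pipe, language, and resample_audio() are defined earlier in app.py as the context lines indicate; routing task_type through generate_kwargs is what lets the same pinned revision either transcribe Bambara or translate it to English.

import spaces  # ZeroGPU helper that provides the @spaces.GPU() decorator

@spaces.GPU()
def transcribe(audio, task_type):
    """Run the ASR pipeline on an audio file, transcribing or translating it."""
    # resample_audio() (defined earlier in app.py) returns a 16 kHz waveform tensor.
    waveform, sample_rate = resample_audio(audio)

    # The task now comes from the UI instead of a module-level constant;
    # the language token stays fixed to "icelandic" (repurposed for Bambara).
    text = pipe(
        {"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate},
        generate_kwargs={"task": task_type, "language": language},
    )["text"]
    return text

# e.g. transcribe("sample.wav", "transcribe") -> Bambara text,
#      transcribe("sample.wav", "translate")  -> English translation.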
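Finally, a sketch of the resulting Gradio wiring from the last hunk. example_files is assumed to be a list of bundled sample audio paths defined elsewhere in app.py, and the iface.launch() call is the usual way such a Space starts rather than something shown in this diff; the new Radio input supplies the task_type argument added to transcribe().

import gradio as gr

iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # Audio file input, preloaded with the first bundled example.
        gr.Audio(type="filepath", value=example_files[0]),
        # Maps directly onto the new task_type parameter of transcribe().
        gr.Radio(choices=["transcribe", "translate"], label="Task Type", value="transcribe"),
    ],
    outputs="text",
    title="Bambara Automatic Speech Recognition",
    description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model.",
)
iface.launch()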