oza75 committed · Commit 2968589 · verified · 1 Parent(s): 8c49c39

Update app.py

Files changed (1):
  1. app.py  +10 -7
app.py CHANGED
@@ -16,11 +16,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 #model_checkpoint = "oza75/whisper-bambara-asr-002"
 #revision = "831cd15ed74a554caac9f304cf50dc773841ba1b"
 model_checkpoint = "oza75/whisper-bambara-asr-005"
-revision = "6a92cd0f19985d12739c2f6864607627115e015d"
-#revision = "fb69a5750182933868397543366dbb63747cf40c"
+#revision = "6a92cd0f19985d12739c2f6864607627115e015d"  # first good checkpoint for Bambara
+#revision = "fb69a5750182933868397543366dbb63747cf40c"  # this one only translates to English
+revision = "595f8a4cc58b5062c06e6b31a7e5575b00d46908"  # supports transcription and translation
 # language = "bambara"
-language = "icelandic"
-task = "transcribe"
+language = "icelandic"  # Icelandic is used because the model was trained with its language token repurposed for Bambara

 
 # Load the custom tokenizer designed for Bambara and the ASR model
@@ -49,7 +49,7 @@ def resample_audio(audio_path, target_sample_rate=16000):
     return waveform, target_sample_rate
 
 @spaces.GPU()
-def transcribe(audio):
+def transcribe(audio, task_type):
     """
     Transcribes the provided audio file into text using the configured ASR pipeline.
 
@@ -63,7 +63,7 @@ def transcribe(audio):
     waveform, sample_rate = resample_audio(audio)
 
     # Use the pipeline to perform transcription
-    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task": task})["text"]
+    text = pipe({"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}, generate_kwargs={"task": task_type, "language": language})["text"]
 
     return text
 
@@ -90,7 +90,10 @@ def main():
     # Setup Gradio interface
     iface = gr.Interface(
         fn=transcribe,
-        inputs=gr.Audio(type="filepath", value=example_files[0]),
+        inputs=[
+            gr.Audio(type="filepath", value=example_files[0]),
+            gr.Radio(choices=["transcribe", "translate"], label="Task Type", value="transcribe")
+        ],
         outputs="text",
         title="Bambara Automatic Speech Recognition",
         description="Realtime demo for Bambara speech recognition based on a fine-tuning of the Whisper model.",