sudoping01 committed on
Commit
13fccc4
·
verified ·
1 Parent(s): c7e5b6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -68
app.py CHANGED
@@ -24,11 +24,18 @@ else:
24
 
25
  model_id = "sudoping01/maliba-asr-v1"
26
  transcriber = WhosperTranscriber(model_id=model_id)
27
- logger.info(f"MALIBA-ASR-v1 transcriber initialized successfully")
28
 
29
  def resample_audio(audio_path, target_sample_rate=16000):
 
30
  """
31
  Converts the audio file to the target sampling rate (16000 Hz).
 
 
 
 
 
 
32
  """
33
  try:
34
  waveform, original_sample_rate = torchaudio.load(audio_path)
@@ -47,15 +54,25 @@ def resample_audio(audio_path, target_sample_rate=16000):
47
 
48
  @spaces.GPU()
49
  def transcribe_audio(audio_file):
 
50
  """
51
  Transcribes the provided audio file into Bambara text using Whosper.
 
 
 
 
 
52
  """
 
53
  if audio_file is None:
54
  return "Please provide an audio file for transcription."
55
 
56
  try:
57
  logger.info(f"Transcribing audio file: {audio_file}")
 
 
58
  result = transcriber.transcribe_audio(audio_file)
 
59
  logger.info("Transcription successful.")
60
  return result
61
 
@@ -64,14 +81,16 @@ def transcribe_audio(audio_file):
64
  return f"Error during transcription: {str(e)}"
65
 
66
  def get_example_files(directory="./examples"):
 
67
  """
68
- Returns a list of audio files from the examples directory formatted for gr.Interface examples.
69
 
70
  Args:
71
  directory (str): The directory to search for audio files.
72
  Returns:
73
- list: A list of [audio_path] for each example file.
74
  """
 
75
  if not os.path.exists(directory):
76
  logger.warning(f"Examples directory {directory} not found.")
77
  return []
@@ -82,14 +101,10 @@ def get_example_files(directory="./examples"):
82
 
83
  try:
84
  files = os.listdir(directory)
85
- =
86
- files.sort()
87
-
88
  for file in files:
89
  if any(file.lower().endswith(ext) for ext in audio_extensions):
90
  full_path = os.path.abspath(os.path.join(directory, file))
91
- =
92
- audio_files.append([full_path])
93
 
94
  logger.info(f"Found {len(audio_files)} example audio files.")
95
  return audio_files[:5]
@@ -98,73 +113,127 @@ def get_example_files(directory="./examples"):
98
  logger.error(f"Error reading examples directory: {e}")
99
  return []
100
 
101
- def main():
102
  """
103
- Main function to launch the Gradio interface using gr.Interface.
104
  """
105
- logger.info("Starting MALIBA-ASR-v1 Gradio interface.")
106
-
107
 
108
  example_files = get_example_files()
109
 
110
-
111
- interface = gr.Interface(
112
- fn=transcribe_audio,
113
- inputs=[
114
- gr.Audio(
115
- label=" Record or Upload Audio",
116
- type="filepath",
117
- sources=["microphone", "upload"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  )
119
- ],
120
- outputs=gr.Textbox(
121
- label="📝 Transcribed Text (Bambara)",
122
- lines=8,
123
- placeholder="Your transcribed Bambara text will appear here..."
124
- ),
125
- title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
126
- description="""
127
- **Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**
128
-
129
- Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.
130
-
131
-
132
- **Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
133
- """,
134
- examples=example_files if example_files else None,
135
- cache_examples=False,
136
- article="""
137
- ---
138
-
139
- ## 🏆 MALIBA-ASR-v1 Performance
140
-
141
- | Metric | Value | Benchmark |
142
- |--------|-------|-----------|
143
- | **WER** | **0.226** | oza75/bambara-asr (test set) |
144
- | **CER** | **0.109** | oza75/bambara-asr (test set) |
145
- | **Test Duration** | **6 hours** | Diverse speakers & dialects |
146
-
147
- ## Revolutionary Impact
148
-
149
- **MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions. This breakthrough enables:
150
-
151
- ## 🇲🇱 About MALIBA-AI 🇲🇱
152
-
153
- MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:
154
- - Breaking digital language barriers for 22+ million Bambara speakers
155
- - Building cutting-edge AI technology for African languages
156
- - Preserving Mali's rich linguistic and cultural heritage
157
- - Democratizing access to voice technology across literacy levels
158
- - Training the next generation of African AI researchers
159
-
160
- ---
161
-
162
- **Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)
163
-
164
- *Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
165
- """
166
- )
 
 
 
 
 
 
 
 
 
167
 
 
 
168
  interface.launch(
169
  share=False,
170
  server_name="0.0.0.0",
 
24
 
25
  model_id = "sudoping01/maliba-asr-v1"
26
  transcriber = WhosperTranscriber(model_id=model_id)
27
+ logger.info(f"Transcriber initialized with model: {model_id}")
28
 
29
  def resample_audio(audio_path, target_sample_rate=16000):
30
+
31
  """
32
  Converts the audio file to the target sampling rate (16000 Hz).
33
+
34
+ Args:
35
+ audio_path (str): Path to the audio file.
36
+ target_sample_rate (int): The desired sample rate.
37
+ Returns:
38
+ A tensor containing the resampled audio data and the target sample rate.
39
  """
40
  try:
41
  waveform, original_sample_rate = torchaudio.load(audio_path)
 
54
 
55
  @spaces.GPU()
56
  def transcribe_audio(audio_file):
57
+
58
  """
59
  Transcribes the provided audio file into Bambara text using Whosper.
60
+
61
+ Args:
62
+ audio_file: The path to the audio file to transcribe.
63
+ Returns:
64
+ A string representing the transcribed Bambara text.
65
  """
66
+
67
  if audio_file is None:
68
  return "Please provide an audio file for transcription."
69
 
70
  try:
71
  logger.info(f"Transcribing audio file: {audio_file}")
72
+
73
+
74
  result = transcriber.transcribe_audio(audio_file)
75
+
76
  logger.info("Transcription successful.")
77
  return result
78
 
 
81
  return f"Error during transcription: {str(e)}"
82
 
83
  def get_example_files(directory="./examples"):
84
+
85
  """
86
+ Returns a list of audio files from the examples directory.
87
 
88
  Args:
89
  directory (str): The directory to search for audio files.
90
  Returns:
91
+ list: A list of paths to the audio files.
92
  """
93
+
94
  if not os.path.exists(directory):
95
  logger.warning(f"Examples directory {directory} not found.")
96
  return []
 
101
 
102
  try:
103
  files = os.listdir(directory)
 
 
 
104
  for file in files:
105
  if any(file.lower().endswith(ext) for ext in audio_extensions):
106
  full_path = os.path.abspath(os.path.join(directory, file))
107
+ audio_files.append(full_path)
 
108
 
109
  logger.info(f"Found {len(audio_files)} example audio files.")
110
  return audio_files[:5]
 
113
  logger.error(f"Error reading examples directory: {e}")
114
  return []
115
 
116
+ def build_interface():
117
  """
118
+ Builds the Gradio interface for Bambara speech recognition.
119
  """
 
 
120
 
121
  example_files = get_example_files()
122
 
123
+ with gr.Blocks(title="Bambara Speech Recognition") as demo:
124
+ gr.Markdown(
125
+ """
126
+ # 🎤 Bambara Automatic Speech Recognition
127
+
128
+ **Powered by MALIBA-AI**
129
+
130
+ Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
131
+ - 🎙️ **Record** your voice directly
132
+ - 📁 **Upload** an audio file
133
+ - 🎵 **Try** our example audio files
134
+
135
+ ## Supported Audio Formats
136
+ WAV, MP3, M4A, FLAC, OGG
137
+ """
138
+ )
139
+
140
+ with gr.Row():
141
+ with gr.Column():
142
+
143
+ audio_input = gr.Audio(
144
+ label="🎤 Record or Upload Audio",
145
+ type="filepath",
146
+ sources=["microphone", "upload"]
147
+ )
148
+
149
+ transcribe_btn = gr.Button(
150
+ "🔄 Transcribe Audio",
151
+ variant="primary",
152
+ size="lg"
153
+ )
154
+
155
+
156
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary")
157
+
158
+ with gr.Column():
159
+ output_text = gr.Textbox(
160
+ label="📝 Transcribed Text (Bambara)",
161
+ lines=8,
162
+ placeholder="Your transcribed Bambara text will appear here...",
163
+ interactive=False
164
+ )
165
+
166
+ # Examples section
167
+ if example_files:
168
+ gr.Markdown("## 🎵 Try These Examples")
169
+ gr.Examples(
170
+ examples=[[f] for f in example_files],
171
+ inputs=[audio_input],
172
+ outputs=output_text,
173
+ fn=transcribe_audio,
174
+ cache_examples=False,
175
+ label="Example Audio Files"
176
  )
177
+
178
+ # Information section
179
+ gr.Markdown(
180
+ """
181
+ ---
182
+
183
+ ## ℹ️ About This Model
184
+
185
+ - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
186
+ - **Developer:** MALIBA-AI
187
+ - **Language:** Bambara (bm)
188
+ - **Task:** Automatic Speech Recognition (ASR)
189
+ - **Sample Rate:** 16kHz (automatically resampled)
190
+
191
+ ## 🚀 How to Use
192
+
193
+ 1. **Record Audio:** Click the microphone button and speak in Bambara
194
+ 2. **Upload File:** Click the upload button to select an audio file
195
+ 3. **Transcribe:** Click the "Transcribe Audio" button
196
+ 4. **View Results:** See your transcribed text in Bambara
197
+
198
+ ## 📊 Performance Notes
199
+
200
+ - Best results with clear speech and minimal background noise
201
+ - Supports various audio formats and durations
202
+ - Optimized for Bambara language patterns and phonetics
203
+ """
204
+ )
205
+
206
+
207
+ transcribe_btn.click(
208
+ fn=transcribe_audio,
209
+ inputs=[audio_input],
210
+ outputs=output_text,
211
+ show_progress=True
212
+ )
213
+
214
+ clear_btn.click(
215
+ fn=lambda: (None, ""),
216
+ outputs=[audio_input, output_text]
217
+ )
218
+
219
+
220
+ audio_input.change(
221
+ fn=transcribe_audio,
222
+ inputs=[audio_input],
223
+ outputs=output_text,
224
+ show_progress=True
225
+ )
226
+
227
+ return demo
228
+
229
+ def main():
230
+ """
231
+ Main function to launch the Gradio interface.
232
+ """
233
+ logger.info("Starting Bambara ASR Gradio interface.")
234
 
235
+
236
+ interface = build_interface()
237
  interface.launch(
238
  share=False,
239
  server_name="0.0.0.0",