sudoping01 committed on
Commit
c7e5b6a
·
verified ·
1 Parent(s): c8322f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -137
app.py CHANGED
@@ -24,18 +24,11 @@ else:
24
 
25
  model_id = "sudoping01/maliba-asr-v1"
26
  transcriber = WhosperTranscriber(model_id=model_id)
27
- logger.info(f"Transcriber initialized with model: {model_id}")
28
 
29
  def resample_audio(audio_path, target_sample_rate=16000):
30
-
31
  """
32
  Converts the audio file to the target sampling rate (16000 Hz).
33
-
34
- Args:
35
- audio_path (str): Path to the audio file.
36
- target_sample_rate (int): The desired sample rate.
37
- Returns:
38
- A tensor containing the resampled audio data and the target sample rate.
39
  """
40
  try:
41
  waveform, original_sample_rate = torchaudio.load(audio_path)
@@ -54,25 +47,15 @@ def resample_audio(audio_path, target_sample_rate=16000):
54
 
55
  @spaces.GPU()
56
  def transcribe_audio(audio_file):
57
-
58
  """
59
  Transcribes the provided audio file into Bambara text using Whosper.
60
-
61
- Args:
62
- audio_file: The path to the audio file to transcribe.
63
- Returns:
64
- A string representing the transcribed Bambara text.
65
  """
66
-
67
  if audio_file is None:
68
  return "Please provide an audio file for transcription."
69
 
70
  try:
71
  logger.info(f"Transcribing audio file: {audio_file}")
72
-
73
-
74
  result = transcriber.transcribe_audio(audio_file)
75
-
76
  logger.info("Transcription successful.")
77
  return result
78
 
@@ -81,16 +64,14 @@ def transcribe_audio(audio_file):
81
  return f"Error during transcription: {str(e)}"
82
 
83
  def get_example_files(directory="./examples"):
84
-
85
  """
86
- Returns a list of audio files from the examples directory.
87
 
88
  Args:
89
  directory (str): The directory to search for audio files.
90
  Returns:
91
- list: A list of paths to the audio files.
92
  """
93
-
94
  if not os.path.exists(directory):
95
  logger.warning(f"Examples directory {directory} not found.")
96
  return []
@@ -101,10 +82,14 @@ def get_example_files(directory="./examples"):
101
 
102
  try:
103
  files = os.listdir(directory)
 
 
 
104
  for file in files:
105
  if any(file.lower().endswith(ext) for ext in audio_extensions):
106
  full_path = os.path.abspath(os.path.join(directory, file))
107
- audio_files.append(full_path)
 
108
 
109
  logger.info(f"Found {len(audio_files)} example audio files.")
110
  return audio_files[:5]
@@ -113,127 +98,73 @@ def get_example_files(directory="./examples"):
113
  logger.error(f"Error reading examples directory: {e}")
114
  return []
115
 
116
- def build_interface():
117
  """
118
- Builds the Gradio interface for Bambara speech recognition.
119
  """
120
-
121
- example_files = get_example_files()
122
 
123
- with gr.Blocks(title="Bambara Speech Recognition") as demo:
124
- gr.Markdown(
125
- """
126
- # 🎤 Bambara Automatic Speech Recognition
127
-
128
- **Powered by MALIBA-AI**
129
-
130
- Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
131
- - 🎙️ **Record** your voice directly
132
- - 📁 **Upload** an audio file
133
- - 🎵 **Try** our example audio files
134
-
135
- ## Supported Audio Formats
136
- WAV, MP3, M4A, FLAC, OGG
137
- """
138
- )
139
-
140
- with gr.Row():
141
- with gr.Column():
142
-
143
- audio_input = gr.Audio(
144
- label="🎤 Record or Upload Audio",
145
- type="filepath",
146
- sources=["microphone", "upload"]
147
- )
148
-
149
- transcribe_btn = gr.Button(
150
- "🔄 Transcribe Audio",
151
- variant="primary",
152
- size="lg"
153
- )
154
-
155
-
156
- clear_btn = gr.Button("🗑️ Clear", variant="secondary")
157
-
158
- with gr.Column():
159
- output_text = gr.Textbox(
160
- label="📝 Transcribed Text (Bambara)",
161
- lines=8,
162
- placeholder="Your transcribed Bambara text will appear here...",
163
- interactive=False
164
- )
165
-
166
- # Examples section
167
- if example_files:
168
- gr.Markdown("## 🎵 Try These Examples")
169
- gr.Examples(
170
- examples=[[f] for f in example_files],
171
- inputs=[audio_input],
172
- outputs=output_text,
173
- fn=transcribe_audio,
174
- cache_examples=False,
175
- label="Example Audio Files"
176
- )
177
-
178
- # Information section
179
- gr.Markdown(
180
- """
181
- ---
182
-
183
- ## ℹ️ About This Model
184
-
185
- - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
186
- - **Developer:** MALIBA-AI
187
- - **Language:** Bambara (bm)
188
- - **Task:** Automatic Speech Recognition (ASR)
189
- - **Sample Rate:** 16kHz (automatically resampled)
190
-
191
- ## 🚀 How to Use
192
-
193
- 1. **Record Audio:** Click the microphone button and speak in Bambara
194
- 2. **Upload File:** Click the upload button to select an audio file
195
- 3. **Transcribe:** Click the "Transcribe Audio" button
196
- 4. **View Results:** See your transcribed text in Bambara
197
-
198
- ## 📊 Performance Notes
199
-
200
- - Best results with clear speech and minimal background noise
201
- - Supports various audio formats and durations
202
- - Optimized for Bambara language patterns and phonetics
203
- """
204
- )
205
-
206
-
207
- transcribe_btn.click(
208
- fn=transcribe_audio,
209
- inputs=[audio_input],
210
- outputs=output_text,
211
- show_progress=True
212
- )
213
-
214
- clear_btn.click(
215
- fn=lambda: (None, ""),
216
- outputs=[audio_input, output_text]
217
- )
218
-
219
 
220
- audio_input.change(
221
- fn=transcribe_audio,
222
- inputs=[audio_input],
223
- outputs=output_text,
224
- show_progress=True
225
- )
226
 
227
- return demo
228
 
229
- def main():
230
- """
231
- Main function to launch the Gradio interface.
232
- """
233
- logger.info("Starting Bambara ASR Gradio interface.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
-
236
- interface = build_interface()
237
  interface.launch(
238
  share=False,
239
  server_name="0.0.0.0",
 
24
 
25
  model_id = "sudoping01/maliba-asr-v1"
26
  transcriber = WhosperTranscriber(model_id=model_id)
27
+ logger.info(f"MALIBA-ASR-v1 transcriber initialized successfully")
28
 
29
  def resample_audio(audio_path, target_sample_rate=16000):
 
30
  """
31
  Converts the audio file to the target sampling rate (16000 Hz).
 
 
 
 
 
 
32
  """
33
  try:
34
  waveform, original_sample_rate = torchaudio.load(audio_path)
 
47
 
48
  @spaces.GPU()
49
  def transcribe_audio(audio_file):
 
50
  """
51
  Transcribes the provided audio file into Bambara text using Whosper.
 
 
 
 
 
52
  """
 
53
  if audio_file is None:
54
  return "Please provide an audio file for transcription."
55
 
56
  try:
57
  logger.info(f"Transcribing audio file: {audio_file}")
 
 
58
  result = transcriber.transcribe_audio(audio_file)
 
59
  logger.info("Transcription successful.")
60
  return result
61
 
 
64
  return f"Error during transcription: {str(e)}"
65
 
66
  def get_example_files(directory="./examples"):
 
67
  """
68
+ Returns a list of audio files from the examples directory formatted for gr.Interface examples.
69
 
70
  Args:
71
  directory (str): The directory to search for audio files.
72
  Returns:
73
+ list: A list of [audio_path] for each example file.
74
  """
 
75
  if not os.path.exists(directory):
76
  logger.warning(f"Examples directory {directory} not found.")
77
  return []
 
82
 
83
  try:
84
  files = os.listdir(directory)
85
+ =
86
+ files.sort()
87
+
88
  for file in files:
89
  if any(file.lower().endswith(ext) for ext in audio_extensions):
90
  full_path = os.path.abspath(os.path.join(directory, file))
91
+ =
92
+ audio_files.append([full_path])
93
 
94
  logger.info(f"Found {len(audio_files)} example audio files.")
95
  return audio_files[:5]
 
98
  logger.error(f"Error reading examples directory: {e}")
99
  return []
100
 
101
+ def main():
102
  """
103
+ Main function to launch the Gradio interface using gr.Interface.
104
  """
105
+ logger.info("Starting MALIBA-ASR-v1 Gradio interface.")
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ example_files = get_example_files()
 
 
 
 
 
109
 
 
110
 
111
+ interface = gr.Interface(
112
+ fn=transcribe_audio,
113
+ inputs=[
114
+ gr.Audio(
115
+ label=" Record or Upload Audio",
116
+ type="filepath",
117
+ sources=["microphone", "upload"]
118
+ )
119
+ ],
120
+ outputs=gr.Textbox(
121
+ label="📝 Transcribed Text (Bambara)",
122
+ lines=8,
123
+ placeholder="Your transcribed Bambara text will appear here..."
124
+ ),
125
+ title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
126
+ description="""
127
+ **Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**
128
+
129
+ Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.
130
+
131
+
132
+ **Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
133
+ """,
134
+ examples=example_files if example_files else None,
135
+ cache_examples=False,
136
+ article="""
137
+ ---
138
+
139
+ ## 🏆 MALIBA-ASR-v1 Performance
140
+
141
+ | Metric | Value | Benchmark |
142
+ |--------|-------|-----------|
143
+ | **WER** | **0.226** | oza75/bambara-asr (test set) |
144
+ | **CER** | **0.109** | oza75/bambara-asr (test set) |
145
+ | **Test Duration** | **6 hours** | Diverse speakers & dialects |
146
+
147
+ ## Revolutionary Impact
148
+
149
+ **MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions. This breakthrough enables:
150
+
151
+ ## 🇲🇱 About MALIBA-AI 🇲🇱
152
+
153
+ MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:
154
+ - Breaking digital language barriers for 22+ million Bambara speakers
155
+ - Building cutting-edge AI technology for African languages
156
+ - Preserving Mali's rich linguistic and cultural heritage
157
+ - Democratizing access to voice technology across literacy levels
158
+ - Training the next generation of African AI researchers
159
+
160
+ ---
161
+
162
+ **Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)
163
+
164
+ *Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
165
+ """
166
+ )
167
 
 
 
168
  interface.launch(
169
  share=False,
170
  server_name="0.0.0.0",