sudoping01 committed on
Commit
8522dd2
·
verified ·
1 Parent(s): 6f340af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -150
app.py CHANGED
@@ -1,188 +1,246 @@
1
- import gradio as gr
2
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def build_interface():
5
  """
6
- Builds an enhanced Gradio interface for Bambara speech recognition.
 
 
 
 
 
 
7
  """
8
- example_files = get_example_files()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- custom_css = """
11
- .gr-button-primary {
12
- background-color: #2c5282 !important;
13
- color: white !important;
14
- border-radius: 8px !important;
15
- font-weight: bold !important;
16
- }
17
- .gr-button-secondary {
18
- background-color: #e2e8f0 !important;
19
- color: #2d3748 !important;
20
- border-radius: 8px !important;
21
- }
22
- .example-container {
23
- background-color: #f7fafc;
24
- padding: 16px;
25
- border-radius: 8px;
26
- margin-top: 16px;
27
- }
28
- .gr-textbox {
29
- border-radius: 8px !important;
30
- border: 1px solid #cbd5e0 !important;
31
- }
32
- .gr-audio {
33
- border-radius: 8px !important;
34
- }
35
- .header {
36
- text-align: center;
37
- color: #2d3748;
38
- }
39
- .info-section {
40
- background-color: #edf2f7;
41
- padding: 16px;
42
- border-radius: 8px;
43
- margin-top: 16px;
44
- }
45
  """
46
 
47
- with gr.Blocks(title="Bambara Speech Recognition", css=custom_css) as demo:
48
- # Header
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  gr.Markdown(
50
  """
51
- <h1 class="header">🎤 Bambara Speech Recognition</h1>
52
- <p style="text-align: center; color: #4a5568;">
53
- Powered by <b>MALIBA-AI</b> | Convert Bambara speech to text effortlessly
54
- </p>
 
 
 
 
 
 
 
55
  """
56
  )
57
-
58
- # Main interaction section
59
  with gr.Row():
60
- with gr.Column(scale=1):
61
- gr.Markdown("### 🎙️ Record or Upload Audio")
62
  audio_input = gr.Audio(
63
- label="Record or Upload Audio",
64
  type="filepath",
65
- sources=["microphone", "upload"],
66
- show_label=False
67
  )
68
- audio_preview = gr.Audio(
69
- label="Preview Your Audio",
70
- interactive=False,
71
- visible=False
 
72
  )
 
73
 
74
- with gr.Row():
75
- transcribe_btn = gr.Button(
76
- "🔄 Transcribe Audio",
77
- variant="primary",
78
- size="lg"
79
- )
80
- clear_btn = gr.Button(
81
- "🗑️ Clear",
82
- variant="secondary",
83
- size="lg"
84
- )
85
-
86
- with gr.Column(scale=1):
87
- gr.Markdown("### 📝 Transcription Output")
88
  output_text = gr.Textbox(
89
- label="Transcribed Text (Bambara)",
90
- lines=6,
91
  placeholder="Your transcribed Bambara text will appear here...",
92
- interactive=False,
93
- show_copy_button=True
94
- )
95
- status_message = gr.Markdown(
96
- value="",
97
- visible=False
98
  )
 
99
 
100
- # Example audio section
101
  if example_files:
102
- gr.Markdown("## 🎵 Try Example Audio Files")
103
- with gr.Group(elem_classes="example-container"):
104
- gr.Markdown(
105
- """
106
- Listen to these sample Bambara audio files and transcribe them with one click.
107
- """
108
- )
109
- for idx, file in enumerate(example_files):
110
- with gr.Row():
111
- gr.Audio(
112
- value=file,
113
- label=f"Example {idx + 1}: {os.path.basename(file)}",
114
- interactive=False,
115
- show_label=True
116
- )
117
- gr.Button(
118
- f"Transcribe Example {idx + 1}",
119
- variant="primary",
120
- size="sm"
121
- ).click(
122
- fn=transcribe_audio,
123
- inputs=gr.State(value=file),
124
- outputs=[output_text, status_message],
125
- show_progress=True,
126
- _js="() => {return {show_progress: true}}"
127
- )
128
 
129
  gr.Markdown(
130
  """
131
- <div class="info-section">
132
- ## ℹ️ How to Use
 
 
 
 
 
 
 
 
 
133
 
134
- 1. **Record**: Click the microphone to speak in Bambara.
135
- 2. **Upload**: Select an audio file (WAV, MP3, M4A, FLAC, OGG).
136
- 3. **Transcribe**: Click "Transcribe Audio" or try an example.
137
- 4. **View**: See the transcribed text in Bambara.
138
 
139
- ## 📊 Model Details
140
 
141
- - **Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
142
- - **Language**: Bambara (bm)
143
- - **Sample Rate**: 16kHz (auto-resampled)
144
- - **Best for**: Clear speech with minimal background noise
145
- </div>
146
  """
147
  )
 
148
 
149
-
150
- def update_audio_preview(audio_file):
151
- return gr.update(value=audio_file, visible=True), ""
152
-
153
- def clear_inputs():
154
- return None, "", gr.update(visible=False), ""
155
-
156
- def transcribe_with_status(audio_file):
157
- if not audio_file:
158
- return "", "**Error**: Please provide an audio file."
159
- result = transcribe_audio(audio_file)
160
- if "Error" in result:
161
- return result, f"**Error**: {result}"
162
- return result, "**Success**: Transcription completed!"
163
-
164
-
165
- audio_input.change(
166
- fn=update_audio_preview,
167
- inputs=audio_input,
168
- outputs=[audio_preview, status_message]
169
- ).then(
170
- fn=transcribe_with_status,
171
- inputs=audio_input,
172
- outputs=[output_text, status_message],
173
  show_progress=True
174
  )
 
 
 
 
 
 
175
 
176
- transcribe_btn.click(
177
- fn=transcribe_with_status,
178
- inputs=audio_input,
179
- outputs=[output_text, status_message],
180
  show_progress=True
181
  )
 
 
182
 
183
- clear_btn.click(
184
- fn=clear_inputs,
185
- outputs=[audio_input, output_text, audio_preview, status_message]
186
- )
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- return demo
 
 
 
1
  import os
2
+ import spaces
3
+ import torch
4
+ import torchaudio
5
+ import gradio as gr
6
+ import logging
7
+ from whosper import WhosperTranscriber
8
+
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ if torch.cuda.is_available():
15
+ device = "cuda"
16
+ logger.info("Using CUDA for inference.")
17
+ elif torch.backends.mps.is_available():
18
+ device = "mps"
19
+ logger.info("Using MPS for inference.")
20
+ else:
21
+ device = "cpu"
22
+ logger.info("Using CPU for inference.")
23
+
24
+
25
+ model_id = "sudoping01/maliba-asr-v1"
26
+ transcriber = WhosperTranscriber(model_id=model_id)
27
+ logger.info(f"Transcriber initialized with model: {model_id}")
28
+
29
+ def resample_audio(audio_path, target_sample_rate=16000):
30
 
 
31
  """
32
+ Converts the audio file to the target sampling rate (16000 Hz).
33
+
34
+ Args:
35
+ audio_path (str): Path to the audio file.
36
+ target_sample_rate (int): The desired sample rate.
37
+ Returns:
38
+ A tensor containing the resampled audio data and the target sample rate.
39
  """
40
+ try:
41
+ waveform, original_sample_rate = torchaudio.load(audio_path)
42
+
43
+ if original_sample_rate != target_sample_rate:
44
+ resampler = torchaudio.transforms.Resample(
45
+ orig_freq=original_sample_rate,
46
+ new_freq=target_sample_rate
47
+ )
48
+ waveform = resampler(waveform)
49
+
50
+ return waveform, target_sample_rate
51
+ except Exception as e:
52
+ logger.error(f"Error resampling audio: {e}")
53
+ raise e
54
+
55
+ @spaces.GPU()
56
+ def transcribe_audio(audio_file):
57
+
58
+ """
59
+ Transcribes the provided audio file into Bambara text using Whosper.
60
 
61
+ Args:
62
+ audio_file: The path to the audio file to transcribe.
63
+ Returns:
64
+ A string representing the transcribed Bambara text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  """
66
 
67
+ if audio_file is None:
68
+ return "Please provide an audio file for transcription."
69
+
70
+ try:
71
+ logger.info(f"Transcribing audio file: {audio_file}")
72
+
73
+
74
+ result = transcriber.transcribe_audio(audio_file)
75
+
76
+ logger.info("Transcription successful.")
77
+ return result.get("text", "")
78
+
79
+ except Exception as e:
80
+ logger.error(f"Transcription failed: {e}")
81
+ return f"Error during transcription: {str(e)}"
82
+
83
+ def get_example_files(directory="./examples"):
84
+
85
+ """
86
+ Returns a list of audio files from the examples directory.
87
+
88
+ Args:
89
+ directory (str): The directory to search for audio files.
90
+ Returns:
91
+ list: A list of paths to the audio files.
92
+ """
93
+
94
+ if not os.path.exists(directory):
95
+ logger.warning(f"Examples directory {directory} not found.")
96
+ return []
97
+
98
+
99
+ audio_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
100
+ audio_files = []
101
+
102
+ try:
103
+ files = os.listdir(directory)
104
+ for file in files:
105
+ if any(file.lower().endswith(ext) for ext in audio_extensions):
106
+ full_path = os.path.abspath(os.path.join(directory, file))
107
+ audio_files.append(full_path)
108
+
109
+ logger.info(f"Found {len(audio_files)} example audio files.")
110
+ return audio_files[:5]
111
+
112
+ except Exception as e:
113
+ logger.error(f"Error reading examples directory: {e}")
114
+ return []
115
+
116
+ def build_interface():
117
+ """
118
+ Builds the Gradio interface for Bambara speech recognition.
119
+ """
120
+
121
+ example_files = get_example_files()
122
+
123
+ with gr.Blocks(title="Bambara Speech Recognition") as demo:
124
  gr.Markdown(
125
  """
126
+ # 🎤 Bambara Automatic Speech Recognition
127
+
128
+ **Powered by MALIBA-AI**
129
+
130
+ Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
131
+ - 🎙️ **Record** your voice directly
132
+ - 📁 **Upload** an audio file
133
+ - 🎵 **Try** our example audio files
134
+
135
+ ## Supported Audio Formats
136
+ WAV, MP3, M4A, FLAC, OGG
137
  """
138
  )
139
+
 
140
  with gr.Row():
141
+ with gr.Column():
142
+
143
  audio_input = gr.Audio(
144
+ label="🎤 Record or Upload Audio",
145
  type="filepath",
146
+ sources=["microphone", "upload"]
 
147
  )
148
+
149
+ transcribe_btn = gr.Button(
150
+ "🔄 Transcribe Audio",
151
+ variant="primary",
152
+ size="lg"
153
  )
154
+
155
 
156
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary")
157
+
158
+ with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
159
  output_text = gr.Textbox(
160
+ label="📝 Transcribed Text (Bambara)",
161
+ lines=8,
162
  placeholder="Your transcribed Bambara text will appear here...",
163
+ interactive=False
 
 
 
 
 
164
  )
165
+
166
 
 
167
  if example_files:
168
+ gr.Markdown("## 🎵 Try These Examples")
169
+ gr.Examples(
170
+ examples=[[f] for f in example_files],
171
+ inputs=[audio_input],
172
+ outputs=output_text,
173
+ fn=transcribe_audio,
174
+ cache_examples=False,
175
+ label="Example Audio Files"
176
+ )
177
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  gr.Markdown(
180
  """
181
+ ---
182
+
183
+ ## ℹ️ About This Model
184
+
185
+ - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
186
+ - **Developer:** MALIBA-AI
187
+ - **Language:** Bambara (bm)
188
+ - **Task:** Automatic Speech Recognition (ASR)
189
+ - **Sample Rate:** 16kHz (automatically resampled)
190
+
191
+ ## 🚀 How to Use
192
 
193
+ 1. **Record Audio:** Click the microphone button and speak in Bambara
194
+ 2. **Upload File:** Click the upload button to select an audio file
195
+ 3. **Transcribe:** Click the "Transcribe Audio" button
196
+ 4. **View Results:** See your transcribed text in Bambara
197
 
198
+ ## 📊 Performance Notes
199
 
200
+ - Best results with clear speech and minimal background noise
201
+ - Supports various audio formats and durations
202
+ - Optimized for Bambara language patterns and phonetics
 
 
203
  """
204
  )
205
+
206
 
207
+ transcribe_btn.click(
208
+ fn=transcribe_audio,
209
+ inputs=[audio_input],
210
+ outputs=output_text,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  show_progress=True
212
  )
213
+
214
+ clear_btn.click(
215
+ fn=lambda: (None, ""),
216
+ outputs=[audio_input, output_text]
217
+ )
218
+
219
 
220
+ audio_input.change(
221
+ fn=transcribe_audio,
222
+ inputs=[audio_input],
223
+ outputs=output_text,
224
  show_progress=True
225
  )
226
+
227
+ return demo
228
 
229
+ def main():
230
+ """
231
+ Main function to launch the Gradio interface.
232
+ """
233
+ logger.info("Starting Bambara ASR Gradio interface.")
234
+
235
+
236
+ interface = build_interface()
237
+ interface.launch(
238
+ share=False,
239
+ server_name="0.0.0.0",
240
+ server_port=7860
241
+ )
242
+
243
+ logger.info("Gradio interface launched successfully.")
244
 
245
+ if __name__ == "__main__":
246
+ main()