sudoping01 committed on
Commit
6f340af
·
verified ·
1 Parent(s): 13fccc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -210
app.py CHANGED
@@ -1,246 +1,188 @@
1
- import os
2
- import spaces
3
- import torch
4
- import torchaudio
5
  import gradio as gr
6
- import logging
7
- from whosper import WhosperTranscriber
8
-
9
-
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- if torch.cuda.is_available():
15
- device = "cuda"
16
- logger.info("Using CUDA for inference.")
17
- elif torch.backends.mps.is_available():
18
- device = "mps"
19
- logger.info("Using MPS for inference.")
20
- else:
21
- device = "cpu"
22
- logger.info("Using CPU for inference.")
23
-
24
-
25
- model_id = "sudoping01/maliba-asr-v1"
26
- transcriber = WhosperTranscriber(model_id=model_id)
27
- logger.info(f"Transcriber initialized with model: {model_id}")
28
-
29
- def resample_audio(audio_path, target_sample_rate=16000):
30
-
31
- """
32
- Converts the audio file to the target sampling rate (16000 Hz).
33
-
34
- Args:
35
- audio_path (str): Path to the audio file.
36
- target_sample_rate (int): The desired sample rate.
37
- Returns:
38
- A tensor containing the resampled audio data and the target sample rate.
39
- """
40
- try:
41
- waveform, original_sample_rate = torchaudio.load(audio_path)
42
-
43
- if original_sample_rate != target_sample_rate:
44
- resampler = torchaudio.transforms.Resample(
45
- orig_freq=original_sample_rate,
46
- new_freq=target_sample_rate
47
- )
48
- waveform = resampler(waveform)
49
-
50
- return waveform, target_sample_rate
51
- except Exception as e:
52
- logger.error(f"Error resampling audio: {e}")
53
- raise e
54
-
55
- @spaces.GPU()
56
- def transcribe_audio(audio_file):
57
-
58
- """
59
- Transcribes the provided audio file into Bambara text using Whosper.
60
-
61
- Args:
62
- audio_file: The path to the audio file to transcribe.
63
- Returns:
64
- A string representing the transcribed Bambara text.
65
- """
66
-
67
- if audio_file is None:
68
- return "Please provide an audio file for transcription."
69
-
70
- try:
71
- logger.info(f"Transcribing audio file: {audio_file}")
72
-
73
-
74
- result = transcriber.transcribe_audio(audio_file)
75
-
76
- logger.info("Transcription successful.")
77
- return result
78
-
79
- except Exception as e:
80
- logger.error(f"Transcription failed: {e}")
81
- return f"Error during transcription: {str(e)}"
82
-
83
- def get_example_files(directory="./examples"):
84
-
85
- """
86
- Returns a list of audio files from the examples directory.
87
-
88
- Args:
89
- directory (str): The directory to search for audio files.
90
- Returns:
91
- list: A list of paths to the audio files.
92
- """
93
-
94
- if not os.path.exists(directory):
95
- logger.warning(f"Examples directory {directory} not found.")
96
- return []
97
-
98
-
99
- audio_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
100
- audio_files = []
101
-
102
- try:
103
- files = os.listdir(directory)
104
- for file in files:
105
- if any(file.lower().endswith(ext) for ext in audio_extensions):
106
- full_path = os.path.abspath(os.path.join(directory, file))
107
- audio_files.append(full_path)
108
-
109
- logger.info(f"Found {len(audio_files)} example audio files.")
110
- return audio_files[:5]
111
-
112
- except Exception as e:
113
- logger.error(f"Error reading examples directory: {e}")
114
- return []
115
 
116
  def build_interface():
117
  """
118
- Builds the Gradio interface for Bambara speech recognition.
119
  """
120
-
121
  example_files = get_example_files()
122
 
123
- with gr.Blocks(title="Bambara Speech Recognition") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  gr.Markdown(
125
  """
126
- # 🎤 Bambara Automatic Speech Recognition
127
-
128
- **Powered by MALIBA-AI**
129
-
130
- Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
131
- - 🎙️ **Record** your voice directly
132
- - 📁 **Upload** an audio file
133
- - 🎵 **Try** our example audio files
134
-
135
- ## Supported Audio Formats
136
- WAV, MP3, M4A, FLAC, OGG
137
  """
138
  )
139
-
140
- with gr.Row():
141
- with gr.Column():
142
 
 
 
 
 
143
  audio_input = gr.Audio(
144
- label="🎤 Record or Upload Audio",
145
  type="filepath",
146
- sources=["microphone", "upload"]
 
147
  )
148
-
149
- transcribe_btn = gr.Button(
150
- "🔄 Transcribe Audio",
151
- variant="primary",
152
- size="lg"
153
  )
154
-
155
 
156
- clear_btn = gr.Button("🗑️ Clear", variant="secondary")
157
-
158
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
159
  output_text = gr.Textbox(
160
- label="📝 Transcribed Text (Bambara)",
161
- lines=8,
162
  placeholder="Your transcribed Bambara text will appear here...",
163
- interactive=False
 
164
  )
165
-
166
- # Examples section
 
 
 
 
167
  if example_files:
168
- gr.Markdown("## 🎵 Try These Examples")
169
- gr.Examples(
170
- examples=[[f] for f in example_files],
171
- inputs=[audio_input],
172
- outputs=output_text,
173
- fn=transcribe_audio,
174
- cache_examples=False,
175
- label="Example Audio Files"
176
- )
177
-
178
- # Information section
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  gr.Markdown(
180
  """
181
- ---
182
-
183
- ## ℹ️ About This Model
184
-
185
- - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
186
- - **Developer:** MALIBA-AI
187
- - **Language:** Bambara (bm)
188
- - **Task:** Automatic Speech Recognition (ASR)
189
- - **Sample Rate:** 16kHz (automatically resampled)
190
 
191
- ## 🚀 How to Use
 
 
 
192
 
193
- 1. **Record Audio:** Click the microphone button and speak in Bambara
194
- 2. **Upload File:** Click the upload button to select an audio file
195
- 3. **Transcribe:** Click the "Transcribe Audio" button
196
- 4. **View Results:** See your transcribed text in Bambara
197
 
198
- ## 📊 Performance Notes
199
-
200
- - Best results with clear speech and minimal background noise
201
- - Supports various audio formats and durations
202
- - Optimized for Bambara language patterns and phonetics
203
  """
204
  )
205
-
206
 
207
- transcribe_btn.click(
208
- fn=transcribe_audio,
209
- inputs=[audio_input],
210
- outputs=output_text,
211
- show_progress=True
212
- )
213
-
214
- clear_btn.click(
215
- fn=lambda: (None, ""),
216
- outputs=[audio_input, output_text]
217
- )
218
-
 
 
 
219
 
220
  audio_input.change(
221
- fn=transcribe_audio,
222
- inputs=[audio_input],
223
- outputs=output_text,
 
 
 
 
224
  show_progress=True
225
  )
226
-
227
- return demo
228
 
229
- def main():
230
- """
231
- Main function to launch the Gradio interface.
232
- """
233
- logger.info("Starting Bambara ASR Gradio interface.")
234
-
235
 
236
- interface = build_interface()
237
- interface.launch(
238
- share=False,
239
- server_name="0.0.0.0",
240
- server_port=7860
241
- )
242
-
243
- logger.info("Gradio interface launched successfully.")
244
 
245
- if __name__ == "__main__":
246
- main()
 
 
 
 
 
1
  import gradio as gr
2
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def build_interface():
5
  """
6
+ Builds an enhanced Gradio interface for Bambara speech recognition.
7
  """
 
8
  example_files = get_example_files()
9
 
10
+ custom_css = """
11
+ .gr-button-primary {
12
+ background-color: #2c5282 !important;
13
+ color: white !important;
14
+ border-radius: 8px !important;
15
+ font-weight: bold !important;
16
+ }
17
+ .gr-button-secondary {
18
+ background-color: #e2e8f0 !important;
19
+ color: #2d3748 !important;
20
+ border-radius: 8px !important;
21
+ }
22
+ .example-container {
23
+ background-color: #f7fafc;
24
+ padding: 16px;
25
+ border-radius: 8px;
26
+ margin-top: 16px;
27
+ }
28
+ .gr-textbox {
29
+ border-radius: 8px !important;
30
+ border: 1px solid #cbd5e0 !important;
31
+ }
32
+ .gr-audio {
33
+ border-radius: 8px !important;
34
+ }
35
+ .header {
36
+ text-align: center;
37
+ color: #2d3748;
38
+ }
39
+ .info-section {
40
+ background-color: #edf2f7;
41
+ padding: 16px;
42
+ border-radius: 8px;
43
+ margin-top: 16px;
44
+ }
45
+ """
46
+
47
+ with gr.Blocks(title="Bambara Speech Recognition", css=custom_css) as demo:
48
+ # Header
49
  gr.Markdown(
50
  """
51
+ <h1 class="header">🎤 Bambara Speech Recognition</h1>
52
+ <p style="text-align: center; color: #4a5568;">
53
+ Powered by <b>MALIBA-AI</b> | Convert Bambara speech to text effortlessly
54
+ </p>
 
 
 
 
 
 
 
55
  """
56
  )
 
 
 
57
 
58
+ # Main interaction section
59
+ with gr.Row():
60
+ with gr.Column(scale=1):
61
+ gr.Markdown("### 🎙️ Record or Upload Audio")
62
  audio_input = gr.Audio(
63
+ label="Record or Upload Audio",
64
  type="filepath",
65
+ sources=["microphone", "upload"],
66
+ show_label=False
67
  )
68
+ audio_preview = gr.Audio(
69
+ label="Preview Your Audio",
70
+ interactive=False,
71
+ visible=False
 
72
  )
 
73
 
74
+ with gr.Row():
75
+ transcribe_btn = gr.Button(
76
+ "🔄 Transcribe Audio",
77
+ variant="primary",
78
+ size="lg"
79
+ )
80
+ clear_btn = gr.Button(
81
+ "🗑️ Clear",
82
+ variant="secondary",
83
+ size="lg"
84
+ )
85
+
86
+ with gr.Column(scale=1):
87
+ gr.Markdown("### 📝 Transcription Output")
88
  output_text = gr.Textbox(
89
+ label="Transcribed Text (Bambara)",
90
+ lines=6,
91
  placeholder="Your transcribed Bambara text will appear here...",
92
+ interactive=False,
93
+ show_copy_button=True
94
  )
95
+ status_message = gr.Markdown(
96
+ value="",
97
+ visible=False
98
+ )
99
+
100
+ # Example audio section
101
  if example_files:
102
+ gr.Markdown("## 🎵 Try Example Audio Files")
103
+ with gr.Group(elem_classes="example-container"):
104
+ gr.Markdown(
105
+ """
106
+ Listen to these sample Bambara audio files and transcribe them with one click.
107
+ """
108
+ )
109
+ for idx, file in enumerate(example_files):
110
+ with gr.Row():
111
+ gr.Audio(
112
+ value=file,
113
+ label=f"Example {idx + 1}: {os.path.basename(file)}",
114
+ interactive=False,
115
+ show_label=True
116
+ )
117
+ gr.Button(
118
+ f"Transcribe Example {idx + 1}",
119
+ variant="primary",
120
+ size="sm"
121
+ ).click(
122
+ fn=transcribe_audio,
123
+ inputs=gr.State(value=file),
124
+ outputs=[output_text, status_message],
125
+ show_progress=True,
126
+ _js="() => {return {show_progress: true}}"
127
+ )
128
+
129
  gr.Markdown(
130
  """
131
+ <div class="info-section">
132
+ ## ℹ️ How to Use
 
 
 
 
 
 
 
133
 
134
+ 1. **Record**: Click the microphone to speak in Bambara.
135
+ 2. **Upload**: Select an audio file (WAV, MP3, M4A, FLAC, OGG).
136
+ 3. **Transcribe**: Click "Transcribe Audio" or try an example.
137
+ 4. **View**: See the transcribed text in Bambara.
138
 
139
+ ## 📊 Model Details
 
 
 
140
 
141
+ - **Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
142
+ - **Language**: Bambara (bm)
143
+ - **Sample Rate**: 16kHz (auto-resampled)
144
+ - **Best for**: Clear speech with minimal background noise
145
+ </div>
146
  """
147
  )
 
148
 
149
+
150
+ def update_audio_preview(audio_file):
151
+ return gr.update(value=audio_file, visible=True), ""
152
+
153
+ def clear_inputs():
154
+ return None, "", gr.update(visible=False), ""
155
+
156
+ def transcribe_with_status(audio_file):
157
+ if not audio_file:
158
+ return "", "**Error**: Please provide an audio file."
159
+ result = transcribe_audio(audio_file)
160
+ if "Error" in result:
161
+ return result, f"**Error**: {result}"
162
+ return result, "**Success**: Transcription completed!"
163
+
164
 
165
  audio_input.change(
166
+ fn=update_audio_preview,
167
+ inputs=audio_input,
168
+ outputs=[audio_preview, status_message]
169
+ ).then(
170
+ fn=transcribe_with_status,
171
+ inputs=audio_input,
172
+ outputs=[output_text, status_message],
173
  show_progress=True
174
  )
 
 
175
 
176
+ transcribe_btn.click(
177
+ fn=transcribe_with_status,
178
+ inputs=audio_input,
179
+ outputs=[output_text, status_message],
180
+ show_progress=True
181
+ )
182
 
183
+ clear_btn.click(
184
+ fn=clear_inputs,
185
+ outputs=[audio_input, output_text, audio_preview, status_message]
186
+ )
 
 
 
 
187
 
188
+ return demo