[demo] feedbacks

app.py CHANGED
@@ -9,6 +9,9 @@ import librosa
 import torch
 import torch.cuda
 import gc
+import json
+import datetime
+from pathlib import Path
 
 # Check if required packages are installed, if not install them
 try:
@@ -141,6 +144,32 @@ def transcribe_italian(audio_file):
 def transcribe_german(audio_file):
     return transcribe_audio(audio_file, "<deu>")
 
+# Create a function to save feedback
+def save_feedback(transcription, rating, language, audio_path=None):
+    """Save user feedback to a JSON file"""
+    # Create feedback directory if it doesn't exist
+    feedback_dir = Path("feedback_data")
+    feedback_dir.mkdir(exist_ok=True)
+
+    # Create a unique filename based on timestamp
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    feedback_file = feedback_dir / f"feedback_{timestamp}.json"
+
+    # Prepare feedback data
+    feedback_data = {
+        "timestamp": timestamp,
+        "language": language,
+        "transcription": transcription,
+        "rating": rating,
+        "audio_path": str(audio_path) if audio_path else None
+    }
+
+    # Save to JSON file
+    with open(feedback_file, "w", encoding="utf-8") as f:
+        json.dump(feedback_data, f, ensure_ascii=False, indent=2)
+
+    return "Thank you for your feedback!"
+
 # Create the Gradio interface with tabs
 demo = gr.Blocks(title="NVIDIA Research Multilingual Demo")
 
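For reference, each submission handled by save_feedback above is written as a small JSON file under feedback_data/ (e.g. feedback_20250101_120000.json). The keys mirror the dictionary built in the function; the concrete values below are made-up examples:

{
  "timestamp": "20250101_120000",
  "language": "English",
  "transcription": "hello world",
  "rating": 4,
  "audio_path": "wav_en_sample_48k.wav"
}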
@@ -151,7 +180,7 @@ with demo:
     with gr.Tabs():
         with gr.TabItem("Microphone Recording"):
             language_mic = gr.Radio(
-                ["English", "Mandarin", "Japanese", "Korean", "Thai", "Italian", "German"],
+                ["English", "English-Mandarin", "Japanese", "Korean", "Thai", "Italian", "German"],
                 label="Select Language",
                 value="English"
             )
@@ -163,6 +192,13 @@ with demo:
                 with gr.Column():
                     mic_output = gr.Textbox(label="Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        mic_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                               label="Rate the transcription quality (1=worst, 5=best)")
+                        mic_feedback_btn = gr.Button("Submit Feedback")
+                        mic_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             def transcribe_mic(audio, lang):
                 lang_map = {
                     "English": "<eng>",
@@ -176,6 +212,17 @@ with demo:
                 return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
 
             mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic], outputs=mic_output)
+
+            # Add feedback submission function
+            def submit_mic_feedback(transcription, rating, language):
+                lang_name = language  # Already a string like "English"
+                return save_feedback(transcription, rating, lang_name)
+
+            mic_feedback_btn.click(
+                fn=submit_mic_feedback,
+                inputs=[mic_output, mic_rating, language_mic],
+                outputs=mic_feedback_msg
+            )
 
         with gr.TabItem("English"):
             with gr.Row():
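The rating slider, submit button, and status box added above, together with their click() wiring, are repeated with the same literals for every language tab below. As an editorial sketch (not part of this commit), the pattern could be factored into a helper that each tab calls once; the name add_feedback_ui and its signature are hypothetical, and it assumes the save_feedback function introduced earlier in this diff:

# Sketch only: factor the repeated per-tab feedback UI into a single helper.
# It must be called inside the gr.Blocks()/TabItem context that should own the widgets.
import gradio as gr

def add_feedback_ui(output_box, language_name, audio_input=None):
    """Create rating/submit/status widgets for one tab and wire them to save_feedback."""
    with gr.Row():
        rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
                           label="Rate the transcription quality (1=worst, 5=best)")
        feedback_btn = gr.Button("Submit Feedback")
        feedback_msg = gr.Textbox(label="Feedback Status", visible=True)

    def _submit(transcription, rating_value, audio_path=None):
        return save_feedback(transcription, rating_value, language_name, audio_path)

    # Pass the audio component through only when the tab has one.
    inputs = [output_box, rating] + ([audio_input] if audio_input is not None else [])
    feedback_btn.click(fn=_submit, inputs=inputs, outputs=feedback_msg)
    return rating, feedback_btn, feedback_msg

With such a helper, each tab body below would reduce to a single call like add_feedback_ui(en_output, "English", en_input).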
@@ -185,6 +232,13 @@ with demo:
                 with gr.Column():
                     en_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        en_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        en_feedback_btn = gr.Button("Submit Feedback")
+                        en_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_en_sample_48k.wav"):
                 gr.Examples(
@@ -194,6 +248,16 @@ with demo:
                 )
             en_button.click(fn=transcribe_english, inputs=en_input, outputs=en_output)
 
+            # Add feedback submission
+            def submit_en_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "English", audio_path)
+
+            en_feedback_btn.click(
+                fn=submit_en_feedback,
+                inputs=[en_output, en_rating, en_input],
+                outputs=en_feedback_msg
+            )
+
         with gr.TabItem("Mandarin"):
             with gr.Row():
                 with gr.Column():
@@ -202,6 +266,13 @@ with demo:
                 with gr.Column():
                     zh_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        zh_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        zh_feedback_btn = gr.Button("Submit Feedback")
+                        zh_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_zh_tw_sample_16k.wav"):
                 gr.Examples(
@@ -211,6 +282,16 @@ with demo:
                 )
             zh_button.click(fn=transcribe_chinese, inputs=zh_input, outputs=zh_output)
 
+            # Add feedback submission
+            def submit_zh_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "Mandarin", audio_path)
+
+            zh_feedback_btn.click(
+                fn=submit_zh_feedback,
+                inputs=[zh_output, zh_rating, zh_input],
+                outputs=zh_feedback_msg
+            )
+
         with gr.TabItem("Japanese"):
             with gr.Row():
                 with gr.Column():
@@ -219,6 +300,13 @@ with demo:
                 with gr.Column():
                     jp_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        jp_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        jp_feedback_btn = gr.Button("Submit Feedback")
+                        jp_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_jp_sample_48k.wav"):
                 gr.Examples(
@@ -228,6 +316,16 @@ with demo:
                 )
             jp_button.click(fn=transcribe_japanese, inputs=jp_input, outputs=jp_output)
 
+            # Add feedback submission
+            def submit_jp_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "Japanese", audio_path)
+
+            jp_feedback_btn.click(
+                fn=submit_jp_feedback,
+                inputs=[jp_output, jp_rating, jp_input],
+                outputs=jp_feedback_msg
+            )
+
         with gr.TabItem("Korean"):
             with gr.Row():
                 with gr.Column():
@@ -236,6 +334,13 @@ with demo:
                 with gr.Column():
                     kr_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        kr_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        kr_feedback_btn = gr.Button("Submit Feedback")
+                        kr_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_kr_sample_48k.wav"):
                 gr.Examples(
@@ -245,6 +350,16 @@ with demo:
                 )
             kr_button.click(fn=transcribe_korean, inputs=kr_input, outputs=kr_output)
 
+            # Add feedback submission
+            def submit_kr_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "Korean", audio_path)
+
+            kr_feedback_btn.click(
+                fn=submit_kr_feedback,
+                inputs=[kr_output, kr_rating, kr_input],
+                outputs=kr_feedback_msg
+            )
+
         with gr.TabItem("Thai"):
             with gr.Row():
                 with gr.Column():
@@ -253,6 +368,13 @@ with demo:
                 with gr.Column():
                     th_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        th_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        th_feedback_btn = gr.Button("Submit Feedback")
+                        th_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_thai_sample.wav"):
                 gr.Examples(
@@ -262,6 +384,16 @@ with demo:
                 )
             th_button.click(fn=transcribe_thai, inputs=th_input, outputs=th_output)
 
+            # Add feedback submission
+            def submit_th_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "Thai", audio_path)
+
+            th_feedback_btn.click(
+                fn=submit_th_feedback,
+                inputs=[th_output, th_rating, th_input],
+                outputs=th_feedback_msg
+            )
+
         with gr.TabItem("Italian"):
             with gr.Row():
                 with gr.Column():
@@ -270,6 +402,13 @@ with demo:
                 with gr.Column():
                     it_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        it_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        it_feedback_btn = gr.Button("Submit Feedback")
+                        it_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_it_sample.wav"):
                 gr.Examples(
@@ -278,6 +417,16 @@ with demo:
                 )
 
             it_button.click(fn=transcribe_italian, inputs=it_input, outputs=it_output)
+
+            # Add feedback submission
+            def submit_it_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "Italian", audio_path)
+
+            it_feedback_btn.click(
+                fn=submit_it_feedback,
+                inputs=[it_output, it_rating, it_input],
+                outputs=it_feedback_msg
+            )
 
         with gr.TabItem("German"):
             with gr.Row():
@@ -287,6 +436,13 @@ with demo:
                 with gr.Column():
                     de_output = gr.Textbox(label="Speech Transcription")
 
+                    # Add feedback components
+                    with gr.Row():
+                        de_rating = gr.Slider(minimum=1, maximum=5, step=1, value=3,
+                                              label="Rate the transcription quality (1=worst, 5=best)")
+                        de_feedback_btn = gr.Button("Submit Feedback")
+                        de_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
+
             # Add example if the file exists
             if os.path.exists("wav_de_sample.wav"):
                 gr.Examples(
@@ -295,6 +451,16 @@ with demo:
                 )
 
             de_button.click(fn=transcribe_german, inputs=de_input, outputs=de_output)
+
+            # Add feedback submission
+            def submit_de_feedback(transcription, rating, audio_path):
+                return save_feedback(transcription, rating, "German", audio_path)
+
+            de_feedback_btn.click(
+                fn=submit_de_feedback,
+                inputs=[de_output, de_rating, de_input],
+                outputs=de_feedback_msg
+            )
 
 # Launch the app with Hugging Face Spaces compatible settings
 if __name__ == "__main__":
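Once the Space has collected some feedback, the files written by save_feedback can be read back for a quick summary. A minimal sketch, assuming only the feedback_data/ layout shown in this diff and run from the Space's working directory:

# Sketch: average the ratings stored by save_feedback, grouped by language.
import json
from pathlib import Path

ratings_by_language = {}
for path in sorted(Path("feedback_data").glob("feedback_*.json")):
    entry = json.loads(path.read_text(encoding="utf-8"))
    ratings_by_language.setdefault(entry["language"], []).append(entry["rating"])

if not ratings_by_language:
    print("No feedback collected yet.")
for language, ratings in sorted(ratings_by_language.items()):
    print(f"{language}: {sum(ratings) / len(ratings):.2f} average over {len(ratings)} ratings")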