Spaces:
Runtime error
Runtime error
[test] translation output
Browse files
app.py
CHANGED
@@ -123,6 +123,7 @@ def transcribe_audio(audio_file, language):
|
|
123 |
# Update the language symbol if needed
|
124 |
model.beam_search.hyps = None
|
125 |
model.beam_search.pre_beam_score_key = None
|
|
|
126 |
|
127 |
if language != None:
|
128 |
model.lang_sym = language
|
@@ -136,6 +137,44 @@ def transcribe_audio(audio_file, language):
|
|
136 |
|
137 |
return text
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
# Function to handle English transcription
|
140 |
def transcribe_english(audio_file):
|
141 |
return transcribe_audio(audio_file, "<eng>")
|
@@ -252,6 +291,7 @@ with demo:
|
|
252 |
mic_button = gr.Button("Transcribe Recording")
|
253 |
with gr.Column():
|
254 |
mic_output = gr.Textbox(label="Transcription")
|
|
|
255 |
|
256 |
# Add feedback components
|
257 |
with gr.Row():
|
@@ -273,11 +313,32 @@ with demo:
|
|
273 |
|
274 |
# Special handling for Chinese with variant selection
|
275 |
if lang == "Mandarin" and chinese_variant:
|
276 |
-
|
|
|
|
|
277 |
|
278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
# Add feedback submission function
|
283 |
def submit_mic_feedback(transcription, rating, language, chinese_variant):
|
@@ -338,6 +399,7 @@ with demo:
|
|
338 |
zh_button = gr.Button("Transcribe Speech")
|
339 |
with gr.Column():
|
340 |
zh_output = gr.Textbox(label="Speech Transcription")
|
|
|
341 |
|
342 |
# Add feedback components
|
343 |
with gr.Row():
|
@@ -353,11 +415,17 @@ with demo:
|
|
353 |
inputs=zh_input
|
354 |
)
|
355 |
|
356 |
-
# Update the click function to include the Chinese variant
|
357 |
def transcribe_chinese_with_variant(audio_file, variant):
|
358 |
-
|
359 |
-
|
360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
|
362 |
# Update feedback submission to include variant
|
363 |
def submit_zh_feedback(transcription, rating, audio_path, variant):
|
@@ -376,6 +444,7 @@ with demo:
|
|
376 |
jp_button = gr.Button("Transcribe Speech")
|
377 |
with gr.Column():
|
378 |
jp_output = gr.Textbox(label="Speech Transcription")
|
|
|
379 |
|
380 |
# Add feedback components
|
381 |
with gr.Row():
|
@@ -391,7 +460,16 @@ with demo:
|
|
391 |
inputs=jp_input
|
392 |
)
|
393 |
|
394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
|
396 |
# Add feedback submission
|
397 |
def submit_jp_feedback(transcription, rating, audio_path):
|
@@ -410,6 +488,7 @@ with demo:
|
|
410 |
kr_button = gr.Button("Transcribe Speech")
|
411 |
with gr.Column():
|
412 |
kr_output = gr.Textbox(label="Speech Transcription")
|
|
|
413 |
|
414 |
# Add feedback components
|
415 |
with gr.Row():
|
@@ -425,7 +504,16 @@ with demo:
|
|
425 |
inputs=kr_input
|
426 |
)
|
427 |
|
428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
# Add feedback submission
|
431 |
def submit_kr_feedback(transcription, rating, audio_path):
|
@@ -444,6 +532,7 @@ with demo:
|
|
444 |
th_button = gr.Button("Transcribe Speech")
|
445 |
with gr.Column():
|
446 |
th_output = gr.Textbox(label="Speech Transcription")
|
|
|
447 |
|
448 |
# Add feedback components
|
449 |
with gr.Row():
|
@@ -459,7 +548,16 @@ with demo:
|
|
459 |
inputs=th_input
|
460 |
)
|
461 |
|
462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
|
464 |
# Add feedback submission
|
465 |
def submit_th_feedback(transcription, rating, audio_path):
|
@@ -478,6 +576,7 @@ with demo:
|
|
478 |
it_button = gr.Button("Transcribe Speech")
|
479 |
with gr.Column():
|
480 |
it_output = gr.Textbox(label="Speech Transcription")
|
|
|
481 |
|
482 |
# Add feedback components
|
483 |
with gr.Row():
|
@@ -493,7 +592,16 @@ with demo:
|
|
493 |
inputs=it_input
|
494 |
)
|
495 |
|
496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
497 |
|
498 |
# Add feedback submission
|
499 |
def submit_it_feedback(transcription, rating, audio_path):
|
@@ -512,6 +620,7 @@ with demo:
|
|
512 |
de_button = gr.Button("Transcribe Speech")
|
513 |
with gr.Column():
|
514 |
de_output = gr.Textbox(label="Speech Transcription")
|
|
|
515 |
|
516 |
# Add feedback components
|
517 |
with gr.Row():
|
@@ -527,7 +636,16 @@ with demo:
|
|
527 |
inputs=de_input
|
528 |
)
|
529 |
|
530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
|
532 |
# Add feedback submission
|
533 |
def submit_de_feedback(transcription, rating, audio_path):
|
|
|
123 |
# Update the language symbol if needed
|
124 |
model.beam_search.hyps = None
|
125 |
model.beam_search.pre_beam_score_key = None
|
126 |
+
model.task_sym = "<asr>" # Set default task to ASR
|
127 |
|
128 |
if language != None:
|
129 |
model.lang_sym = language
|
|
|
137 |
|
138 |
return text
|
139 |
|
140 |
+
# New function for speech translation to English
|
141 |
+
def translate_to_english(audio_file, source_language):
    """Translate the speech in ``audio_file`` into English text.

    Args:
        audio_file: Path to an audio file, or a ``(sample_rate, samples)``
            tuple (the microphone-recording form). ``None`` means that no
            audio was supplied.
        source_language: Language symbol assigned to ``model.lang_sym``
            (for example ``"<zho>"``). ``None`` leaves the model's current
            language symbol untouched.

    Returns:
        The first element of the model's top result, or a prompt message
        when ``audio_file`` is ``None``.
    """
    if audio_file is None:
        return "Please upload an audio file or record audio."

    # Track only the temporary file created here. Deleting every path that
    # happens to live under the system temp directory would also remove
    # files owned by the caller, which callers may still need after this call.
    own_temp_path = None
    try:
        # Microphone recordings arrive as a tuple and must be written to disk.
        if isinstance(audio_file, tuple):
            sr, audio_data = audio_file
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                own_temp_path = temp_audio.name
            # Write after the handle is closed so the path can be reopened
            # on every platform.
            sf.write(own_temp_path, audio_data, sr)
            audio_file = own_temp_path

        # Load and resample the audio file to 16kHz
        speech, _ = librosa.load(audio_file, sr=16000)

        # Reset beam search state
        model.beam_search.hyps = None
        model.beam_search.pre_beam_score_key = None

        # Set task to speech translation to English
        model.task_sym = "<st_eng>"

        # Set source language
        if source_language is not None:
            model.lang_sym = source_language

        # Perform speech translation
        translation, *_ = model(speech)[0]
        return translation
    finally:
        # Clean up the temporary file even when loading or inference fails.
        if own_temp_path is not None:
            try:
                os.unlink(own_temp_path)
            except FileNotFoundError:
                pass
|
177 |
+
|
178 |
# Function to handle English transcription
|
179 |
def transcribe_english(audio_file):
    """Delegate to ``transcribe_audio`` using the ``"<eng>"`` language symbol."""
    english_symbol = "<eng>"
    return transcribe_audio(audio_file, english_symbol)
|
|
|
291 |
mic_button = gr.Button("Transcribe Recording")
|
292 |
with gr.Column():
|
293 |
mic_output = gr.Textbox(label="Transcription")
|
294 |
+
mic_translation = gr.Textbox(label="English Translation", visible=False)
|
295 |
|
296 |
# Add feedback components
|
297 |
with gr.Row():
|
|
|
313 |
|
314 |
# Special handling for Chinese with variant selection
|
315 |
if lang == "Mandarin" and chinese_variant:
|
316 |
+
transcription = transcribe_chinese(audio, chinese_variant)
|
317 |
+
else:
|
318 |
+
transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
|
319 |
|
320 |
+
# Get translation if not English
|
321 |
+
translation = ""
|
322 |
+
if lang != "English":
|
323 |
+
translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
|
324 |
+
|
325 |
+
return transcription, translation, gr.update(visible=(lang != "English"))
|
326 |
+
|
327 |
+
mic_button.click(
|
328 |
+
fn=transcribe_mic,
|
329 |
+
inputs=[mic_input, language_mic, chinese_variant_mic],
|
330 |
+
outputs=[mic_output, mic_translation, mic_translation]
|
331 |
+
)
|
332 |
|
333 |
+
# Update the visibility of translation box when language changes
|
334 |
+
def update_translation_visibility(lang):
    """Build the two visibility updates returned when the language changes.

    The first update is visible only when ``lang`` is ``"Mandarin"``; the
    second is visible for every language except ``"English"``.
    """
    show_for_mandarin = lang == "Mandarin"
    show_for_non_english = lang != "English"
    return (
        gr.update(visible=show_for_mandarin),
        gr.update(visible=show_for_non_english),
    )
|
336 |
+
|
337 |
+
language_mic.change(
|
338 |
+
fn=update_translation_visibility,
|
339 |
+
inputs=language_mic,
|
340 |
+
outputs=[chinese_variant_mic, mic_translation]
|
341 |
+
)
|
342 |
|
343 |
# Add feedback submission function
|
344 |
def submit_mic_feedback(transcription, rating, language, chinese_variant):
|
|
|
399 |
zh_button = gr.Button("Transcribe Speech")
|
400 |
with gr.Column():
|
401 |
zh_output = gr.Textbox(label="Speech Transcription")
|
402 |
+
zh_translation = gr.Textbox(label="English Translation")
|
403 |
|
404 |
# Add feedback components
|
405 |
with gr.Row():
|
|
|
415 |
inputs=zh_input
|
416 |
)
|
417 |
|
418 |
+
# Update the click function to include the Chinese variant and translation
|
419 |
def transcribe_chinese_with_variant(audio_file, variant):
    """Return ``(transcription, translation)`` for the given audio.

    The transcription comes from ``transcribe_chinese`` with the selected
    ``variant``; the translation comes from ``translate_to_english`` with
    the ``"<zho>"`` source symbol.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_chinese(audio_file, variant),
        translate_to_english(audio_file, "<zho>"),
    )
|
423 |
+
|
424 |
+
zh_button.click(
|
425 |
+
fn=transcribe_chinese_with_variant,
|
426 |
+
inputs=[zh_input, chinese_variant],
|
427 |
+
outputs=[zh_output, zh_translation]
|
428 |
+
)
|
429 |
|
430 |
# Update feedback submission to include variant
|
431 |
def submit_zh_feedback(transcription, rating, audio_path, variant):
|
|
|
444 |
jp_button = gr.Button("Transcribe Speech")
|
445 |
with gr.Column():
|
446 |
jp_output = gr.Textbox(label="Speech Transcription")
|
447 |
+
jp_translation = gr.Textbox(label="English Translation")
|
448 |
|
449 |
# Add feedback components
|
450 |
with gr.Row():
|
|
|
460 |
inputs=jp_input
|
461 |
)
|
462 |
|
463 |
+
def transcribe_and_translate_japanese(audio_file):
    """Return ``(transcription, translation)`` for the given audio.

    Calls ``transcribe_japanese`` and then ``translate_to_english`` with
    the ``"<jpn>"`` source symbol, both on the same ``audio_file``.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_japanese(audio_file),
        translate_to_english(audio_file, "<jpn>"),
    )
|
467 |
+
|
468 |
+
jp_button.click(
|
469 |
+
fn=transcribe_and_translate_japanese,
|
470 |
+
inputs=jp_input,
|
471 |
+
outputs=[jp_output, jp_translation]
|
472 |
+
)
|
473 |
|
474 |
# Add feedback submission
|
475 |
def submit_jp_feedback(transcription, rating, audio_path):
|
|
|
488 |
kr_button = gr.Button("Transcribe Speech")
|
489 |
with gr.Column():
|
490 |
kr_output = gr.Textbox(label="Speech Transcription")
|
491 |
+
kr_translation = gr.Textbox(label="English Translation")
|
492 |
|
493 |
# Add feedback components
|
494 |
with gr.Row():
|
|
|
504 |
inputs=kr_input
|
505 |
)
|
506 |
|
507 |
+
def transcribe_and_translate_korean(audio_file):
    """Return ``(transcription, translation)`` for the given audio.

    Calls ``transcribe_korean`` and then ``translate_to_english`` with
    the ``"<kor>"`` source symbol, both on the same ``audio_file``.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_korean(audio_file),
        translate_to_english(audio_file, "<kor>"),
    )
|
511 |
+
|
512 |
+
kr_button.click(
|
513 |
+
fn=transcribe_and_translate_korean,
|
514 |
+
inputs=kr_input,
|
515 |
+
outputs=[kr_output, kr_translation]
|
516 |
+
)
|
517 |
|
518 |
# Add feedback submission
|
519 |
def submit_kr_feedback(transcription, rating, audio_path):
|
|
|
532 |
th_button = gr.Button("Transcribe Speech")
|
533 |
with gr.Column():
|
534 |
th_output = gr.Textbox(label="Speech Transcription")
|
535 |
+
th_translation = gr.Textbox(label="English Translation")
|
536 |
|
537 |
# Add feedback components
|
538 |
with gr.Row():
|
|
|
548 |
inputs=th_input
|
549 |
)
|
550 |
|
551 |
+
def transcribe_and_translate_thai(audio_file):
    """Return ``(transcription, translation)`` for the given audio.

    Calls ``transcribe_thai`` and then ``translate_to_english`` with
    the ``"<tha>"`` source symbol, both on the same ``audio_file``.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_thai(audio_file),
        translate_to_english(audio_file, "<tha>"),
    )
|
555 |
+
|
556 |
+
th_button.click(
|
557 |
+
fn=transcribe_and_translate_thai,
|
558 |
+
inputs=th_input,
|
559 |
+
outputs=[th_output, th_translation]
|
560 |
+
)
|
561 |
|
562 |
# Add feedback submission
|
563 |
def submit_th_feedback(transcription, rating, audio_path):
|
|
|
576 |
it_button = gr.Button("Transcribe Speech")
|
577 |
with gr.Column():
|
578 |
it_output = gr.Textbox(label="Speech Transcription")
|
579 |
+
it_translation = gr.Textbox(label="English Translation")
|
580 |
|
581 |
# Add feedback components
|
582 |
with gr.Row():
|
|
|
592 |
inputs=it_input
|
593 |
)
|
594 |
|
595 |
+
def transcribe_and_translate_italian(audio_file):
    """Return ``(transcription, translation)`` for the given audio.

    Calls ``transcribe_italian`` and then ``translate_to_english`` with
    the ``"<ita>"`` source symbol, both on the same ``audio_file``.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_italian(audio_file),
        translate_to_english(audio_file, "<ita>"),
    )
|
599 |
+
|
600 |
+
it_button.click(
|
601 |
+
fn=transcribe_and_translate_italian,
|
602 |
+
inputs=it_input,
|
603 |
+
outputs=[it_output, it_translation]
|
604 |
+
)
|
605 |
|
606 |
# Add feedback submission
|
607 |
def submit_it_feedback(transcription, rating, audio_path):
|
|
|
620 |
de_button = gr.Button("Transcribe Speech")
|
621 |
with gr.Column():
|
622 |
de_output = gr.Textbox(label="Speech Transcription")
|
623 |
+
de_translation = gr.Textbox(label="English Translation")
|
624 |
|
625 |
# Add feedback components
|
626 |
with gr.Row():
|
|
|
636 |
inputs=de_input
|
637 |
)
|
638 |
|
639 |
+
def transcribe_and_translate_german(audio_file):
    """Return ``(transcription, translation)`` for the given audio.

    Calls ``transcribe_german`` and then ``translate_to_english`` with
    the ``"<deu>"`` source symbol, both on the same ``audio_file``.
    """
    # Tuple elements are evaluated left to right: transcription runs first.
    return (
        transcribe_german(audio_file),
        translate_to_english(audio_file, "<deu>"),
    )
|
643 |
+
|
644 |
+
de_button.click(
|
645 |
+
fn=transcribe_and_translate_german,
|
646 |
+
inputs=de_input,
|
647 |
+
outputs=[de_output, de_translation]
|
648 |
+
)
|
649 |
|
650 |
# Add feedback submission
|
651 |
def submit_de_feedback(transcription, rating, audio_path):
|