Revert "[test] translation output"
This reverts commit 1dad4ac0a0ba28c8ec20e577e133a18b8a3e4853.
app.py
CHANGED
@@ -123,7 +123,6 @@ def transcribe_audio(audio_file, language):
     # Update the language symbol if needed
     model.beam_search.hyps = None
     model.beam_search.pre_beam_score_key = None
-    model.task_sym = "<asr>"  # Set default task to ASR
 
     if language != None:
         model.lang_sym = language

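For orientation: transcribe_audio clears the decoder state and sets the task and language symbols on every call, and the removed line above is what pinned the task back to ASR. A minimal sketch of that per-call setup, assuming model is the OWSM Speech2Text object that app.py loads at startup; the helper name prepare_for_asr is hypothetical and introduced only for illustration:

    def prepare_for_asr(model, language=None):
        # Hypothetical helper; mirrors the per-call setup shown in the diff.
        # Clear state left over from the previous request.
        model.beam_search.hyps = None
        model.beam_search.pre_beam_score_key = None
        # "<asr>" selects plain transcription; "<st_eng>" selects
        # speech translation to English (see the removed helper below).
        model.task_sym = "<asr>"
        if language is not None:
            model.lang_sym = language  # e.g. "<eng>", "<jpn>", "<kor>"
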
@@ -137,44 +136,6 @@ def transcribe_audio(audio_file, language):
 
     return text
 
-# New function for speech translation to English
-def translate_to_english(audio_file, source_language):
-    """Process the audio file and return the English translation"""
-    if audio_file is None:
-        return "Please upload an audio file or record audio."
-
-    # If audio is a tuple (from microphone recording)
-    if isinstance(audio_file, tuple):
-        sr, audio_data = audio_file
-        # Create a temporary file to save the audio
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-            temp_path = temp_audio.name
-            sf.write(temp_path, audio_data, sr)
-        audio_file = temp_path
-
-    # Load and resample the audio file to 16kHz
-    speech, _ = librosa.load(audio_file, sr=16000)
-
-    # Reset beam search state
-    model.beam_search.hyps = None
-    model.beam_search.pre_beam_score_key = None
-
-    # Set task to speech translation to English
-    model.task_sym = "<st_eng>"
-
-    # Set source language
-    if source_language != None:
-        model.lang_sym = source_language
-
-    # Perform speech translation
-    translation, *_ = model(speech)[0]
-
-    # Clean up temporary file if created
-    if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
-        os.unlink(audio_file)
-
-    return translation
-
 # Function to handle English transcription
 def transcribe_english(audio_file):
     return transcribe_audio(audio_file, "<eng>")

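The block removed above is the entire speech-translation path: microphone input arriving as a (sample_rate, array) tuple is written to a temporary WAV, the audio is resampled to 16 kHz, the task symbol is switched from ASR to "<st_eng>", and the text of the best hypothesis is returned. A condensed sketch of the core call for a WAV file on disk, assuming the same module-level model object; the function name translate_wav_to_english and the commented checkpoint name are guesses for illustration:

    import librosa

    # from espnet2.bin.s2t_inference import Speech2Text
    # model = Speech2Text.from_pretrained("espnet/owsm_v3.1_ebf")  # checkpoint name is a guess

    def translate_wav_to_english(path, source_language="<jpn>"):
        # Hypothetical condensed version of the reverted helper.
        speech, _ = librosa.load(path, sr=16000)     # OWSM expects 16 kHz audio
        model.beam_search.hyps = None                # reset decoder state
        model.beam_search.pre_beam_score_key = None
        model.task_sym = "<st_eng>"                  # task: speech translation to English
        model.lang_sym = source_language             # language spoken in the input audio
        translation, *_ = model(speech)[0]           # text of the best hypothesis
        return translation
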
@@ -291,7 +252,6 @@ with demo:
                 mic_button = gr.Button("Transcribe Recording")
             with gr.Column():
                 mic_output = gr.Textbox(label="Transcription")
-                mic_translation = gr.Textbox(label="English Translation", visible=False)
 
         # Add feedback components
         with gr.Row():

@@ -313,32 +273,11 @@ with demo:
 
             # Special handling for Chinese with variant selection
             if lang == "Mandarin" and chinese_variant:
-
-            else:
-                transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
+                return transcribe_chinese(audio, chinese_variant.lower())
 
-
-            translation = ""
-            if lang != "English":
-                translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
-
-            return transcription, translation, gr.update(visible=(lang != "English"))
-
-        mic_button.click(
-            fn=transcribe_mic,
-            inputs=[mic_input, language_mic, chinese_variant_mic],
-            outputs=[mic_output, mic_translation, mic_translation]
-        )
+            return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
 
-
-        def update_translation_visibility(lang):
-            return gr.update(visible=(lang == "Mandarin")), gr.update(visible=(lang != "English"))
-
-        language_mic.change(
-            fn=update_translation_visibility,
-            inputs=language_mic,
-            outputs=[chinese_variant_mic, mic_translation]
-        )
+        mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic, chinese_variant_mic], outputs=mic_output)
 
         # Add feedback submission function
         def submit_mic_feedback(transcription, rating, language, chinese_variant):

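Besides the handler itself, the revert drops the dynamic-visibility wiring: the test version returned a third value (a gr.update) so the translation box only appeared for non-English input, and reused the same idea when the language dropdown changed. A sketch of that pattern, using the component names from the diff (language_mic, chinese_variant_mic, mic_translation); the .change call is left commented out because those components are defined elsewhere in app.py:

    import gradio as gr

    def update_translation_visibility(lang):
        # Show the Chinese-variant selector only for Mandarin and the
        # English-translation box only for non-English input.
        return (
            gr.update(visible=(lang == "Mandarin")),
            gr.update(visible=(lang != "English")),
        )

    # language_mic.change(
    #     fn=update_translation_visibility,
    #     inputs=language_mic,
    #     outputs=[chinese_variant_mic, mic_translation],
    # )
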
@@ -399,7 +338,6 @@ with demo:
                 zh_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 zh_output = gr.Textbox(label="Speech Transcription")
-                zh_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -415,17 +353,11 @@ with demo:
             inputs=zh_input
         )
 
-        # Update the click function to include the Chinese variant
+        # Update the click function to include the Chinese variant
         def transcribe_chinese_with_variant(audio_file, variant):
-
-
-
-
-        zh_button.click(
-            fn=transcribe_chinese_with_variant,
-            inputs=[zh_input, chinese_variant],
-            outputs=[zh_output, zh_translation]
-        )
+            return transcribe_chinese(audio_file, variant.lower())
+
+        zh_button.click(fn=transcribe_chinese_with_variant, inputs=[zh_input, chinese_variant], outputs=zh_output)
 
         # Update feedback submission to include variant
         def submit_zh_feedback(transcription, rating, audio_path, variant):

@@ -444,7 +376,6 @@ with demo:
                 jp_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 jp_output = gr.Textbox(label="Speech Transcription")
-                jp_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -460,16 +391,7 @@ with demo:
             inputs=jp_input
         )
 
-
-            transcription = transcribe_japanese(audio_file)
-            translation = translate_to_english(audio_file, "<jpn>")
-            return transcription, translation
-
-        jp_button.click(
-            fn=transcribe_and_translate_japanese,
-            inputs=jp_input,
-            outputs=[jp_output, jp_translation]
-        )
+        jp_button.click(fn=transcribe_japanese, inputs=jp_input, outputs=jp_output)
 
         # Add feedback submission
         def submit_jp_feedback(transcription, rating, audio_path):

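The same before/after shape repeats for the Korean, Thai, Italian, and German tabs below: the test commit wrapped each language's transcriber in a transcribe_and_translate_* helper returning two strings for two Textboxes, and the revert points each button back at the single-output transcriber. A sketch of the removed two-output wiring for the Japanese tab, with the function signature assumed from how jp_button.click referenced it:

    def transcribe_and_translate_japanese(audio_file):
        transcription = transcribe_japanese(audio_file)           # ASR with "<jpn>"
        translation = translate_to_english(audio_file, "<jpn>")   # "<st_eng>" task
        return transcription, translation                         # one value per output box

    # Two return values map positionally onto the two output components:
    # jp_button.click(fn=transcribe_and_translate_japanese,
    #                 inputs=jp_input,
    #                 outputs=[jp_output, jp_translation])
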
@@ -488,7 +410,6 @@ with demo:
                 kr_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 kr_output = gr.Textbox(label="Speech Transcription")
-                kr_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -504,16 +425,7 @@ with demo:
             inputs=kr_input
         )
 
-
-            transcription = transcribe_korean(audio_file)
-            translation = translate_to_english(audio_file, "<kor>")
-            return transcription, translation
-
-        kr_button.click(
-            fn=transcribe_and_translate_korean,
-            inputs=kr_input,
-            outputs=[kr_output, kr_translation]
-        )
+        kr_button.click(fn=transcribe_korean, inputs=kr_input, outputs=kr_output)
 
         # Add feedback submission
        def submit_kr_feedback(transcription, rating, audio_path):

@@ -532,7 +444,6 @@ with demo:
                 th_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 th_output = gr.Textbox(label="Speech Transcription")
-                th_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -548,16 +459,7 @@ with demo:
             inputs=th_input
         )
 
-
-            transcription = transcribe_thai(audio_file)
-            translation = translate_to_english(audio_file, "<tha>")
-            return transcription, translation
-
-        th_button.click(
-            fn=transcribe_and_translate_thai,
-            inputs=th_input,
-            outputs=[th_output, th_translation]
-        )
+        th_button.click(fn=transcribe_thai, inputs=th_input, outputs=th_output)
 
         # Add feedback submission
         def submit_th_feedback(transcription, rating, audio_path):

@@ -576,7 +478,6 @@ with demo:
                 it_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 it_output = gr.Textbox(label="Speech Transcription")
-                it_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -592,16 +493,7 @@ with demo:
             inputs=it_input
         )
 
-
-            transcription = transcribe_italian(audio_file)
-            translation = translate_to_english(audio_file, "<ita>")
-            return transcription, translation
-
-        it_button.click(
-            fn=transcribe_and_translate_italian,
-            inputs=it_input,
-            outputs=[it_output, it_translation]
-        )
+        it_button.click(fn=transcribe_italian, inputs=it_input, outputs=it_output)
 
         # Add feedback submission
         def submit_it_feedback(transcription, rating, audio_path):

@@ -620,7 +512,6 @@ with demo:
                 de_button = gr.Button("Transcribe Speech")
             with gr.Column():
                 de_output = gr.Textbox(label="Speech Transcription")
-                de_translation = gr.Textbox(label="English Translation")
 
         # Add feedback components
         with gr.Row():

@@ -636,16 +527,7 @@ with demo:
             inputs=de_input
         )
 
-
-            transcription = transcribe_german(audio_file)
-            translation = translate_to_english(audio_file, "<deu>")
-            return transcription, translation
-
-        de_button.click(
-            fn=transcribe_and_translate_german,
-            inputs=de_input,
-            outputs=[de_output, de_translation]
-        )
+        de_button.click(fn=transcribe_german, inputs=de_input, outputs=de_output)
 
         # Add feedback submission
         def submit_de_feedback(transcription, rating, audio_path):