huckiyang committed on
Commit
838f0a0
·
1 Parent(s): f95bbb9

Revert "[test] translation output"

Browse files

This reverts commit 1dad4ac0a0ba28c8ec20e577e133a18b8a3e4853.

Files changed (1) hide show
  1. app.py +12 -130
app.py CHANGED
@@ -123,7 +123,6 @@ def transcribe_audio(audio_file, language):
123
  # Update the language symbol if needed
124
  model.beam_search.hyps = None
125
  model.beam_search.pre_beam_score_key = None
126
- model.task_sym = "<asr>" # Set default task to ASR
127
 
128
  if language != None:
129
  model.lang_sym = language
@@ -137,44 +136,6 @@ def transcribe_audio(audio_file, language):
137
 
138
  return text
139
 
140
- # New function for speech translation to English
141
- def translate_to_english(audio_file, source_language):
142
- """Process the audio file and return the English translation"""
143
- if audio_file is None:
144
- return "Please upload an audio file or record audio."
145
-
146
- # If audio is a tuple (from microphone recording)
147
- if isinstance(audio_file, tuple):
148
- sr, audio_data = audio_file
149
- # Create a temporary file to save the audio
150
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
151
- temp_path = temp_audio.name
152
- sf.write(temp_path, audio_data, sr)
153
- audio_file = temp_path
154
-
155
- # Load and resample the audio file to 16kHz
156
- speech, _ = librosa.load(audio_file, sr=16000)
157
-
158
- # Reset beam search state
159
- model.beam_search.hyps = None
160
- model.beam_search.pre_beam_score_key = None
161
-
162
- # Set task to speech translation to English
163
- model.task_sym = "<st_eng>"
164
-
165
- # Set source language
166
- if source_language != None:
167
- model.lang_sym = source_language
168
-
169
- # Perform speech translation
170
- translation, *_ = model(speech)[0]
171
-
172
- # Clean up temporary file if created
173
- if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
174
- os.unlink(audio_file)
175
-
176
- return translation
177
-
178
  # Function to handle English transcription
179
  def transcribe_english(audio_file):
180
  return transcribe_audio(audio_file, "<eng>")
@@ -291,7 +252,6 @@ with demo:
291
  mic_button = gr.Button("Transcribe Recording")
292
  with gr.Column():
293
  mic_output = gr.Textbox(label="Transcription")
294
- mic_translation = gr.Textbox(label="English Translation", visible=False)
295
 
296
  # Add feedback components
297
  with gr.Row():
@@ -313,32 +273,11 @@ with demo:
313
 
314
  # Special handling for Chinese with variant selection
315
  if lang == "Mandarin" and chinese_variant:
316
- transcription = transcribe_chinese(audio, chinese_variant)
317
- else:
318
- transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
319
 
320
- # Get translation if not English
321
- translation = ""
322
- if lang != "English":
323
- translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
324
-
325
- return transcription, translation, gr.update(visible=(lang != "English"))
326
-
327
- mic_button.click(
328
- fn=transcribe_mic,
329
- inputs=[mic_input, language_mic, chinese_variant_mic],
330
- outputs=[mic_output, mic_translation, mic_translation]
331
- )
332
 
333
- # Update the visibility of translation box when language changes
334
- def update_translation_visibility(lang):
335
- return gr.update(visible=(lang == "Mandarin")), gr.update(visible=(lang != "English"))
336
-
337
- language_mic.change(
338
- fn=update_translation_visibility,
339
- inputs=language_mic,
340
- outputs=[chinese_variant_mic, mic_translation]
341
- )
342
 
343
  # Add feedback submission function
344
  def submit_mic_feedback(transcription, rating, language, chinese_variant):
@@ -399,7 +338,6 @@ with demo:
399
  zh_button = gr.Button("Transcribe Speech")
400
  with gr.Column():
401
  zh_output = gr.Textbox(label="Speech Transcription")
402
- zh_translation = gr.Textbox(label="English Translation")
403
 
404
  # Add feedback components
405
  with gr.Row():
@@ -415,17 +353,11 @@ with demo:
415
  inputs=zh_input
416
  )
417
 
418
- # Update the click function to include the Chinese variant and translation
419
  def transcribe_chinese_with_variant(audio_file, variant):
420
- transcription = transcribe_chinese(audio_file, variant)
421
- translation = translate_to_english(audio_file, "<zho>")
422
- return transcription, translation
423
-
424
- zh_button.click(
425
- fn=transcribe_chinese_with_variant,
426
- inputs=[zh_input, chinese_variant],
427
- outputs=[zh_output, zh_translation]
428
- )
429
 
430
  # Update feedback submission to include variant
431
  def submit_zh_feedback(transcription, rating, audio_path, variant):
@@ -444,7 +376,6 @@ with demo:
444
  jp_button = gr.Button("Transcribe Speech")
445
  with gr.Column():
446
  jp_output = gr.Textbox(label="Speech Transcription")
447
- jp_translation = gr.Textbox(label="English Translation")
448
 
449
  # Add feedback components
450
  with gr.Row():
@@ -460,16 +391,7 @@ with demo:
460
  inputs=jp_input
461
  )
462
 
463
- def transcribe_and_translate_japanese(audio_file):
464
- transcription = transcribe_japanese(audio_file)
465
- translation = translate_to_english(audio_file, "<jpn>")
466
- return transcription, translation
467
-
468
- jp_button.click(
469
- fn=transcribe_and_translate_japanese,
470
- inputs=jp_input,
471
- outputs=[jp_output, jp_translation]
472
- )
473
 
474
  # Add feedback submission
475
  def submit_jp_feedback(transcription, rating, audio_path):
@@ -488,7 +410,6 @@ with demo:
488
  kr_button = gr.Button("Transcribe Speech")
489
  with gr.Column():
490
  kr_output = gr.Textbox(label="Speech Transcription")
491
- kr_translation = gr.Textbox(label="English Translation")
492
 
493
  # Add feedback components
494
  with gr.Row():
@@ -504,16 +425,7 @@ with demo:
504
  inputs=kr_input
505
  )
506
 
507
- def transcribe_and_translate_korean(audio_file):
508
- transcription = transcribe_korean(audio_file)
509
- translation = translate_to_english(audio_file, "<kor>")
510
- return transcription, translation
511
-
512
- kr_button.click(
513
- fn=transcribe_and_translate_korean,
514
- inputs=kr_input,
515
- outputs=[kr_output, kr_translation]
516
- )
517
 
518
  # Add feedback submission
519
  def submit_kr_feedback(transcription, rating, audio_path):
@@ -532,7 +444,6 @@ with demo:
532
  th_button = gr.Button("Transcribe Speech")
533
  with gr.Column():
534
  th_output = gr.Textbox(label="Speech Transcription")
535
- th_translation = gr.Textbox(label="English Translation")
536
 
537
  # Add feedback components
538
  with gr.Row():
@@ -548,16 +459,7 @@ with demo:
548
  inputs=th_input
549
  )
550
 
551
- def transcribe_and_translate_thai(audio_file):
552
- transcription = transcribe_thai(audio_file)
553
- translation = translate_to_english(audio_file, "<tha>")
554
- return transcription, translation
555
-
556
- th_button.click(
557
- fn=transcribe_and_translate_thai,
558
- inputs=th_input,
559
- outputs=[th_output, th_translation]
560
- )
561
 
562
  # Add feedback submission
563
  def submit_th_feedback(transcription, rating, audio_path):
@@ -576,7 +478,6 @@ with demo:
576
  it_button = gr.Button("Transcribe Speech")
577
  with gr.Column():
578
  it_output = gr.Textbox(label="Speech Transcription")
579
- it_translation = gr.Textbox(label="English Translation")
580
 
581
  # Add feedback components
582
  with gr.Row():
@@ -592,16 +493,7 @@ with demo:
592
  inputs=it_input
593
  )
594
 
595
- def transcribe_and_translate_italian(audio_file):
596
- transcription = transcribe_italian(audio_file)
597
- translation = translate_to_english(audio_file, "<ita>")
598
- return transcription, translation
599
-
600
- it_button.click(
601
- fn=transcribe_and_translate_italian,
602
- inputs=it_input,
603
- outputs=[it_output, it_translation]
604
- )
605
 
606
  # Add feedback submission
607
  def submit_it_feedback(transcription, rating, audio_path):
@@ -620,7 +512,6 @@ with demo:
620
  de_button = gr.Button("Transcribe Speech")
621
  with gr.Column():
622
  de_output = gr.Textbox(label="Speech Transcription")
623
- de_translation = gr.Textbox(label="English Translation")
624
 
625
  # Add feedback components
626
  with gr.Row():
@@ -636,16 +527,7 @@ with demo:
636
  inputs=de_input
637
  )
638
 
639
- def transcribe_and_translate_german(audio_file):
640
- transcription = transcribe_german(audio_file)
641
- translation = translate_to_english(audio_file, "<deu>")
642
- return transcription, translation
643
-
644
- de_button.click(
645
- fn=transcribe_and_translate_german,
646
- inputs=de_input,
647
- outputs=[de_output, de_translation]
648
- )
649
 
650
  # Add feedback submission
651
  def submit_de_feedback(transcription, rating, audio_path):
 
123
  # Update the language symbol if needed
124
  model.beam_search.hyps = None
125
  model.beam_search.pre_beam_score_key = None
 
126
 
127
  if language != None:
128
  model.lang_sym = language
 
136
 
137
  return text
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Function to handle English transcription
140
  def transcribe_english(audio_file):
141
  return transcribe_audio(audio_file, "<eng>")
 
252
  mic_button = gr.Button("Transcribe Recording")
253
  with gr.Column():
254
  mic_output = gr.Textbox(label="Transcription")
 
255
 
256
  # Add feedback components
257
  with gr.Row():
 
273
 
274
  # Special handling for Chinese with variant selection
275
  if lang == "Mandarin" and chinese_variant:
276
+ return transcribe_chinese(audio, chinese_variant.lower())
 
 
277
 
278
+ return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
 
 
 
 
 
 
 
 
 
 
 
279
 
280
+ mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic, chinese_variant_mic], outputs=mic_output)
 
 
 
 
 
 
 
 
281
 
282
  # Add feedback submission function
283
  def submit_mic_feedback(transcription, rating, language, chinese_variant):
 
338
  zh_button = gr.Button("Transcribe Speech")
339
  with gr.Column():
340
  zh_output = gr.Textbox(label="Speech Transcription")
 
341
 
342
  # Add feedback components
343
  with gr.Row():
 
353
  inputs=zh_input
354
  )
355
 
356
+ # Update the click function to include the Chinese variant
357
  def transcribe_chinese_with_variant(audio_file, variant):
358
+ return transcribe_chinese(audio_file, variant.lower())
359
+
360
+ zh_button.click(fn=transcribe_chinese_with_variant, inputs=[zh_input, chinese_variant], outputs=zh_output)
 
 
 
 
 
 
361
 
362
  # Update feedback submission to include variant
363
  def submit_zh_feedback(transcription, rating, audio_path, variant):
 
376
  jp_button = gr.Button("Transcribe Speech")
377
  with gr.Column():
378
  jp_output = gr.Textbox(label="Speech Transcription")
 
379
 
380
  # Add feedback components
381
  with gr.Row():
 
391
  inputs=jp_input
392
  )
393
 
394
+ jp_button.click(fn=transcribe_japanese, inputs=jp_input, outputs=jp_output)
 
 
 
 
 
 
 
 
 
395
 
396
  # Add feedback submission
397
  def submit_jp_feedback(transcription, rating, audio_path):
 
410
  kr_button = gr.Button("Transcribe Speech")
411
  with gr.Column():
412
  kr_output = gr.Textbox(label="Speech Transcription")
 
413
 
414
  # Add feedback components
415
  with gr.Row():
 
425
  inputs=kr_input
426
  )
427
 
428
+ kr_button.click(fn=transcribe_korean, inputs=kr_input, outputs=kr_output)
 
 
 
 
 
 
 
 
 
429
 
430
  # Add feedback submission
431
  def submit_kr_feedback(transcription, rating, audio_path):
 
444
  th_button = gr.Button("Transcribe Speech")
445
  with gr.Column():
446
  th_output = gr.Textbox(label="Speech Transcription")
 
447
 
448
  # Add feedback components
449
  with gr.Row():
 
459
  inputs=th_input
460
  )
461
 
462
+ th_button.click(fn=transcribe_thai, inputs=th_input, outputs=th_output)
 
 
 
 
 
 
 
 
 
463
 
464
  # Add feedback submission
465
  def submit_th_feedback(transcription, rating, audio_path):
 
478
  it_button = gr.Button("Transcribe Speech")
479
  with gr.Column():
480
  it_output = gr.Textbox(label="Speech Transcription")
 
481
 
482
  # Add feedback components
483
  with gr.Row():
 
493
  inputs=it_input
494
  )
495
 
496
+ it_button.click(fn=transcribe_italian, inputs=it_input, outputs=it_output)
 
 
 
 
 
 
 
 
 
497
 
498
  # Add feedback submission
499
  def submit_it_feedback(transcription, rating, audio_path):
 
512
  de_button = gr.Button("Transcribe Speech")
513
  with gr.Column():
514
  de_output = gr.Textbox(label="Speech Transcription")
 
515
 
516
  # Add feedback components
517
  with gr.Row():
 
527
  inputs=de_input
528
  )
529
 
530
+ de_button.click(fn=transcribe_german, inputs=de_input, outputs=de_output)
 
 
 
 
 
 
 
 
 
531
 
532
  # Add feedback submission
533
  def submit_de_feedback(transcription, rating, audio_path):