huckiyang commited on
Commit
1dad4ac
·
1 Parent(s): 746a0c3

[test] translation output

Browse files
Files changed (1) hide show
  1. app.py +130 -12
app.py CHANGED
@@ -123,6 +123,7 @@ def transcribe_audio(audio_file, language):
123
  # Update the language symbol if needed
124
  model.beam_search.hyps = None
125
  model.beam_search.pre_beam_score_key = None
 
126
 
127
  if language != None:
128
  model.lang_sym = language
@@ -136,6 +137,44 @@ def transcribe_audio(audio_file, language):
136
 
137
  return text
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Function to handle English transcription
140
  def transcribe_english(audio_file):
141
  return transcribe_audio(audio_file, "<eng>")
@@ -252,6 +291,7 @@ with demo:
252
  mic_button = gr.Button("Transcribe Recording")
253
  with gr.Column():
254
  mic_output = gr.Textbox(label="Transcription")
 
255
 
256
  # Add feedback components
257
  with gr.Row():
@@ -273,11 +313,32 @@ with demo:
273
 
274
  # Special handling for Chinese with variant selection
275
  if lang == "Mandarin" and chinese_variant:
276
- return transcribe_chinese(audio, chinese_variant.lower())
 
 
277
 
278
- return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic, chinese_variant_mic], outputs=mic_output)
 
 
 
 
 
 
 
 
281
 
282
  # Add feedback submission function
283
  def submit_mic_feedback(transcription, rating, language, chinese_variant):
@@ -338,6 +399,7 @@ with demo:
338
  zh_button = gr.Button("Transcribe Speech")
339
  with gr.Column():
340
  zh_output = gr.Textbox(label="Speech Transcription")
 
341
 
342
  # Add feedback components
343
  with gr.Row():
@@ -353,11 +415,17 @@ with demo:
353
  inputs=zh_input
354
  )
355
 
356
- # Update the click function to include the Chinese variant
357
  def transcribe_chinese_with_variant(audio_file, variant):
358
- return transcribe_chinese(audio_file, variant.lower())
359
-
360
- zh_button.click(fn=transcribe_chinese_with_variant, inputs=[zh_input, chinese_variant], outputs=zh_output)
 
 
 
 
 
 
361
 
362
  # Update feedback submission to include variant
363
  def submit_zh_feedback(transcription, rating, audio_path, variant):
@@ -376,6 +444,7 @@ with demo:
376
  jp_button = gr.Button("Transcribe Speech")
377
  with gr.Column():
378
  jp_output = gr.Textbox(label="Speech Transcription")
 
379
 
380
  # Add feedback components
381
  with gr.Row():
@@ -391,7 +460,16 @@ with demo:
391
  inputs=jp_input
392
  )
393
 
394
- jp_button.click(fn=transcribe_japanese, inputs=jp_input, outputs=jp_output)
 
 
 
 
 
 
 
 
 
395
 
396
  # Add feedback submission
397
  def submit_jp_feedback(transcription, rating, audio_path):
@@ -410,6 +488,7 @@ with demo:
410
  kr_button = gr.Button("Transcribe Speech")
411
  with gr.Column():
412
  kr_output = gr.Textbox(label="Speech Transcription")
 
413
 
414
  # Add feedback components
415
  with gr.Row():
@@ -425,7 +504,16 @@ with demo:
425
  inputs=kr_input
426
  )
427
 
428
- kr_button.click(fn=transcribe_korean, inputs=kr_input, outputs=kr_output)
 
 
 
 
 
 
 
 
 
429
 
430
  # Add feedback submission
431
  def submit_kr_feedback(transcription, rating, audio_path):
@@ -444,6 +532,7 @@ with demo:
444
  th_button = gr.Button("Transcribe Speech")
445
  with gr.Column():
446
  th_output = gr.Textbox(label="Speech Transcription")
 
447
 
448
  # Add feedback components
449
  with gr.Row():
@@ -459,7 +548,16 @@ with demo:
459
  inputs=th_input
460
  )
461
 
462
- th_button.click(fn=transcribe_thai, inputs=th_input, outputs=th_output)
 
 
 
 
 
 
 
 
 
463
 
464
  # Add feedback submission
465
  def submit_th_feedback(transcription, rating, audio_path):
@@ -478,6 +576,7 @@ with demo:
478
  it_button = gr.Button("Transcribe Speech")
479
  with gr.Column():
480
  it_output = gr.Textbox(label="Speech Transcription")
 
481
 
482
  # Add feedback components
483
  with gr.Row():
@@ -493,7 +592,16 @@ with demo:
493
  inputs=it_input
494
  )
495
 
496
- it_button.click(fn=transcribe_italian, inputs=it_input, outputs=it_output)
 
 
 
 
 
 
 
 
 
497
 
498
  # Add feedback submission
499
  def submit_it_feedback(transcription, rating, audio_path):
@@ -512,6 +620,7 @@ with demo:
512
  de_button = gr.Button("Transcribe Speech")
513
  with gr.Column():
514
  de_output = gr.Textbox(label="Speech Transcription")
 
515
 
516
  # Add feedback components
517
  with gr.Row():
@@ -527,7 +636,16 @@ with demo:
527
  inputs=de_input
528
  )
529
 
530
- de_button.click(fn=transcribe_german, inputs=de_input, outputs=de_output)
 
 
 
 
 
 
 
 
 
531
 
532
  # Add feedback submission
533
  def submit_de_feedback(transcription, rating, audio_path):
 
123
  # Update the language symbol if needed
124
  model.beam_search.hyps = None
125
  model.beam_search.pre_beam_score_key = None
126
+ model.task_sym = "<asr>" # Set default task to ASR
127
 
128
  if language != None:
129
  model.lang_sym = language
 
137
 
138
  return text
139
 
140
+ # New function for speech translation to English
141
+ def translate_to_english(audio_file, source_language):
142
+ """Process the audio file and return the English translation"""
143
+ if audio_file is None:
144
+ return "Please upload an audio file or record audio."
145
+
146
+ # If audio is a tuple (from microphone recording)
147
+ if isinstance(audio_file, tuple):
148
+ sr, audio_data = audio_file
149
+ # Create a temporary file to save the audio
150
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
151
+ temp_path = temp_audio.name
152
+ sf.write(temp_path, audio_data, sr)
153
+ audio_file = temp_path
154
+
155
+ # Load and resample the audio file to 16kHz
156
+ speech, _ = librosa.load(audio_file, sr=16000)
157
+
158
+ # Reset beam search state
159
+ model.beam_search.hyps = None
160
+ model.beam_search.pre_beam_score_key = None
161
+
162
+ # Set task to speech translation to English
163
+ model.task_sym = "<st_eng>"
164
+
165
+ # Set source language
166
+ if source_language != None:
167
+ model.lang_sym = source_language
168
+
169
+ # Perform speech translation
170
+ translation, *_ = model(speech)[0]
171
+
172
+ # Clean up temporary file if created
173
+ if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
174
+ os.unlink(audio_file)
175
+
176
+ return translation
177
+
178
  # Function to handle English transcription
179
  def transcribe_english(audio_file):
180
  return transcribe_audio(audio_file, "<eng>")
 
291
  mic_button = gr.Button("Transcribe Recording")
292
  with gr.Column():
293
  mic_output = gr.Textbox(label="Transcription")
294
+ mic_translation = gr.Textbox(label="English Translation", visible=False)
295
 
296
  # Add feedback components
297
  with gr.Row():
 
313
 
314
  # Special handling for Chinese with variant selection
315
  if lang == "Mandarin" and chinese_variant:
316
+ transcription = transcribe_chinese(audio, chinese_variant)
317
+ else:
318
+ transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
319
 
320
+ # Get translation if not English
321
+ translation = ""
322
+ if lang != "English":
323
+ translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
324
+
325
+ return transcription, translation, gr.update(visible=(lang != "English"))
326
+
327
+ mic_button.click(
328
+ fn=transcribe_mic,
329
+ inputs=[mic_input, language_mic, chinese_variant_mic],
330
+ outputs=[mic_output, mic_translation, mic_translation]
331
+ )
332
 
333
+ # Update the visibility of translation box when language changes
334
+ def update_translation_visibility(lang):
335
+ return gr.update(visible=(lang == "Mandarin")), gr.update(visible=(lang != "English"))
336
+
337
+ language_mic.change(
338
+ fn=update_translation_visibility,
339
+ inputs=language_mic,
340
+ outputs=[chinese_variant_mic, mic_translation]
341
+ )
342
 
343
  # Add feedback submission function
344
  def submit_mic_feedback(transcription, rating, language, chinese_variant):
 
399
  zh_button = gr.Button("Transcribe Speech")
400
  with gr.Column():
401
  zh_output = gr.Textbox(label="Speech Transcription")
402
+ zh_translation = gr.Textbox(label="English Translation")
403
 
404
  # Add feedback components
405
  with gr.Row():
 
415
  inputs=zh_input
416
  )
417
 
418
+ # Update the click function to include the Chinese variant and translation
419
  def transcribe_chinese_with_variant(audio_file, variant):
420
+ transcription = transcribe_chinese(audio_file, variant)
421
+ translation = translate_to_english(audio_file, "<zho>")
422
+ return transcription, translation
423
+
424
+ zh_button.click(
425
+ fn=transcribe_chinese_with_variant,
426
+ inputs=[zh_input, chinese_variant],
427
+ outputs=[zh_output, zh_translation]
428
+ )
429
 
430
  # Update feedback submission to include variant
431
  def submit_zh_feedback(transcription, rating, audio_path, variant):
 
444
  jp_button = gr.Button("Transcribe Speech")
445
  with gr.Column():
446
  jp_output = gr.Textbox(label="Speech Transcription")
447
+ jp_translation = gr.Textbox(label="English Translation")
448
 
449
  # Add feedback components
450
  with gr.Row():
 
460
  inputs=jp_input
461
  )
462
 
463
+ def transcribe_and_translate_japanese(audio_file):
464
+ transcription = transcribe_japanese(audio_file)
465
+ translation = translate_to_english(audio_file, "<jpn>")
466
+ return transcription, translation
467
+
468
+ jp_button.click(
469
+ fn=transcribe_and_translate_japanese,
470
+ inputs=jp_input,
471
+ outputs=[jp_output, jp_translation]
472
+ )
473
 
474
  # Add feedback submission
475
  def submit_jp_feedback(transcription, rating, audio_path):
 
488
  kr_button = gr.Button("Transcribe Speech")
489
  with gr.Column():
490
  kr_output = gr.Textbox(label="Speech Transcription")
491
+ kr_translation = gr.Textbox(label="English Translation")
492
 
493
  # Add feedback components
494
  with gr.Row():
 
504
  inputs=kr_input
505
  )
506
 
507
+ def transcribe_and_translate_korean(audio_file):
508
+ transcription = transcribe_korean(audio_file)
509
+ translation = translate_to_english(audio_file, "<kor>")
510
+ return transcription, translation
511
+
512
+ kr_button.click(
513
+ fn=transcribe_and_translate_korean,
514
+ inputs=kr_input,
515
+ outputs=[kr_output, kr_translation]
516
+ )
517
 
518
  # Add feedback submission
519
  def submit_kr_feedback(transcription, rating, audio_path):
 
532
  th_button = gr.Button("Transcribe Speech")
533
  with gr.Column():
534
  th_output = gr.Textbox(label="Speech Transcription")
535
+ th_translation = gr.Textbox(label="English Translation")
536
 
537
  # Add feedback components
538
  with gr.Row():
 
548
  inputs=th_input
549
  )
550
 
551
+ def transcribe_and_translate_thai(audio_file):
552
+ transcription = transcribe_thai(audio_file)
553
+ translation = translate_to_english(audio_file, "<tha>")
554
+ return transcription, translation
555
+
556
+ th_button.click(
557
+ fn=transcribe_and_translate_thai,
558
+ inputs=th_input,
559
+ outputs=[th_output, th_translation]
560
+ )
561
 
562
  # Add feedback submission
563
  def submit_th_feedback(transcription, rating, audio_path):
 
576
  it_button = gr.Button("Transcribe Speech")
577
  with gr.Column():
578
  it_output = gr.Textbox(label="Speech Transcription")
579
+ it_translation = gr.Textbox(label="English Translation")
580
 
581
  # Add feedback components
582
  with gr.Row():
 
592
  inputs=it_input
593
  )
594
 
595
+ def transcribe_and_translate_italian(audio_file):
596
+ transcription = transcribe_italian(audio_file)
597
+ translation = translate_to_english(audio_file, "<ita>")
598
+ return transcription, translation
599
+
600
+ it_button.click(
601
+ fn=transcribe_and_translate_italian,
602
+ inputs=it_input,
603
+ outputs=[it_output, it_translation]
604
+ )
605
 
606
  # Add feedback submission
607
  def submit_it_feedback(transcription, rating, audio_path):
 
620
  de_button = gr.Button("Transcribe Speech")
621
  with gr.Column():
622
  de_output = gr.Textbox(label="Speech Transcription")
623
+ de_translation = gr.Textbox(label="English Translation")
624
 
625
  # Add feedback components
626
  with gr.Row():
 
636
  inputs=de_input
637
  )
638
 
639
+ def transcribe_and_translate_german(audio_file):
640
+ transcription = transcribe_german(audio_file)
641
+ translation = translate_to_english(audio_file, "<deu>")
642
+ return transcription, translation
643
+
644
+ de_button.click(
645
+ fn=transcribe_and_translate_german,
646
+ inputs=de_input,
647
+ outputs=[de_output, de_translation]
648
+ )
649
 
650
  # Add feedback submission
651
  def submit_de_feedback(transcription, rating, audio_path):