huckiyang committed on
Commit
d7c422e
·
1 Parent(s): c4dfa25

Revert "last try to fix it"

Browse files

This reverts commit c4dfa25a3ab77abdad19c048916ca0eb11509801.

Files changed (1) hide show
  1. app.py +61 -31
app.py CHANGED
@@ -131,21 +131,45 @@ def transcribe_audio(audio_file, language):
131
  # Perform ASR
132
  text, *_ = model(speech)[0]
133
 
134
- # Also get translation to English if not already in English
135
- translation = ""
136
- if language != "<eng>":
137
- # Set task to speech translation to English
138
- model.task_sym = "<st_en>"
139
- # Keep the source language the same
140
-
141
- # Perform speech translation
142
- translation, *_ = model(speech)[0]
143
-
144
  # Clean up temporary file if created
145
  if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
146
  os.unlink(audio_file)
 
 
147
 
148
- return text, translation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  # Function to handle English transcription
151
  def transcribe_english(audio_file):
@@ -160,22 +184,22 @@ def transcribe_chinese(audio_file, chinese_variant="Traditional"):
160
  audio_file: Path to the audio file
161
  chinese_variant: Either "Simplified" or "Traditional"
162
  """
163
- # First get the base transcription and translation
164
- text, translation = transcribe_audio(audio_file, "<zho>")
165
 
166
  # Convert between simplified and traditional Chinese if needed
167
  if chinese_variant == "Traditional":
168
  # Convert simplified to traditional
169
  # Use s2t for more complete conversion from Simplified to Traditional
170
  cc = OpenCC('s2t') # s2t
171
- text = cc.convert(text)
172
- elif chinese_variant == "Simplified" and not text.isascii():
173
  # If the text contains non-ASCII characters, it might be traditional
174
  # Convert traditional to simplified just to be safe
175
  cc = OpenCC('t2s') # t2s: Traditional to Simplified
176
- text = cc.convert(text)
177
 
178
- return text, translation
179
 
180
  # Function to handle Japanese transcription
181
  def transcribe_japanese(audio_file):
@@ -285,14 +309,14 @@ with demo:
285
 
286
  # Special handling for Chinese with variant selection
287
  if lang == "Mandarin" and chinese_variant:
288
- transcription, translation = transcribe_chinese(audio, chinese_variant)
289
  else:
290
- result = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
291
- if lang == "English":
292
- transcription = result
293
- translation = ""
294
- else:
295
- transcription, translation = result
296
 
297
  return transcription, translation, gr.update(visible=(lang != "English"))
298
 
@@ -389,7 +413,9 @@ with demo:
389
 
390
  # Update the click function to include the Chinese variant and translation
391
  def transcribe_chinese_with_variant(audio_file, variant):
392
- return transcribe_chinese(audio_file, variant)
 
 
393
 
394
  zh_button.click(
395
  fn=transcribe_chinese_with_variant,
@@ -431,7 +457,8 @@ with demo:
431
  )
432
 
433
  def transcribe_and_translate_japanese(audio_file):
434
- transcription, translation = transcribe_japanese(audio_file)
 
435
  return transcription, translation
436
 
437
  jp_button.click(
@@ -474,8 +501,8 @@ with demo:
474
  )
475
 
476
  def transcribe_and_translate_korean(audio_file):
477
- transcription, translation = transcribe_korean(audio_file)
478
-
479
  return transcription, translation
480
 
481
  kr_button.click(
@@ -518,7 +545,8 @@ with demo:
518
  )
519
 
520
  def transcribe_and_translate_thai(audio_file):
521
- transcription, translation = transcribe_thai(audio_file)
 
522
  return transcription, translation
523
 
524
  th_button.click(
@@ -561,7 +589,8 @@ with demo:
561
  )
562
 
563
  def transcribe_and_translate_italian(audio_file):
564
- transcription,translation = transcribe_italian(audio_file)
 
565
  return transcription, translation
566
 
567
  it_button.click(
@@ -604,7 +633,8 @@ with demo:
604
  )
605
 
606
  def transcribe_and_translate_german(audio_file):
607
- transcription, translation = transcribe_german(audio_file)
 
608
  return transcription, translation
609
 
610
  de_button.click(
 
131
  # Perform ASR
132
  text, *_ = model(speech)[0]
133
 
 
 
 
 
 
 
 
 
 
 
134
  # Clean up temporary file if created
135
  if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
136
  os.unlink(audio_file)
137
+
138
+ return text
139
 
140
+ # New function for speech translation to English
141
+ def translate_to_english(audio_file, source_language):
142
+ """Process the audio file and return the English translation"""
143
+ if audio_file is None:
144
+ return "Please upload an audio file or record audio."
145
+
146
+ # If audio is a tuple (from microphone recording)
147
+ if isinstance(audio_file, tuple):
148
+ sr, audio_data = audio_file
149
+ # Create a temporary file to save the audio
150
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
151
+ temp_path = temp_audio.name
152
+ sf.write(temp_path, audio_data, sr)
153
+ audio_file = temp_path
154
+
155
+ # Load and resample the audio file to 16kHz
156
+ speech, _ = librosa.load(audio_file, sr=16000)
157
+
158
+ # Set task to speech translation to English
159
+ model.task_sym = "<st_en>"
160
+
161
+ # Set source language
162
+ if source_language != None:
163
+ model.lang_sym = source_language
164
+
165
+ # Perform speech translation
166
+ translation, *_ = model(speech)[0]
167
+
168
+ # Clean up temporary file if created
169
+ if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
170
+ os.unlink(audio_file)
171
+
172
+ return translation
173
 
174
  # Function to handle English transcription
175
  def transcribe_english(audio_file):
 
184
  audio_file: Path to the audio file
185
  chinese_variant: Either "Simplified" or "Traditional"
186
  """
187
+ # First get the base transcription
188
+ asr_text = transcribe_audio(audio_file, "<zho>")
189
 
190
  # Convert between simplified and traditional Chinese if needed
191
  if chinese_variant == "Traditional":
192
  # Convert simplified to traditional
193
  # Use s2t for more complete conversion from Simplified to Traditional
194
  cc = OpenCC('s2t') # s2t
195
+ asr_text = cc.convert(asr_text)
196
+ elif chinese_variant == "Simplified" and not asr_text.isascii():
197
  # If the text contains non-ASCII characters, it might be traditional
198
  # Convert traditional to simplified just to be safe
199
  cc = OpenCC('t2s') # t2s: Traditional to Simplified
200
+ asr_text = cc.convert(asr_text)
201
 
202
+ return asr_text
203
 
204
  # Function to handle Japanese transcription
205
  def transcribe_japanese(audio_file):
 
309
 
310
  # Special handling for Chinese with variant selection
311
  if lang == "Mandarin" and chinese_variant:
312
+ transcription = transcribe_chinese(audio, chinese_variant)
313
  else:
314
+ transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
315
+
316
+ # Get translation if not English
317
+ translation = ""
318
+ if lang != "English":
319
+ translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
320
 
321
  return transcription, translation, gr.update(visible=(lang != "English"))
322
 
 
413
 
414
  # Update the click function to include the Chinese variant and translation
415
  def transcribe_chinese_with_variant(audio_file, variant):
416
+ transcription = transcribe_chinese(audio_file, variant)
417
+ translation = translate_to_english(audio_file, "<zho>")
418
+ return transcription, translation
419
 
420
  zh_button.click(
421
  fn=transcribe_chinese_with_variant,
 
457
  )
458
 
459
  def transcribe_and_translate_japanese(audio_file):
460
+ transcription = transcribe_japanese(audio_file)
461
+ translation = translate_to_english(audio_file, "<jpn>")
462
  return transcription, translation
463
 
464
  jp_button.click(
 
501
  )
502
 
503
  def transcribe_and_translate_korean(audio_file):
504
+ transcription = transcribe_korean(audio_file)
505
+ translation = translate_to_english(audio_file, "<kor>")
506
  return transcription, translation
507
 
508
  kr_button.click(
 
545
  )
546
 
547
  def transcribe_and_translate_thai(audio_file):
548
+ transcription = transcribe_thai(audio_file)
549
+ translation = translate_to_english(audio_file, "<tha>")
550
  return transcription, translation
551
 
552
  th_button.click(
 
589
  )
590
 
591
  def transcribe_and_translate_italian(audio_file):
592
+ transcription = transcribe_italian(audio_file)
593
+ translation = translate_to_english(audio_file, "<ita>")
594
  return transcription, translation
595
 
596
  it_button.click(
 
633
  )
634
 
635
  def transcribe_and_translate_german(audio_file):
636
+ transcription = transcribe_german(audio_file)
637
+ translation = translate_to_english(audio_file, "<deu>")
638
  return transcription, translation
639
 
640
  de_button.click(