huckiyang committed on
Commit
c4dfa25
·
1 Parent(s): afe4a7c

last try to fix it

Browse files
Files changed (1) hide show
  1. app.py +31 -61
app.py CHANGED
@@ -131,45 +131,21 @@ def transcribe_audio(audio_file, language):
131
  # Perform ASR
132
  text, *_ = model(speech)[0]
133
 
134
- # Clean up temporary file if created
135
- if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
136
- os.unlink(audio_file)
137
-
138
- return text
139
-
140
- # New function for speech translation to English
141
- def translate_to_english(audio_file, source_language):
142
- """Process the audio file and return the English translation"""
143
- if audio_file is None:
144
- return "Please upload an audio file or record audio."
145
-
146
- # If audio is a tuple (from microphone recording)
147
- if isinstance(audio_file, tuple):
148
- sr, audio_data = audio_file
149
- # Create a temporary file to save the audio
150
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
151
- temp_path = temp_audio.name
152
- sf.write(temp_path, audio_data, sr)
153
- audio_file = temp_path
154
-
155
- # Load and resample the audio file to 16kHz
156
- speech, _ = librosa.load(audio_file, sr=16000)
157
-
158
- # Set task to speech translation to English
159
- model.task_sym = "<st_en>"
160
-
161
- # Set source language
162
- if source_language != None:
163
- model.lang_sym = source_language
164
-
165
- # Perform speech translation
166
- translation, *_ = model(speech)[0]
167
 
168
  # Clean up temporary file if created
169
  if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
170
  os.unlink(audio_file)
171
-
172
- return translation
173
 
174
  # Function to handle English transcription
175
  def transcribe_english(audio_file):
@@ -184,22 +160,22 @@ def transcribe_chinese(audio_file, chinese_variant="Traditional"):
184
  audio_file: Path to the audio file
185
  chinese_variant: Either "Simplified" or "Traditional"
186
  """
187
- # First get the base transcription
188
- asr_text = transcribe_audio(audio_file, "<zho>")
189
 
190
  # Convert between simplified and traditional Chinese if needed
191
  if chinese_variant == "Traditional":
192
  # Convert simplified to traditional
193
  # Use s2t for more complete conversion from Simplified to Traditional
194
  cc = OpenCC('s2t') # s2t
195
- asr_text = cc.convert(asr_text)
196
- elif chinese_variant == "Simplified" and not asr_text.isascii():
197
  # If the text contains non-ASCII characters, it might be traditional
198
  # Convert traditional to simplified just to be safe
199
  cc = OpenCC('t2s') # t2s: Traditional to Simplified
200
- asr_text = cc.convert(asr_text)
201
 
202
- return asr_text
203
 
204
  # Function to handle Japanese transcription
205
  def transcribe_japanese(audio_file):
@@ -309,14 +285,14 @@ with demo:
309
 
310
  # Special handling for Chinese with variant selection
311
  if lang == "Mandarin" and chinese_variant:
312
- transcription = transcribe_chinese(audio, chinese_variant)
313
  else:
314
- transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
315
-
316
- # Get translation if not English
317
- translation = ""
318
- if lang != "English":
319
- translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
320
 
321
  return transcription, translation, gr.update(visible=(lang != "English"))
322
 
@@ -413,9 +389,7 @@ with demo:
413
 
414
  # Update the click function to include the Chinese variant and translation
415
  def transcribe_chinese_with_variant(audio_file, variant):
416
- transcription = transcribe_chinese(audio_file, variant)
417
- translation = translate_to_english(audio_file, "<zho>")
418
- return transcription, translation
419
 
420
  zh_button.click(
421
  fn=transcribe_chinese_with_variant,
@@ -457,8 +431,7 @@ with demo:
457
  )
458
 
459
  def transcribe_and_translate_japanese(audio_file):
460
- transcription = transcribe_japanese(audio_file)
461
- translation = translate_to_english(audio_file, "<jpn>")
462
  return transcription, translation
463
 
464
  jp_button.click(
@@ -501,8 +474,8 @@ with demo:
501
  )
502
 
503
  def transcribe_and_translate_korean(audio_file):
504
- transcription = transcribe_korean(audio_file)
505
- translation = translate_to_english(audio_file, "<kor>")
506
  return transcription, translation
507
 
508
  kr_button.click(
@@ -545,8 +518,7 @@ with demo:
545
  )
546
 
547
  def transcribe_and_translate_thai(audio_file):
548
- transcription = transcribe_thai(audio_file)
549
- translation = translate_to_english(audio_file, "<tha>")
550
  return transcription, translation
551
 
552
  th_button.click(
@@ -589,8 +561,7 @@ with demo:
589
  )
590
 
591
  def transcribe_and_translate_italian(audio_file):
592
- transcription = transcribe_italian(audio_file)
593
- translation = translate_to_english(audio_file, "<ita>")
594
  return transcription, translation
595
 
596
  it_button.click(
@@ -633,8 +604,7 @@ with demo:
633
  )
634
 
635
  def transcribe_and_translate_german(audio_file):
636
- transcription = transcribe_german(audio_file)
637
- translation = translate_to_english(audio_file, "<deu>")
638
  return transcription, translation
639
 
640
  de_button.click(
 
131
  # Perform ASR
132
  text, *_ = model(speech)[0]
133
 
134
+ # Also get translation to English if not already in English
135
+ translation = ""
136
+ if language != "<eng>":
137
+ # Set task to speech translation to English
138
+ model.task_sym = "<st_en>"
139
+ # Keep the source language the same
140
+
141
+ # Perform speech translation
142
+ translation, *_ = model(speech)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  # Clean up temporary file if created
145
  if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
146
  os.unlink(audio_file)
147
+
148
+ return text, translation
149
 
150
  # Function to handle English transcription
151
  def transcribe_english(audio_file):
 
160
  audio_file: Path to the audio file
161
  chinese_variant: Either "Simplified" or "Traditional"
162
  """
163
+ # First get the base transcription and translation
164
+ text, translation = transcribe_audio(audio_file, "<zho>")
165
 
166
  # Convert between simplified and traditional Chinese if needed
167
  if chinese_variant == "Traditional":
168
  # Convert simplified to traditional
169
  # Use s2t for more complete conversion from Simplified to Traditional
170
  cc = OpenCC('s2t') # s2t
171
+ text = cc.convert(text)
172
+ elif chinese_variant == "Simplified" and not text.isascii():
173
  # If the text contains non-ASCII characters, it might be traditional
174
  # Convert traditional to simplified just to be safe
175
  cc = OpenCC('t2s') # t2s: Traditional to Simplified
176
+ text = cc.convert(text)
177
 
178
+ return text, translation
179
 
180
  # Function to handle Japanese transcription
181
  def transcribe_japanese(audio_file):
 
285
 
286
  # Special handling for Chinese with variant selection
287
  if lang == "Mandarin" and chinese_variant:
288
+ transcription, translation = transcribe_chinese(audio, chinese_variant)
289
  else:
290
+ result = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
291
+ if lang == "English":
292
+ transcription = result
293
+ translation = ""
294
+ else:
295
+ transcription, translation = result
296
 
297
  return transcription, translation, gr.update(visible=(lang != "English"))
298
 
 
389
 
390
  # Update the click function to include the Chinese variant and translation
391
  def transcribe_chinese_with_variant(audio_file, variant):
392
+ return transcribe_chinese(audio_file, variant)
 
 
393
 
394
  zh_button.click(
395
  fn=transcribe_chinese_with_variant,
 
431
  )
432
 
433
  def transcribe_and_translate_japanese(audio_file):
434
+ transcription, translation = transcribe_japanese(audio_file)
 
435
  return transcription, translation
436
 
437
  jp_button.click(
 
474
  )
475
 
476
  def transcribe_and_translate_korean(audio_file):
477
+ transcription, translation = transcribe_korean(audio_file)
478
+
479
  return transcription, translation
480
 
481
  kr_button.click(
 
518
  )
519
 
520
  def transcribe_and_translate_thai(audio_file):
521
+ transcription, translation = transcribe_thai(audio_file)
 
522
  return transcription, translation
523
 
524
  th_button.click(
 
561
  )
562
 
563
  def transcribe_and_translate_italian(audio_file):
564
+ transcription,translation = transcribe_italian(audio_file)
 
565
  return transcription, translation
566
 
567
  it_button.click(
 
604
  )
605
 
606
  def transcribe_and_translate_german(audio_file):
607
+ transcription, translation = transcribe_german(audio_file)
 
608
  return transcription, translation
609
 
610
  de_button.click(