Spaces:
Runtime error
Runtime error
last try to fix it
Browse files
app.py
CHANGED
@@ -131,45 +131,21 @@ def transcribe_audio(audio_file, language):
|
|
131 |
# Perform ASR
|
132 |
text, *_ = model(speech)[0]
|
133 |
|
134 |
-
#
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
if audio_file is None:
|
144 |
-
return "Please upload an audio file or record audio."
|
145 |
-
|
146 |
-
# If audio is a tuple (from microphone recording)
|
147 |
-
if isinstance(audio_file, tuple):
|
148 |
-
sr, audio_data = audio_file
|
149 |
-
# Create a temporary file to save the audio
|
150 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
|
151 |
-
temp_path = temp_audio.name
|
152 |
-
sf.write(temp_path, audio_data, sr)
|
153 |
-
audio_file = temp_path
|
154 |
-
|
155 |
-
# Load and resample the audio file to 16kHz
|
156 |
-
speech, _ = librosa.load(audio_file, sr=16000)
|
157 |
-
|
158 |
-
# Set task to speech translation to English
|
159 |
-
model.task_sym = "<st_en>"
|
160 |
-
|
161 |
-
# Set source language
|
162 |
-
if source_language != None:
|
163 |
-
model.lang_sym = source_language
|
164 |
-
|
165 |
-
# Perform speech translation
|
166 |
-
translation, *_ = model(speech)[0]
|
167 |
|
168 |
# Clean up temporary file if created
|
169 |
if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
|
170 |
os.unlink(audio_file)
|
171 |
-
|
172 |
-
return translation
|
173 |
|
174 |
# Function to handle English transcription
|
175 |
def transcribe_english(audio_file):
|
@@ -184,22 +160,22 @@ def transcribe_chinese(audio_file, chinese_variant="Traditional"):
|
|
184 |
audio_file: Path to the audio file
|
185 |
chinese_variant: Either "Simplified" or "Traditional"
|
186 |
"""
|
187 |
-
# First get the base transcription
|
188 |
-
|
189 |
|
190 |
# Convert between simplified and traditional Chinese if needed
|
191 |
if chinese_variant == "Traditional":
|
192 |
# Convert simplified to traditional
|
193 |
# Use s2t for more complete conversion from Simplified to Traditional
|
194 |
cc = OpenCC('s2t') # s2t
|
195 |
-
|
196 |
-
elif chinese_variant == "Simplified" and not
|
197 |
# If the text contains non-ASCII characters, it might be traditional
|
198 |
# Convert traditional to simplified just to be safe
|
199 |
cc = OpenCC('t2s') # t2s: Traditional to Simplified
|
200 |
-
|
201 |
|
202 |
-
return
|
203 |
|
204 |
# Function to handle Japanese transcription
|
205 |
def transcribe_japanese(audio_file):
|
@@ -309,14 +285,14 @@ with demo:
|
|
309 |
|
310 |
# Special handling for Chinese with variant selection
|
311 |
if lang == "Mandarin" and chinese_variant:
|
312 |
-
transcription = transcribe_chinese(audio, chinese_variant)
|
313 |
else:
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
|
321 |
return transcription, translation, gr.update(visible=(lang != "English"))
|
322 |
|
@@ -413,9 +389,7 @@ with demo:
|
|
413 |
|
414 |
# Update the click function to include the Chinese variant and translation
|
415 |
def transcribe_chinese_with_variant(audio_file, variant):
|
416 |
-
|
417 |
-
translation = translate_to_english(audio_file, "<zho>")
|
418 |
-
return transcription, translation
|
419 |
|
420 |
zh_button.click(
|
421 |
fn=transcribe_chinese_with_variant,
|
@@ -457,8 +431,7 @@ with demo:
|
|
457 |
)
|
458 |
|
459 |
def transcribe_and_translate_japanese(audio_file):
|
460 |
-
transcription = transcribe_japanese(audio_file)
|
461 |
-
translation = translate_to_english(audio_file, "<jpn>")
|
462 |
return transcription, translation
|
463 |
|
464 |
jp_button.click(
|
@@ -501,8 +474,8 @@ with demo:
|
|
501 |
)
|
502 |
|
503 |
def transcribe_and_translate_korean(audio_file):
|
504 |
-
transcription = transcribe_korean(audio_file)
|
505 |
-
|
506 |
return transcription, translation
|
507 |
|
508 |
kr_button.click(
|
@@ -545,8 +518,7 @@ with demo:
|
|
545 |
)
|
546 |
|
547 |
def transcribe_and_translate_thai(audio_file):
|
548 |
-
transcription = transcribe_thai(audio_file)
|
549 |
-
translation = translate_to_english(audio_file, "<tha>")
|
550 |
return transcription, translation
|
551 |
|
552 |
th_button.click(
|
@@ -589,8 +561,7 @@ with demo:
|
|
589 |
)
|
590 |
|
591 |
def transcribe_and_translate_italian(audio_file):
|
592 |
-
transcription = transcribe_italian(audio_file)
|
593 |
-
translation = translate_to_english(audio_file, "<ita>")
|
594 |
return transcription, translation
|
595 |
|
596 |
it_button.click(
|
@@ -633,8 +604,7 @@ with demo:
|
|
633 |
)
|
634 |
|
635 |
def transcribe_and_translate_german(audio_file):
|
636 |
-
transcription = transcribe_german(audio_file)
|
637 |
-
translation = translate_to_english(audio_file, "<deu>")
|
638 |
return transcription, translation
|
639 |
|
640 |
de_button.click(
|
|
|
131 |
# Perform ASR
|
132 |
text, *_ = model(speech)[0]
|
133 |
|
134 |
+
# Also get translation to English if not already in English
|
135 |
+
translation = ""
|
136 |
+
if language != "<eng>":
|
137 |
+
# Set task to speech translation to English
|
138 |
+
model.task_sym = "<st_en>"
|
139 |
+
# Keep the source language the same
|
140 |
+
|
141 |
+
# Perform speech translation
|
142 |
+
translation, *_ = model(speech)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
# Clean up temporary file if created
|
145 |
if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
|
146 |
os.unlink(audio_file)
|
147 |
+
|
148 |
+
return text, translation
|
149 |
|
150 |
# Function to handle English transcription
|
151 |
def transcribe_english(audio_file):
|
|
|
160 |
audio_file: Path to the audio file
|
161 |
chinese_variant: Either "Simplified" or "Traditional"
|
162 |
"""
|
163 |
+
# First get the base transcription and translation
|
164 |
+
text, translation = transcribe_audio(audio_file, "<zho>")
|
165 |
|
166 |
# Convert between simplified and traditional Chinese if needed
|
167 |
if chinese_variant == "Traditional":
|
168 |
# Convert simplified to traditional
|
169 |
# Use s2t for more complete conversion from Simplified to Traditional
|
170 |
cc = OpenCC('s2t') # s2t
|
171 |
+
text = cc.convert(text)
|
172 |
+
elif chinese_variant == "Simplified" and not text.isascii():
|
173 |
# If the text contains non-ASCII characters, it might be traditional
|
174 |
# Convert traditional to simplified just to be safe
|
175 |
cc = OpenCC('t2s') # t2s: Traditional to Simplified
|
176 |
+
text = cc.convert(text)
|
177 |
|
178 |
+
return text, translation
|
179 |
|
180 |
# Function to handle Japanese transcription
|
181 |
def transcribe_japanese(audio_file):
|
|
|
285 |
|
286 |
# Special handling for Chinese with variant selection
|
287 |
if lang == "Mandarin" and chinese_variant:
|
288 |
+
transcription, translation = transcribe_chinese(audio, chinese_variant)
|
289 |
else:
|
290 |
+
result = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
|
291 |
+
if lang == "English":
|
292 |
+
transcription = result
|
293 |
+
translation = ""
|
294 |
+
else:
|
295 |
+
transcription, translation = result
|
296 |
|
297 |
return transcription, translation, gr.update(visible=(lang != "English"))
|
298 |
|
|
|
389 |
|
390 |
# Update the click function to include the Chinese variant and translation
|
391 |
def transcribe_chinese_with_variant(audio_file, variant):
|
392 |
+
return transcribe_chinese(audio_file, variant)
|
|
|
|
|
393 |
|
394 |
zh_button.click(
|
395 |
fn=transcribe_chinese_with_variant,
|
|
|
431 |
)
|
432 |
|
433 |
def transcribe_and_translate_japanese(audio_file):
|
434 |
+
transcription, translation = transcribe_japanese(audio_file)
|
|
|
435 |
return transcription, translation
|
436 |
|
437 |
jp_button.click(
|
|
|
474 |
)
|
475 |
|
476 |
def transcribe_and_translate_korean(audio_file):
|
477 |
+
transcription, translation = transcribe_korean(audio_file)
|
478 |
+
|
479 |
return transcription, translation
|
480 |
|
481 |
kr_button.click(
|
|
|
518 |
)
|
519 |
|
520 |
def transcribe_and_translate_thai(audio_file):
|
521 |
+
transcription, translation = transcribe_thai(audio_file)
|
|
|
522 |
return transcription, translation
|
523 |
|
524 |
th_button.click(
|
|
|
561 |
)
|
562 |
|
563 |
def transcribe_and_translate_italian(audio_file):
|
564 |
+
transcription,translation = transcribe_italian(audio_file)
|
|
|
565 |
return transcription, translation
|
566 |
|
567 |
it_button.click(
|
|
|
604 |
)
|
605 |
|
606 |
def transcribe_and_translate_german(audio_file):
|
607 |
+
transcription, translation = transcribe_german(audio_file)
|
|
|
608 |
return transcription, translation
|
609 |
|
610 |
de_button.click(
|