Spaces:
Runtime error
Runtime error
Revert "last try to fix it"
Browse files
This reverts commit c4dfa25a3ab77abdad19c048916ca0eb11509801.
app.py
CHANGED
@@ -131,21 +131,45 @@ def transcribe_audio(audio_file, language):
|
|
131 |
# Perform ASR
|
132 |
text, *_ = model(speech)[0]
|
133 |
|
134 |
-
# Also get translation to English if not already in English
|
135 |
-
translation = ""
|
136 |
-
if language != "<eng>":
|
137 |
-
# Set task to speech translation to English
|
138 |
-
model.task_sym = "<st_en>"
|
139 |
-
# Keep the source language the same
|
140 |
-
|
141 |
-
# Perform speech translation
|
142 |
-
translation, *_ = model(speech)[0]
|
143 |
-
|
144 |
# Clean up temporary file if created
|
145 |
if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
|
146 |
os.unlink(audio_file)
|
|
|
|
|
147 |
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
# Function to handle English transcription
|
151 |
def transcribe_english(audio_file):
|
@@ -160,22 +184,22 @@ def transcribe_chinese(audio_file, chinese_variant="Traditional"):
|
|
160 |
audio_file: Path to the audio file
|
161 |
chinese_variant: Either "Simplified" or "Traditional"
|
162 |
"""
|
163 |
-
# First get the base transcription
|
164 |
-
|
165 |
|
166 |
# Convert between simplified and traditional Chinese if needed
|
167 |
if chinese_variant == "Traditional":
|
168 |
# Convert simplified to traditional
|
169 |
# Use s2t for more complete conversion from Simplified to Traditional
|
170 |
cc = OpenCC('s2t') # s2t
|
171 |
-
|
172 |
-
elif chinese_variant == "Simplified" and not
|
173 |
# If the text contains non-ASCII characters, it might be traditional
|
174 |
# Convert traditional to simplified just to be safe
|
175 |
cc = OpenCC('t2s') # t2s: Traditional to Simplified
|
176 |
-
|
177 |
|
178 |
-
return
|
179 |
|
180 |
# Function to handle Japanese transcription
|
181 |
def transcribe_japanese(audio_file):
|
@@ -285,14 +309,14 @@ with demo:
|
|
285 |
|
286 |
# Special handling for Chinese with variant selection
|
287 |
if lang == "Mandarin" and chinese_variant:
|
288 |
-
transcription
|
289 |
else:
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
|
297 |
return transcription, translation, gr.update(visible=(lang != "English"))
|
298 |
|
@@ -389,7 +413,9 @@ with demo:
|
|
389 |
|
390 |
# Update the click function to include the Chinese variant and translation
|
391 |
def transcribe_chinese_with_variant(audio_file, variant):
|
392 |
-
|
|
|
|
|
393 |
|
394 |
zh_button.click(
|
395 |
fn=transcribe_chinese_with_variant,
|
@@ -431,7 +457,8 @@ with demo:
|
|
431 |
)
|
432 |
|
433 |
def transcribe_and_translate_japanese(audio_file):
|
434 |
-
transcription
|
|
|
435 |
return transcription, translation
|
436 |
|
437 |
jp_button.click(
|
@@ -474,8 +501,8 @@ with demo:
|
|
474 |
)
|
475 |
|
476 |
def transcribe_and_translate_korean(audio_file):
|
477 |
-
transcription
|
478 |
-
|
479 |
return transcription, translation
|
480 |
|
481 |
kr_button.click(
|
@@ -518,7 +545,8 @@ with demo:
|
|
518 |
)
|
519 |
|
520 |
def transcribe_and_translate_thai(audio_file):
|
521 |
-
transcription
|
|
|
522 |
return transcription, translation
|
523 |
|
524 |
th_button.click(
|
@@ -561,7 +589,8 @@ with demo:
|
|
561 |
)
|
562 |
|
563 |
def transcribe_and_translate_italian(audio_file):
|
564 |
-
transcription
|
|
|
565 |
return transcription, translation
|
566 |
|
567 |
it_button.click(
|
@@ -604,7 +633,8 @@ with demo:
|
|
604 |
)
|
605 |
|
606 |
def transcribe_and_translate_german(audio_file):
|
607 |
-
transcription
|
|
|
608 |
return transcription, translation
|
609 |
|
610 |
de_button.click(
|
|
|
131 |
# Perform ASR
|
132 |
text, *_ = model(speech)[0]
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
# Clean up temporary file if created
|
135 |
if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
|
136 |
os.unlink(audio_file)
|
137 |
+
|
138 |
+
return text
|
139 |
|
140 |
+
# New function for speech translation to English
|
141 |
+
def translate_to_english(audio_file, source_language):
|
142 |
+
"""Process the audio file and return the English translation"""
|
143 |
+
if audio_file is None:
|
144 |
+
return "Please upload an audio file or record audio."
|
145 |
+
|
146 |
+
# If audio is a tuple (from microphone recording)
|
147 |
+
if isinstance(audio_file, tuple):
|
148 |
+
sr, audio_data = audio_file
|
149 |
+
# Create a temporary file to save the audio
|
150 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
|
151 |
+
temp_path = temp_audio.name
|
152 |
+
sf.write(temp_path, audio_data, sr)
|
153 |
+
audio_file = temp_path
|
154 |
+
|
155 |
+
# Load and resample the audio file to 16kHz
|
156 |
+
speech, _ = librosa.load(audio_file, sr=16000)
|
157 |
+
|
158 |
+
# Set task to speech translation to English
|
159 |
+
model.task_sym = "<st_en>"
|
160 |
+
|
161 |
+
# Set source language
|
162 |
+
if source_language != None:
|
163 |
+
model.lang_sym = source_language
|
164 |
+
|
165 |
+
# Perform speech translation
|
166 |
+
translation, *_ = model(speech)[0]
|
167 |
+
|
168 |
+
# Clean up temporary file if created
|
169 |
+
if isinstance(audio_file, str) and audio_file.startswith(tempfile.gettempdir()):
|
170 |
+
os.unlink(audio_file)
|
171 |
+
|
172 |
+
return translation
|
173 |
|
174 |
# Function to handle English transcription
|
175 |
def transcribe_english(audio_file):
|
|
|
184 |
audio_file: Path to the audio file
|
185 |
chinese_variant: Either "Simplified" or "Traditional"
|
186 |
"""
|
187 |
+
# First get the base transcription
|
188 |
+
asr_text = transcribe_audio(audio_file, "<zho>")
|
189 |
|
190 |
# Convert between simplified and traditional Chinese if needed
|
191 |
if chinese_variant == "Traditional":
|
192 |
# Convert simplified to traditional
|
193 |
# Use s2t for more complete conversion from Simplified to Traditional
|
194 |
cc = OpenCC('s2t') # s2t
|
195 |
+
asr_text = cc.convert(asr_text)
|
196 |
+
elif chinese_variant == "Simplified" and not asr_text.isascii():
|
197 |
# If the text contains non-ASCII characters, it might be traditional
|
198 |
# Convert traditional to simplified just to be safe
|
199 |
cc = OpenCC('t2s') # t2s: Traditional to Simplified
|
200 |
+
asr_text = cc.convert(asr_text)
|
201 |
|
202 |
+
return asr_text
|
203 |
|
204 |
# Function to handle Japanese transcription
|
205 |
def transcribe_japanese(audio_file):
|
|
|
309 |
|
310 |
# Special handling for Chinese with variant selection
|
311 |
if lang == "Mandarin" and chinese_variant:
|
312 |
+
transcription = transcribe_chinese(audio, chinese_variant)
|
313 |
else:
|
314 |
+
transcription = transcribe_audio(audio, lang_map.get(lang, "<eng>"))
|
315 |
+
|
316 |
+
# Get translation if not English
|
317 |
+
translation = ""
|
318 |
+
if lang != "English":
|
319 |
+
translation = translate_to_english(audio, lang_map.get(lang, "<eng>"))
|
320 |
|
321 |
return transcription, translation, gr.update(visible=(lang != "English"))
|
322 |
|
|
|
413 |
|
414 |
# Update the click function to include the Chinese variant and translation
|
415 |
def transcribe_chinese_with_variant(audio_file, variant):
|
416 |
+
transcription = transcribe_chinese(audio_file, variant)
|
417 |
+
translation = translate_to_english(audio_file, "<zho>")
|
418 |
+
return transcription, translation
|
419 |
|
420 |
zh_button.click(
|
421 |
fn=transcribe_chinese_with_variant,
|
|
|
457 |
)
|
458 |
|
459 |
def transcribe_and_translate_japanese(audio_file):
|
460 |
+
transcription = transcribe_japanese(audio_file)
|
461 |
+
translation = translate_to_english(audio_file, "<jpn>")
|
462 |
return transcription, translation
|
463 |
|
464 |
jp_button.click(
|
|
|
501 |
)
|
502 |
|
503 |
def transcribe_and_translate_korean(audio_file):
|
504 |
+
transcription = transcribe_korean(audio_file)
|
505 |
+
translation = translate_to_english(audio_file, "<kor>")
|
506 |
return transcription, translation
|
507 |
|
508 |
kr_button.click(
|
|
|
545 |
)
|
546 |
|
547 |
def transcribe_and_translate_thai(audio_file):
|
548 |
+
transcription = transcribe_thai(audio_file)
|
549 |
+
translation = translate_to_english(audio_file, "<tha>")
|
550 |
return transcription, translation
|
551 |
|
552 |
th_button.click(
|
|
|
589 |
)
|
590 |
|
591 |
def transcribe_and_translate_italian(audio_file):
|
592 |
+
transcription = transcribe_italian(audio_file)
|
593 |
+
translation = translate_to_english(audio_file, "<ita>")
|
594 |
return transcription, translation
|
595 |
|
596 |
it_button.click(
|
|
|
633 |
)
|
634 |
|
635 |
def transcribe_and_translate_german(audio_file):
|
636 |
+
transcription = transcribe_german(audio_file)
|
637 |
+
translation = translate_to_english(audio_file, "<deu>")
|
638 |
return transcription, translation
|
639 |
|
640 |
de_button.click(
|