speech_translation_integrate

Sleeping

App Files Files Community

jerrypan7 committed on Oct 7, 2024

Commit

df13cc5

verified ·

1 Parent(s): 47a5d48

Update app.py

Browse files

segment the translation

Files changed (1) hide show

app.py +25 -3

app.py CHANGED Viewed

@@ -96,6 +96,23 @@ def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -
         print("Error:", response.status_code, response.text)
         return None  # Return None on failure
 def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64):
     print(input_text)
     one_vllm_input = f"<|im_start|>system\nYou are a translation expert.<|im_end|>\n<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant"
@@ -142,10 +159,15 @@ def transcribe_and_speak(audio, source_lang, target_lang, youtube_url=None, targ
     else:
         return "ASR failed", None, None
-    translation_prompt = f"Translate the following text from {LANGUAGE_MAP[source_lang]} to {LANGUAGE_MAP[target_lang]}: {transcription}"
-    translated_text = inference_via_llm_api(translation_prompt)
-    print(f"Translation: {translated_text}")
     # TTS
     tts_params = {
         'language': target_lang,

         print("Error:", response.status_code, response.text)
         return None  # Return None on failure
# Sentence-ending marks (ASCII and full-width CJK). The capturing group makes
# re.split() return the matched mark alongside the text it terminates.
punctuation_marks = r'([\.!?！？。])'


def split_text_with_punctuation(text):
    """Split *text* into segments, each ending with its punctuation mark.

    The text is cut after every character matched by ``punctuation_marks``;
    the mark stays attached to the segment it terminates. Text following the
    last mark is returned as a final segment, but only when it contains
    something other than whitespace, so callers never receive an empty
    segment to process.

    Note: every ``.`` is treated as a boundary, so a decimal such as
    ``3.14`` is split into ``"3."`` and ``"14"``.

    Args:
        text: The string to segment. ``None`` or ``""`` yields ``[]``.

    Returns:
        A list of non-empty string segments in their original order.
    """
    if not text:
        return []

    # With a single capturing group, re.split() always returns an odd-length
    # list shaped like [body, mark, body, mark, ..., tail].
    pieces = re.split(punctuation_marks, text)

    # Pair every body (even index) with the mark that follows it (odd index).
    segments = [body + mark for body, mark in zip(pieces[0::2], pieces[1::2])]

    # The tail is "" whenever the text ends with a mark; skip it in that case
    # (and when it is only whitespace) instead of emitting a blank segment.
    tail = pieces[-1]
    if tail.strip():
        segments.append(tail)
    return segments
 def inference_via_llm_api(input_text, min_new_tokens=2, max_new_tokens=64):
     print(input_text)
     one_vllm_input = f"<|im_start|>system\nYou are a translation expert.<|im_end|>\n<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant"
     else:
         return "ASR failed", None, None
+    split_result = split_text_with_punctuation(transcription)
+    translate_segments=[]
+    for segment in split_result:
+        translation_prompt = f"Translate the following text from {LANGUAGE_MAP[source_lang]} to {LANGUAGE_MAP[target_lang]}: {segment}"
+        translated_seg_txt = inference_via_llm_api(translation_prompt)
+        translate_segments.append(translated_seg_txt)
+        print(f"Translation: {translated_seg_txt}")
+    translated_text = " ".join(translate_segments)
     # TTS
     tts_params = {
         'language': target_lang,