Spaces:

thianfoo
/

GenAI_StoryTeller

Sleeping

thianfoo committed on Nov 4, 2024

Commit

a8f1469

verified ·

1 Parent(s): 1e07b45

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,7 @@ MAX_IMAGE_SIZE = 1024
 # Speech GenAI
 # Function for translating different language using pretrained models
 def translate(audio):
-    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
     return outputs["text"]
 # Function to synthesise the text using the processor above
@@ -62,13 +62,13 @@ def synthesise(text):
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
-    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech
 # Function for text to speech
 def text_to_speech(text):
     synthesised_speech = synthesise(text)
-    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
     return 16000, synthesised_speech
 # Image GenAI

 # Speech GenAI
 # Function for translating different language using pretrained models
 def translate(audio):
+    outputs = asr_pipe(input_features=audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
     return outputs["text"]
 # Function to synthesise the text using the processor above
 def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
+    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)  # Ensure int16 format
     return 16000, synthesised_speech
 # Function for text to speech
 def text_to_speech(text):
     synthesised_speech = synthesise(text)
+    synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)  # Ensure int16 format
     return 16000, synthesised_speech
 # Image GenAI