Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -49,7 +49,7 @@ MAX_IMAGE_SIZE = 1024
|
|
49 |
# Speech GenAI
|
50 |
# Function for translating different language using pretrained models
|
51 |
def translate(audio):
|
52 |
-
outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
|
53 |
return outputs["text"]
|
54 |
|
55 |
# Function to synthesise the text using the processor above
|
@@ -62,13 +62,13 @@ def synthesise(text):
|
|
62 |
def speech_to_speech_translation(audio):
|
63 |
translated_text = translate(audio)
|
64 |
synthesised_speech = synthesise(translated_text)
|
65 |
-
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
|
66 |
return 16000, synthesised_speech
|
67 |
|
68 |
# Function for text to speech
|
69 |
def text_to_speech(text):
|
70 |
synthesised_speech = synthesise(text)
|
71 |
-
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
|
72 |
return 16000, synthesised_speech
|
73 |
|
74 |
# Image GenAI
|
|
|
49 |
# Speech GenAI
|
50 |
# Function for translating different language using pretrained models
|
51 |
def translate(audio):
|
52 |
+
outputs = asr_pipe(input_features=audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
|
53 |
return outputs["text"]
|
54 |
|
55 |
# Function to synthesise the text using the processor above
|
|
|
62 |
def speech_to_speech_translation(audio):
|
63 |
translated_text = translate(audio)
|
64 |
synthesised_speech = synthesise(translated_text)
|
65 |
+
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16) # Ensure int16 format
|
66 |
return 16000, synthesised_speech
|
67 |
|
68 |
# Function for text to speech
|
69 |
def text_to_speech(text):
|
70 |
synthesised_speech = synthesise(text)
|
71 |
+
synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16) # Ensure int16 format
|
72 |
return 16000, synthesised_speech
|
73 |
|
74 |
# Image GenAI
|