Spaces:
Sleeping
Sleeping
Manu
committed on
Commit
·
4c84c16
1
Parent(s):
91ce86e
numpy audio
Browse files
app.py
CHANGED
@@ -7,6 +7,11 @@ from datasets import load_dataset
|
|
7 |
import soundfile as sf
|
8 |
import torch
|
9 |
import os
|
|
|
|
|
|
|
|
|
|
|
10 |
os.environ['TRANSFORMERS_CACHE'] = '.cache'
|
11 |
|
12 |
print ("----- setting up pipeline -----")
|
@@ -30,17 +35,44 @@ print ("----- synthetizing audio -----")
|
|
30 |
def greet(name):
|
31 |
return "Hello " + name + "!!"
|
32 |
|
|
|
33 |
def synthesise_audio(text, forward_params=None):
|
|
|
|
|
|
|
34 |
speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
|
35 |
-
sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
|
36 |
-
return "speech.wav"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
#demo = gr.Interface(fn=greet, inputs="text", outputs="text", description="----- TTS Testing -----")
|
39 |
|
|
|
|
|
|
|
40 |
demo = gr.Interface(fn=synthesise_audio,
|
41 |
-
inputs=
|
42 |
-
outputs="audio",
|
|
|
43 |
description="----- manuai Text To Speech generator -----",
|
44 |
allow_flagging = False)
|
45 |
|
46 |
-
demo.launch()
|
|
|
7 |
import soundfile as sf
|
8 |
import torch
|
9 |
import os
|
10 |
+
import io
|
11 |
+
import base64
|
12 |
+
import numpy as np
|
13 |
+
from pydub import AudioSegment
|
14 |
+
|
15 |
os.environ['TRANSFORMERS_CACHE'] = '.cache'
|
16 |
|
17 |
print ("----- setting up pipeline -----")
|
|
|
35 |
def greet(name):
|
36 |
return "Hello " + name + "!!"
|
37 |
|
38 |
+
|
39 |
def synthesise_audio(text, forward_params=None):
    """Synthesise speech for *text* and return it for a Gradio Audio output.

    Args:
        text: Text to speak; at most 100 characters.
        forward_params: Accepted for interface compatibility but currently
            ignored; the module-level ``speaker_embedding`` is always used.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is a 16-bit
        PCM NumPy array, which is the tuple form a Gradio ``Audio`` output
        component accepts.

    Raises:
        ValueError: If *text* is longer than 100 characters.
    """
    if len(text) > 100:
        raise ValueError("Error: El texto es demasiado largo. Por favor, limita tu entrada a 100 caracteres.")

    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Assumes the pipeline yields float samples nominally in [-1, 1]
    # (TODO confirm). Clip before scaling so an out-of-range sample
    # saturates instead of wrapping around when cast to int16.
    waveform = np.clip(np.asarray(speech["audio"], dtype=np.float32), -1.0, 1.0)
    pcm = (waveform * 32767).astype(np.int16)

    # Return (rate, array) rather than raw bytes: headerless PCM bytes carry
    # no sample rate or format, so the Audio component cannot play them.
    return speech["sampling_rate"], pcm
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
|
66 |
#demo = gr.Interface(fn=greet, inputs="text", outputs="text", description="----- TTS Testing -----")
|
67 |
|
68 |
+
# Multi-line textbox used as the single input of the demo.
input_text = gr.Textbox(lines=10, label="Enter text here")

# Wire the synthesiser to a text input and an audio output.
demo = gr.Interface(
    fn=synthesise_audio,
    inputs=input_text,
    outputs=gr.Audio(type="numpy"),
    description="----- manuai Text To Speech generator -----",
    # Gradio expects a string mode here; "never" disables flagging, which is
    # what the previous boolean False was meant to express.
    allow_flagging="never",
)

# debug=True keeps the process attached so errors are printed to the logs.
demo.launch(debug=True)
|