File size: 1,582 Bytes
2d8da09 714d948 2d8da09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from playsound import playsound
from utils import setup_device
from TTS.api import TTS
import numpy as np
import soundfile
import pyaudio
import wave
# Audio settings. FRAMES_PER_BUFFER is not referenced by the code visible here.
FRAMES_PER_BUFFER = 1000
# Sample format whose byte width add_echo() writes into the output WAV header.
FORMAT = pyaudio.paInt16
# Channel count and frame rate that add_echo() stamps on every output file,
# regardless of what the input file's own header declared.
CHANNELS = 2
RATE = 26000
# Compute device chosen by the project's setup_device() helper
# (presumably CPU vs. GPU -- confirm in utils).
device = setup_device()
# Load the Jenny English TTS model once, at import time, and move it to that device.
tts = TTS("tts_models/en/jenny/jenny").to(device)
def add_echo(audio_file, output_file):
    """Mix a short, attenuated echo into a WAV file.

    The input is first re-saved in place through ``soundfile`` (presumably so
    that the ``wave`` module, which only reads PCM data, can open the TTS
    output -- TODO confirm), then read back as raw 16-bit samples. A copy of
    the signal delayed by ``0.02 * frame rate`` samples and scaled by 0.4 is
    added to the original, and the result is written to ``output_file``.

    The output header uses the module-level ``CHANNELS``, ``FORMAT`` and
    ``RATE`` rather than the input file's own values.

    Args:
        audio_file: Path of the WAV file to read. It is rewritten in place
            before being processed.
        output_file: Path of the WAV file to write; may equal ``audio_file``.
    """
    data, samplerate = soundfile.read(audio_file)
    soundfile.write(audio_file, data, samplerate)

    # Context manager guarantees the handle is released even if a read fails.
    with wave.open(audio_file, 'rb') as wav_in:
        sample_freq = wav_in.getframerate()
        signal_wave = wav_in.readframes(wav_in.getnframes())

    signal = np.frombuffer(signal_wave, dtype=np.int16)
    echo_gain = 0.4
    echo_delay = int(0.02 * sample_freq)

    # Build the delayed copy in int32 so the sum below cannot wrap around.
    echo = np.zeros(len(signal), dtype=np.int32)
    if echo_delay < len(signal):
        # Float products are truncated toward zero on assignment.
        echo[echo_delay:] = signal[:len(signal) - echo_delay] * echo_gain

    # Saturate instead of overflowing: int16 + int16 silently wraps, which is
    # audible as harsh crackling on loud passages.
    limits = np.iinfo(np.int16)
    mixed = signal.astype(np.int32) + echo
    output = np.clip(mixed, limits.min, limits.max).astype(np.int16)

    with wave.open(output_file, "wb") as out_file:
        out_file.setnchannels(CHANNELS)
        # Module-level lookup: no PortAudio session is needed just to learn
        # the byte width of a sample format.
        out_file.setsampwidth(pyaudio.get_sample_size(FORMAT))
        out_file.setframerate(RATE)
        out_file.writeframes(output.tobytes())
def speak(text):
    """Synthesize ``text`` to speech, apply the echo effect twice, and play it.

    Also empties ``./database/recognition.txt`` before the audio is processed.

    Args:
        text: The sentence to be spoken.
    """
    wav_path = "./database/audio.wav"
    tts.tts_to_file(text=text, file_path=wav_path)

    # Opening in write mode truncates the file; nothing else is written to it.
    with open("./database/recognition.txt", 'w') as transcript:
        transcript.write('')

    # Two passes over the same file, each one layering another echo on top.
    for _ in range(2):
        add_echo(wav_path, wav_path)

    playsound(wav_path)
if __name__ == "__main__":
    # Manual check: speak a fixed greeting when the file is run as a script.
    greeting = "Hello! I am CRYSTAL! How can I help you today?"
    speak(greeting)
|