File size: 1,582 Bytes

from playsound import playsound
from utils import setup_device
from TTS.api import TTS
import numpy as np
import soundfile
import pyaudio
import wave


# Audio parameters. CHANNELS, FORMAT and RATE are stamped into the header of
# the WAV written by add_echo(); FRAMES_PER_BUFFER is not referenced in the
# code visible here.
FRAMES_PER_BUFFER = 1000
# 16-bit signed PCM; add_echo() derives the output sample width from this.
FORMAT = pyaudio.paInt16
# NOTE(review): add_echo() writes this channel count regardless of how many
# channels the input file has -- confirm this is intended.
CHANNELS = 2
# NOTE(review): add_echo() writes this frame rate regardless of the input
# file's own rate; presumably a deliberate pitch/speed effect -- confirm.
RATE = 26000


# Import-time side effect: obtain the device from utils.setup_device()
# (presumably the compute device for the model -- verify in utils).
device = setup_device()

# Import-time side effect: load the "jenny" English TTS model and move it to
# the device obtained above.
tts = TTS("tts_models/en/jenny/jenny").to(device)

def add_echo(audio_file, output_file):
    """Mix a short, attenuated echo into a WAV file.

    The input is first round-tripped through ``soundfile`` (rewriting
    ``audio_file`` in place) so that the standard ``wave`` module can read
    it. The raw frames are then interpreted as a flat stream of 16-bit
    samples, a copy delayed by 20 ms (computed from the input's frame rate)
    and scaled by 0.4 is added, and the result is written to ``output_file``.

    The output header uses the module-level ``CHANNELS``, ``FORMAT`` and
    ``RATE`` values rather than the input file's own parameters. The output
    has the same number of samples as the input; the part of the echo that
    would extend past the end of the input is dropped.

    Args:
        audio_file: Path of the audio file to read. It is overwritten by the
            soundfile round-trip before processing.
        output_file: Path of the WAV file to write. May be the same path as
            ``audio_file``; the input is fully read and closed before the
            output is opened.
    """
    # Re-encode via soundfile so the file on disk is something `wave` can
    # parse. NOTE(review): relies on soundfile's default WAV subtype being
    # 16-bit PCM, since the frames are decoded as int16 below -- confirm.
    data, samplerate = soundfile.read(audio_file)
    soundfile.write(audio_file, data, samplerate)

    # Context manager guarantees the reader is closed even if a read fails.
    with wave.open(audio_file, 'rb') as in_file:
        sample_freq = in_file.getframerate()
        signal_wave = in_file.readframes(in_file.getnframes())

    signal = np.frombuffer(signal_wave, dtype=np.int16)

    echo_gain = 0.4
    echo_delay = int(0.02 * sample_freq)

    # Build the delayed copy in int32 so the sum below cannot wrap around.
    echo = np.zeros(len(signal) + echo_delay, dtype=np.int32)
    echo[echo_delay:echo_delay + len(signal)] = signal * echo_gain

    # Adding two int16 arrays overflows silently (loud samples wrap to the
    # opposite sign, which is audible as harsh clicks). Sum in int32 and
    # saturate to the int16 range instead.
    mixed = signal.astype(np.int32) + echo[:len(signal)]
    limits = np.iinfo(np.int16)
    output = np.clip(mixed, limits.min, limits.max).astype(np.int16)

    # The module-level helper returns the byte width of FORMAT without
    # initialising PortAudio, so there is no PyAudio instance to leak.
    sample_width = pyaudio.get_sample_size(FORMAT)

    with wave.open(output_file, "wb") as out_file:
        out_file.setnchannels(CHANNELS)
        out_file.setsampwidth(sample_width)
        out_file.setframerate(RATE)
        out_file.writeframes(output.tobytes())


def speak(text):
    """Synthesize ``text`` to speech, apply the echo effect twice, and play it.

    The audio is rendered to ``./database/audio.wav`` and processed in place.
    ``./database/recognition.txt`` is emptied after synthesis and before the
    echo passes.

    Args:
        text: The sentence to speak.
    """
    wav_path = "./database/audio.wav"
    recognition_path = "./database/recognition.txt"

    tts.tts_to_file(text=text, file_path=wav_path)

    # Opening in 'w' mode truncates the file; nothing is actually written.
    with open(recognition_path, 'w') as recognition_file:
        recognition_file.write('')

    # Two in-place echo passes over the same file.
    for _ in range(2):
        add_echo(wav_path, wav_path)

    playsound(wav_path)

if __name__ == "__main__":
    # Manual smoke test: speak a fixed greeting when run as a script.
    greeting = "Hello! I am CRYSTAL! How can I help you today?"
    speak(greeting)