nix-tts / elements /tts.py
rendchevi
initial commit
efaf417
# Utils
import os
import timeit
import soundfile as sf
# Streamlit
import streamlit as st
# Custom elements
from elements.component import (
centered_text,
)
def generate_voice(
    input_text,
):
    """Synthesize speech for *input_text*, play it, and show timing stats.

    Uses the TTS object stored in ``st.session_state.TTS`` to tokenize the
    text and vocalize it, renders an audio player for the result, and then
    renders a code box with the elapsed times and real-time factor.

    Args:
        input_text: Text to synthesize; passed unchanged to
            ``st.session_state.TTS.tokenize``.

    Returns:
        None. All output is rendered through Streamlit elements.
    """
    # Sample rate used both for writing the WAV file and for computing the
    # audio duration.
    sample_rate = 22050

    # TTS Inference
    start_time = timeit.default_timer()
    # The third value returned by tokenize is not needed here.
    c, c_length, _ = st.session_state.TTS.tokenize(input_text)
    tok_time = timeit.default_timer() - start_time

    start_time = timeit.default_timer()
    voice = st.session_state.TTS.vocalize(c, c_length)
    tts_time = timeit.default_timer() - start_time

    # Time stats
    total_infer_time = tts_time + tok_time
    # NOTE(review): presumably `voice` is (batch, channel, samples), given the
    # `voice[0, 0]` indexing below and the last axis used as length -- confirm.
    audio_time = voice.shape[-1] / sample_rate
    # Guard both divisions so an empty output (or a zero timer delta) shows
    # "inf" in the stats instead of crashing the page.
    rtf = total_infer_time / audio_time if audio_time > 0 else float("inf")
    rt_ratio = 1 / rtf if rtf > 0 else float("inf")

    # Save audio (bug in Streamlit, can't play numpy array directly)
    os.makedirs("cache_sound", exist_ok=True)
    wav_path = f"cache_sound/{st.session_state.random_str}.wav"
    try:
        sf.write(wav_path, voice[0, 0], sample_rate)
        # Play audio
        st.audio(wav_path, format="audio/wav")
    finally:
        # Always clean up the temporary file, even if writing or rendering
        # failed part-way through.
        try:
            os.remove(wav_path)
        except FileNotFoundError:
            pass
    st.caption("Generated Voice")

    # Fixed-point formatting: slicing str(value) would drop the exponent of
    # values printed in scientific notation (e.g. "5.4321e-05" -> "5.4321").
    stats = (
        f"💬 Output Audio: {audio_time:.4f} sec.\n\n"
        "⏳ Elapsed time for:\n"
        f" => Tokenization: {tok_time:.4f} sec.\n"
        f" => Model Inference: {tts_time:.4f} sec.\n\n"
        f"⏰ Real-time Factor (RTF): {rtf:.4f}\n\n"
        f"🏃 The model runs {rt_ratio:.4f} x faster than real-time "
    )
    st.code(
        stats,
        language="bash",
    )
    st.caption("Elapsed Time Stats")