Spaces:

djkesu
/

tortoise5c

Running

App Files Files Community

tortoise5c / app.py

djkesu

Caching in docker implemented

d2eb80b about 1 year ago

raw

history blame

3.85 kB

	import os
	import shutil
	import streamlit as st
	import torchaudio
	import IPython

	from tortoise.api import TextToSpeech
	from tortoise.utils.audio import load_voice

	# Initialize the TextToSpeech model once and reuse it.
	# Streamlit re-executes this script on every widget interaction, so a bare
	# TextToSpeech() call here would be repeated on each rerun.
	# st.cache_resource is not available in every Streamlit release; when it is
	# missing, fall back to the plain (uncached) construction.
	_cache_resource = getattr(st, "cache_resource", lambda func: func)


	@_cache_resource
	def _load_tts():
	    """Return the TextToSpeech instance used by the whole app."""
	    return TextToSpeech()


	tts = _load_tts()

	# Constants
	PRESETS = ["ultra_fast", "fast", "standard", "high_quality", "very_fast"]
	UPLOAD_FOLDER = "./uploads"
	OUTPUT_FOLDER = "./output"

	# Only assigned during the rerun in which "Create Voice" is clicked.
	voice_samples = None
	conditioning_latents = None

	# Create upload and output directories if they don't exist
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)
	os.makedirs(OUTPUT_FOLDER, exist_ok=True)

	# Page title
	st.title("Tortoise Text-to-Speech App")

	# Sidebar uploader: accepts several .wav files at once; the resulting list
	# is consumed by the "Create Voice" step.
	st.sidebar.header("Upload Audio Samples")
	uploaded_files = st.sidebar.file_uploader(
	    label="Upload Audio Samples for a New Voice",
	    type=["wav"],
	    accept_multiple_files=True,
	)

	# Create a new voice from the uploaded samples
	voice_name = st.sidebar.text_input("New Voice Name", help="Enter a name for your new voice.")

	if st.sidebar.button("Create Voice"):
	    # Directory name for the voice: trimmed, with spaces turned into underscores.
	    new_voice_name = voice_name.strip().replace(" ", "_")

	    if not new_voice_name:
	        st.sidebar.warning("Please enter a name for the new voice.")
	    elif not new_voice_name.replace("_", "").replace("-", "").isalnum():
	        # The name becomes part of a path that is removed with shutil.rmtree,
	        # so reject path separators, dots and anything else that could point
	        # outside ./tortoise/voices/.
	        st.sidebar.warning("Voice names may only contain letters, digits, spaces, '-' and '_'.")
	    elif not uploaded_files:
	        # Without samples only an empty voice directory would be created.
	        st.sidebar.warning("Please upload at least one .wav sample.")
	    else:
	        voices_dir = f"./tortoise/voices/{new_voice_name}/"
	        # Replace any existing voice of the same name.
	        if os.path.exists(voices_dir):
	            shutil.rmtree(voices_dir)
	        os.makedirs(voices_dir)

	        for index, uploaded_file in enumerate(uploaded_files):
	            with open(f"{voices_dir}voice_sample{index}.wav", "wb") as wav_file:
	                wav_file.write(uploaded_file.read())

	        st.sidebar.success(f"Voice '{voice_name}' created successfully!")
	        # Load under the directory name that was just written, not the raw
	        # text input (which may still contain spaces or padding).
	        voice_samples, conditioning_latents = load_voice(new_voice_name)

	# Main panel inputs: the text to speak, the preset and the voice
	st.header("Text-to-Speech Generation")
	text = st.text_area(
	    label="Enter Text",
	    value="Joining two modalities results in a surprising increase in generalization! What would happen if we combined them all?",
	    help="Enter the text you want to convert to speech.",
	)

	preset = st.selectbox(label="Preset", options=PRESETS, help="Select a voice preset.")

	# Offer every entry found under tortoise/voices except "cond_latent_example".
	voices = list(
	    filter(lambda entry: entry != "cond_latent_example", os.listdir("tortoise/voices"))
	)
	voice = st.selectbox(label="Voice", options=voices, help="Select a voice to use for generation.")

	# Generate speech for the entered text with the selected voice and preset
	if st.button("Generate Speech"):
	    if not voice:
	        # Nothing is selected when tortoise/voices offers no voices.
	        st.warning("Please create a voice first.")
	    elif not text.strip():
	        st.warning("Please enter some text to convert to speech.")
	    else:
	        st.info("Generating speech...")

	        # Load the voice picked in the "Voice" selectbox. The module-level
	        # voice_samples / conditioning_latents are reset to None at the top
	        # of every rerun, so they cannot be relied on here.
	        voice_samples, conditioning_latents = load_voice(voice)

	        # Generate speech with Tortoise
	        gen = tts.tts_with_preset(
	            text,
	            voice_samples=voice_samples,
	            conditioning_latents=conditioning_latents,
	            preset=preset,
	        )

	        # Save the generated audio; 24000 is the sample rate handed to torchaudio.save
	        output_path = os.path.join(OUTPUT_FOLDER, "generated.wav")
	        torchaudio.save(output_path, gen.squeeze(0).cpu(), 24000)

	        print(output_path)

	        # Log the path of the generated audio
	        st.write(f"Generated audio saved at: {output_path}")

	        # Display the generated audio. st.audio already renders a playable
	        # widget; a checkbox nested in this button branch would disappear on
	        # the rerun that its own click triggers, so none is shown.
	        st.subheader("Generated Output")
	        st.audio(output_path, format="audio/wav")

	        st.success("Speech generated successfully!")

	# Sidebar action: delete and recreate the upload folder, then the output folder
	if st.sidebar.button("Clean Up"):
	    for folder in (UPLOAD_FOLDER, OUTPUT_FOLDER):
	        shutil.rmtree(folder)
	        os.makedirs(folder, exist_ok=True)
	    st.sidebar.success("Clean up completed!")

	# Sidebar help text summarizing what the app offers
	st.sidebar.header("Information")
	info_text = (
	    "This app allows you to create a new voice by uploading .wav files. You can then generate speech "
	    "using the selected voice and preset. You can play the generated audio and clean up uploaded files and the output directory when needed."
	)
	st.sidebar.markdown(info_text)