# OuteTTS Streamlit demo — text-to-speech with optional voice cloning
# (reference-audio upload or microphone recording).
import os
import tempfile
import wave

import numpy as np
import outetts
import sounddevice as sd
import streamlit as st
from pydub import AudioSegment
from scipy.io.wavfile import write
# Initialize the OuteTTS model configuration.
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",  # Supported languages: en, zh, ja, ko
)

# Build the inference interface once at module load (model download/load is slow).
interface = outetts.InterfaceHF(model_version="0.2", cfg=model_config)
# --- Main page UI ---
st.title("OuteTTS Speech Synthesis")
st.write("Enter text below to generate speech.")

# --- Sidebar: optional reference voice for cloning ---
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader(
    "Upload a reference audio (any format)",
    type=["wav", "mp3", "ogg", "flac", "m4a"],
)
transcript = st.sidebar.text_area("Transcription of the reference audio")
# Function to convert audio to WAV format | |
def convert_to_wav(audio_file):
    """Convert an uploaded audio file of any supported format to WAV.

    Parameters
    ----------
    audio_file : file-like object
        Uploaded audio (wav/mp3/ogg/flac/m4a) as provided by Streamlit.

    Returns
    -------
    str
        Path to a temporary WAV file; the caller is responsible for removing it.
    """
    # Create the destination path and close the handle immediately: the
    # original kept NamedTemporaryFile's descriptor open (fd leak, and on
    # Windows the open handle blocks pydub from writing to the same path).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    audio = AudioSegment.from_file(audio_file)
    audio.export(wav_path, format="wav")
    return wav_path
# Build a speaker profile only when both a reference file and its
# transcription are available; otherwise fall back to the default voice.
if reference_audio and transcript:
    ref_audio_path = convert_to_wav(reference_audio)
    # Create the speaker profile from the reference audio + transcript.
    speaker = interface.create_speaker(ref_audio_path, transcript)
    # Persist the profile so it can be reused across sessions.
    interface.save_speaker(speaker, "speaker.json")
else:
    speaker = None
# Recording functionality | |
def record_audio(duration=5, samplerate=44100):
    """Record mono audio from the default input device into a WAV file.

    Parameters
    ----------
    duration : int
        Recording length in seconds.
    samplerate : int
        Sampling rate in Hz.

    Returns
    -------
    str
        Path to a temporary WAV file; the caller is responsible for removing it.
    """
    st.sidebar.write("Recording...")
    recording = sd.rec(
        int(duration * samplerate), samplerate=samplerate, channels=1, dtype=np.int16
    )
    sd.wait()  # block until the recording completes

    # Allocate the temp path and close the handle right away; the original
    # left NamedTemporaryFile's descriptor open (fd leak, and the open handle
    # conflicts with wave.open on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        temp_audio_path = tmp.name
    with wave.open(temp_audio_path, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 2 bytes per sample == int16
        wf.setframerate(samplerate)
        wf.writeframes(recording.tobytes())
    return temp_audio_path
if not speaker:
    st.sidebar.write("Or record your voice below:")
    if st.sidebar.button("Record Voice"):
        # Persist the recorded path in session_state: Streamlit reruns the
        # whole script when the transcript text_area changes, which would
        # discard a plain local variable (the original lost the recording
        # before a transcript could ever be entered).
        st.session_state["recorded_audio_path"] = record_audio()
        st.sidebar.success("Recording complete!")
    if "recorded_audio_path" in st.session_state:
        recorded_transcript = st.sidebar.text_area(
            "Transcription of the recorded audio"
        )
        if recorded_transcript:
            # Create and persist a speaker profile from the recorded audio.
            speaker = interface.create_speaker(
                st.session_state["recorded_audio_path"], recorded_transcript
            )
            interface.save_speaker(speaker, "speaker.json")
text_input = st.text_area(
    "Text to convert to speech:", "Hello, this is an AI-generated voice."
)

if st.button("Generate Speech"):
    with st.spinner("Generating audio..."):
        # Generate speech; `speaker` may be None, in which case the model's
        # default voice is used.
        output = interface.generate(
            text=text_input,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4096,
            speaker=speaker,
        )
        # Save the synthesized speech and play it back in the app.
        output_path = "output.wav"
        output.save(output_path)
        st.audio(output_path, format="audio/wav")
        st.success("Speech generated successfully!")

    # Clean up the converted reference file.  Check BOTH conditions:
    # `ref_audio_path` is only assigned when a reference file AND its
    # transcript were provided, so the original's `if reference_audio:`
    # raised NameError when a file was uploaded without a transcript.
    if reference_audio and transcript:
        os.remove(ref_audio_path)