speech / app.py
antfraia's picture
Update app.py
f0afc12
raw
history blame
1.26 kB
import io
import os

import gradio as gr
import numpy as np
from elevenlabs import clone, generate, set_api_key
from pydub import AudioSegment
# Set up the ElevenLabs API key.
# The key is read from the environment rather than written into the source:
# anything committed to the repository is readable by everyone who can see the
# file. The key that used to be embedded here should be considered leaked and
# revoked in the ElevenLabs account.
_api_key = os.environ.get("ELEVEN_API_KEY")
if not _api_key:
    # Fail at startup with an actionable message instead of letting the first
    # API call fail with an authentication error.
    raise RuntimeError(
        "ELEVEN_API_KEY is not set; export it (or add it as a deployment "
        "secret) before starting the app."
    )
set_api_key(_api_key)
def mp3_to_numpy(file_path):
    """Decode an MP3 file into a sample rate and a NumPy sample array.

    Args:
        file_path: Location of the MP3 file; passed straight to
            ``AudioSegment.from_mp3``.

    Returns:
        A ``(frame_rate, samples)`` tuple. For single-channel audio
        ``samples`` is one-dimensional; for multi-channel audio it has shape
        ``(frames, channels)``.
    """
    audio = AudioSegment.from_mp3(file_path)
    samples = np.array(audio.get_array_of_samples())
    # get_array_of_samples() interleaves the channels (L, R, L, R, ...).
    # Returned flat next to frame_rate, stereo audio would be read as a single
    # channel twice as long, so give each channel its own column.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels))
    return audio.frame_rate, samples
# Create the cloned voice from two local sample recordings. This runs once,
# when the module is loaded; the resulting `voice` is reused by every request.
_voice_samples = ["./sample1.mp3", "./sample2.mp3"]
_voice_description = (
    "An old American male voice with a slight hoarseness in his throat. Perfect for news."
)
voice = clone(
    name="Voice Name",
    description=_voice_description,
    files=_voice_samples,
)
def generate_voice_output(text):
    """Synthesize ``text`` with the cloned voice and return it for ``gr.Audio``.

    Args:
        text: The text to speak.

    Returns:
        A ``(sample_rate, samples)`` tuple, the form a
        ``gr.Audio(type="numpy")`` output accepts. ``samples`` is
        one-dimensional for mono audio and ``(frames, channels)`` otherwise.

    Raises:
        gr.Error: If ``text`` is blank, or if synthesis or decoding fails.
            Gradio shows the message to the user.
    """
    # Reject blank input up front rather than spending an API call on it.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        # Generate audio for the provided text
        audio = generate(text=text, voice=voice)
        # The generated audio is MP3-encoded, not raw PCM, so viewing the bytes
        # directly as int16 samples produces noise (or a ValueError when the
        # length is odd). Decode it and take the sample rate from the stream
        # instead of assuming 44100 Hz.
        segment = AudioSegment.from_file(io.BytesIO(audio), format="mp3")
        samples = np.array(segment.get_array_of_samples())
        # Samples come back interleaved; split channels into columns.
        if segment.channels > 1:
            samples = samples.reshape((-1, segment.channels))
        return segment.frame_rate, samples
    except Exception as e:
        # Returning the message as a plain string would hand text to an audio
        # output; raising gr.Error surfaces it in the UI instead.
        raise gr.Error(str(e)) from e
# Build the UI: a two-line text box as input, an audio player fed with NumPy
# data as output, wired to generate_voice_output.
input_text = gr.Textbox(lines=2, label="Input Text")
output_audio = gr.Audio(type="numpy", label="Generated Voice")
iface = gr.Interface(
    generate_voice_output,
    input_text,
    output_audio,
    theme="Monochrome",
)
# Launch the Gradio app only when this file is executed as a script, so that
# importing the module (for example from tooling or tests) does not start a
# web server as a side effect.
if __name__ == "__main__":
    iface.launch()