speech

Paused

antfraia committed on Aug 30, 2023

Commit

db33eee

1 Parent(s): f0afc12

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,41 +1,32 @@
 import gradio as gr
-import numpy as np
-from elevenlabs import clone, generate, set_api_key
-from pydub import AudioSegment
 # Set up ElevenLabs API Key
 set_api_key("73bb17b223e2b0f90a403eaeaa3b4d35")
-# Function to convert MP3 to NumPy array
-def mp3_to_numpy(file_path):
-    audio = AudioSegment.from_mp3(file_path)
-    samples = np.array(audio.get_array_of_samples())
-    return audio.frame_rate, samples
-# Load and clone voice
-voice = clone(
-    name="Voice Name",
-    description="An old American male voice with a slight hoarseness in his throat. Perfect for news.",
-    files=["./sample1.mp3", "./sample2.mp3"],
-)
-def generate_voice_output(text):
-    try:
-        # Generate audio for the provided text
-        audio = generate(text=text, voice=voice)
-        # Convert audio for Gradio
-        audio_numpy = (44100, np.frombuffer(audio, dtype=np.int16))
-        return audio_numpy
-    except Exception as e:
-        return str(e)
 # Set up Gradio components and interface
 input_text = gr.Textbox(label="Input Text", lines=2)
-output_audio = gr.Audio(label="Generated Voice", type="numpy")
 iface = gr.Interface(
-    fn=generate_voice_output,
     inputs=input_text,
     outputs=output_audio,
     theme="Monochrome",

 import gradio as gr
+from elevenlabs import generate, stream, set_api_key
 # Set up ElevenLabs API Key
 set_api_key("73bb17b223e2b0f90a403eaeaa3b4d35")
+def generate_streamed_audio(text):
+    audio_stream = generate(
+        text=text,
+        stream=True
+    )
+    # This is where we'd integrate the streaming into Gradio.
+    # However, Gradio's native components don't support audio streaming directly.
+    # As a workaround, we might need to save the streamed audio to a file and return that.
+    # But this is not true real-time streaming.
+    audio_filename = "temp_audio.mp3"
+    with open(audio_filename, "wb") as f:
+        for chunk in audio_stream:
+            f.write(chunk)
+    return audio_filename
 # Set up Gradio components and interface
 input_text = gr.Textbox(label="Input Text", lines=2)
+output_audio = gr.Audio(label="Generated Voice", type="file")
 iface = gr.Interface(
+    fn=generate_streamed_audio,
     inputs=input_text,
     outputs=output_audio,
     theme="Monochrome",