Spaces:

capradeepgujaran
/

VoiceOversV3

Running

App Files Files Community

capradeepgujaran committed on Oct 7, 2024

Commit

21d5d4f

verified ·

1 Parent(s): 86c089a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -34

app.py CHANGED Viewed

@@ -1,18 +1,19 @@
 import gradio as gr
-import pyttsx3
 import torch
 import torchaudio
 from torch import nn
 import numpy as np
 import tempfile
 import os
-# Initialize TTS engine
-try:
-    engine = pyttsx3.init()
-except Exception as e:
-    print(f"Error initializing TTS engine: {e}")
-    engine = None
 class SimpleWaveformGenerator(nn.Module):
     def __init__(self):
@@ -22,29 +23,19 @@ class SimpleWaveformGenerator(nn.Module):
     def forward(self, t):
         return torch.sin(2 * np.pi * self.frequency * t)
-def text_to_speech_with_emotion(text, emotion, lang='en'):
-    if engine is None:
-        return None, "TTS engine not initialized correctly."
-    # Set voice properties based on emotion
-    if emotion == "Happy":
-        engine.setProperty('rate', 175)
-        engine.setProperty('pitch', 75)
-    elif emotion == "Sad":
-        engine.setProperty('rate', 125)
-        engine.setProperty('pitch', 25)
-    elif emotion == "Angry":
-        engine.setProperty('rate', 150)
-        engine.setProperty('pitch', 100)
-    else:  # Neutral
-        engine.setProperty('rate', 150)
-        engine.setProperty('pitch', 50)
-    # Generate speech
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
-        engine.save_to_file(text, fp.name)
-        engine.runAndWait()
-        return fp.name, "Speech generated successfully"
 def generate_sound(description):
     duration = 3  # seconds
@@ -68,12 +59,12 @@ def generate_sound(description):
 # Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# Reliable Text-to-Speech and Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
-        emotion_input = gr.Dropdown(["Neutral", "Happy", "Sad", "Angry"], label="Select Emotion", value="Neutral")
-        lang_input = gr.Dropdown(["en"], label="Select Language", value="en")
         speech_button = gr.Button("Generate Speech")
         speech_output = gr.Audio(label="Generated Speech")
         speech_message = gr.Textbox(label="Message")
@@ -85,7 +76,7 @@ with gr.Blocks() as iface:
         sound_message = gr.Textbox(label="Message")
     speech_button.click(text_to_speech_with_emotion,
-                        inputs=[text_input, emotion_input, lang_input],
                         outputs=[speech_output, speech_message])
     sound_button.click(generate_sound,
                        inputs=[sound_input],

 import gradio as gr
 import torch
 import torchaudio
 from torch import nn
 import numpy as np
 import tempfile
 import os
+from elevenlabs import generate, set_api_key
+import soundfile as sf
+# Set your Elevenlabs API key
+ELEVENLABS_API_KEY = os.environ.get('ELEVENLABS_API_KEY')
+if ELEVENLABS_API_KEY:
+    set_api_key(ELEVENLABS_API_KEY)
+else:
+    print("Warning: ELEVENLABS_API_KEY not set in environment variables.")
 class SimpleWaveformGenerator(nn.Module):
     def __init__(self):
     def forward(self, t):
         return torch.sin(2 * np.pi * self.frequency * t)
+def text_to_speech_with_emotion(text, voice, model):
+    try:
+        audio = generate(
+            text=text,
+            voice=voice,
+            model=model
+        )
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
+            sf.write(fp.name, audio, 44100)  # Assuming 44.1kHz sample rate
+            return fp.name, "Speech generated successfully"
+    except Exception as e:
+        return None, f"Error in speech generation: {str(e)}"
 def generate_sound(description):
     duration = 3  # seconds
 # Gradio interface
 with gr.Blocks() as iface:
+    gr.Markdown("# Elevenlabs TTS and Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
+        voice_input = gr.Dropdown(["Rachel", "Domi", "Bella", "Antoni", "Elli", "Josh", "Arnold", "Adam", "Sam"], label="Select Voice", value="Rachel")
+        model_input = gr.Dropdown(["eleven_monolingual_v1", "eleven_multilingual_v1"], label="Select Model", value="eleven_monolingual_v1")
         speech_button = gr.Button("Generate Speech")
         speech_output = gr.Audio(label="Generated Speech")
         speech_message = gr.Textbox(label="Message")
         sound_message = gr.Textbox(label="Message")
     speech_button.click(text_to_speech_with_emotion,
+                        inputs=[text_input, voice_input, model_input],
                         outputs=[speech_output, speech_message])
     sound_button.click(generate_sound,
                        inputs=[sound_input],