capradeepgujaran committed on
Commit
21d5d4f
·
verified ·
1 Parent(s): 86c089a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -34
app.py CHANGED
@@ -1,18 +1,19 @@
1
  import gradio as gr
2
- import pyttsx3
3
  import torch
4
  import torchaudio
5
  from torch import nn
6
  import numpy as np
7
  import tempfile
8
  import os
 
 
9
 
10
# Bring up the pyttsx3 engine once at import time. If initialisation fails
# for any reason the module still loads, with ``engine`` left as None so that
# callers can detect the missing engine and report it.
engine = None
try:
    engine = pyttsx3.init()
except Exception as init_error:
    print(f"Error initializing TTS engine: {init_error}")
16
 
17
  class SimpleWaveformGenerator(nn.Module):
18
  def __init__(self):
@@ -22,29 +23,19 @@ class SimpleWaveformGenerator(nn.Module):
22
  def forward(self, t):
23
  return torch.sin(2 * np.pi * self.frequency * t)
24
 
25
def text_to_speech_with_emotion(text, emotion, lang='en'):
    """Render ``text`` to a WAV file using emotion-specific voice settings.

    Args:
        text: The text to speak.
        emotion: "Happy", "Sad" or "Angry"; any other value gets the
            neutral settings.
        lang: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A ``(path, message)`` tuple: the path of the temporary ``.wav`` file
        and a success message, or ``(None, message)`` when the module-level
        ``engine`` is not available.
    """
    if engine is None:
        return None, "TTS engine not initialized correctly."

    # (rate, pitch) pairs per emotion; unlisted emotions fall back to neutral.
    voice_profiles = {
        "Happy": (175, 75),
        "Sad": (125, 25),
        "Angry": (150, 100),
    }
    neutral_profile = (150, 50)
    rate, pitch = voice_profiles.get(emotion, neutral_profile)

    engine.setProperty('rate', rate)
    engine.setProperty('pitch', pitch)

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays usable by the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        output_path = wav_file.name
        engine.save_to_file(text, output_path)
        engine.runAndWait()
        return output_path, "Speech generated successfully"
48
 
49
  def generate_sound(description):
50
  duration = 3 # seconds
@@ -68,12 +59,12 @@ def generate_sound(description):
68
 
69
  # Gradio interface
70
  with gr.Blocks() as iface:
71
- gr.Markdown("# Reliable Text-to-Speech and Sound Generation Tool")
72
 
73
  with gr.Tab("Text-to-Speech"):
74
  text_input = gr.Textbox(label="Enter text for speech generation")
75
- emotion_input = gr.Dropdown(["Neutral", "Happy", "Sad", "Angry"], label="Select Emotion", value="Neutral")
76
- lang_input = gr.Dropdown(["en"], label="Select Language", value="en")
77
  speech_button = gr.Button("Generate Speech")
78
  speech_output = gr.Audio(label="Generated Speech")
79
  speech_message = gr.Textbox(label="Message")
@@ -85,7 +76,7 @@ with gr.Blocks() as iface:
85
  sound_message = gr.Textbox(label="Message")
86
 
87
  speech_button.click(text_to_speech_with_emotion,
88
- inputs=[text_input, emotion_input, lang_input],
89
  outputs=[speech_output, speech_message])
90
  sound_button.click(generate_sound,
91
  inputs=[sound_input],
 
1
  import gradio as gr
 
2
  import torch
3
  import torchaudio
4
  from torch import nn
5
  import numpy as np
6
  import tempfile
7
  import os
8
+ from elevenlabs import generate, set_api_key
9
+ import soundfile as sf
10
 
11
# Read the ElevenLabs API key from the environment. When it is present it is
# registered with the client library; when it is missing only a warning is
# printed, so the app still starts.
ELEVENLABS_API_KEY = os.environ.get('ELEVENLABS_API_KEY')
if not ELEVENLABS_API_KEY:
    print("Warning: ELEVENLABS_API_KEY not set in environment variables.")
else:
    set_api_key(ELEVENLABS_API_KEY)
17
 
18
  class SimpleWaveformGenerator(nn.Module):
19
  def __init__(self):
 
23
  def forward(self, t):
24
  return torch.sin(2 * np.pi * self.frequency * t)
25
 
26
def text_to_speech_with_emotion(text, voice, model):
    """Synthesize ``text`` with the ElevenLabs API and save it to a temp file.

    Args:
        text: The text to speak. Empty or whitespace-only input is rejected
            before any API call is made.
        voice: Voice name, passed through to ``generate``.
        model: Model identifier, passed through to ``generate``.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated audio file, or
        ``None`` when nothing was generated; ``message`` is a status or error
        string suitable for display in the UI.
    """
    # Reject blank input up front rather than spending an API request on it.
    if not text or not text.strip():
        return None, "Error in speech generation: no text provided"

    try:
        audio = generate(
            text=text,
            voice=voice,
            model=model,
        )

        # generate() returns already-encoded audio (MP3 by default per the
        # ElevenLabs SDK docs -- confirm for the installed version), not raw
        # PCM samples, so it must be written to disk verbatim instead of
        # being passed through soundfile with an assumed sample rate.
        if not isinstance(audio, (bytes, bytearray)):
            # Streaming-style results arrive as an iterable of byte chunks.
            audio = b"".join(audio)

        # Write through the open handle rather than reopening the file by
        # name; delete=False keeps it on disk for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            fp.write(audio)
        return fp.name, "Speech generated successfully"
    except Exception as e:
        # UI boundary: surface any failure as a message instead of raising.
        return None, f"Error in speech generation: {str(e)}"
 
 
 
 
 
 
 
 
 
 
39
 
40
  def generate_sound(description):
41
  duration = 3 # seconds
 
59
 
60
  # Gradio interface
61
  with gr.Blocks() as iface:
62
+ gr.Markdown("# Elevenlabs TTS and Sound Generation Tool")
63
 
64
  with gr.Tab("Text-to-Speech"):
65
  text_input = gr.Textbox(label="Enter text for speech generation")
66
+ voice_input = gr.Dropdown(["Rachel", "Domi", "Bella", "Antoni", "Elli", "Josh", "Arnold", "Adam", "Sam"], label="Select Voice", value="Rachel")
67
+ model_input = gr.Dropdown(["eleven_monolingual_v1", "eleven_multilingual_v1"], label="Select Model", value="eleven_monolingual_v1")
68
  speech_button = gr.Button("Generate Speech")
69
  speech_output = gr.Audio(label="Generated Speech")
70
  speech_message = gr.Textbox(label="Message")
 
76
  sound_message = gr.Textbox(label="Message")
77
 
78
  speech_button.click(text_to_speech_with_emotion,
79
+ inputs=[text_input, voice_input, model_input],
80
  outputs=[speech_output, speech_message])
81
  sound_button.click(generate_sound,
82
  inputs=[sound_input],