Spaces:

capradeepgujaran
/

VoiceOversV3

Running

App Files Files Community

capradeepgujaran committed on Oct 8, 2024

Commit

fa1f44a

verified ·

1 Parent(s): 783f242

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -80

app.py CHANGED Viewed

@@ -1,19 +1,13 @@
 import gradio as gr
-import numpy as np
 import tempfile
 import os
 from openai import OpenAI
-import soundfile as sf
-import requests
-import json
 # Initialize OpenAI client
 client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-# ChatTTS API endpoint and key
-CHATTTS_API_ENDPOINT = "https://api.chattts.com/v1/generate"
-CHATTTS_API_KEY = os.environ.get("CHATTTS_API_KEY")
 def openai_tts(text, voice, model):
     try:
         response = client.audio.speech.create(
@@ -31,94 +25,56 @@ def openai_tts(text, voice, model):
     except Exception as e:
         return None, f"Error in OpenAI TTS speech generation: {str(e)}"
def chattts(text, voice, style):
    """Generate speech through the ChatTTS HTTP API and save it as an MP3.

    Args:
        text: Text to synthesise.
        voice: Voice identifier sent in the request payload.
        style: Style identifier sent in the request payload.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the temporary
        ``.mp3`` file holding the response body; on any failure ``path`` is
        ``None`` and ``message`` describes the error.
    """
    try:
        payload = {
            "text": text,
            "voice": voice,
            "style": style,
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {CHATTTS_API_KEY}",
        }
        # requests never times out by default; bound the call so a stalled
        # endpoint cannot block the worker forever.
        response = requests.post(
            CHATTTS_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=60,
        )
        response.raise_for_status()

        # Save the audio to a temporary file. delete=False keeps the file
        # on disk after the handle closes so the caller can read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio.write(response.content)
            temp_audio_path = temp_audio.name

        return temp_audio_path, f"Speech generated with ChatTTS using {voice} voice and {style} style"
    except Exception as e:
        return None, f"Error in ChatTTS speech generation: {str(e)}"
def generate_simple_sound(description, duration, frequency):
    """Synthesise a simple sound chosen by keyword and write it to a WAV file.

    The waveform depends on which keyword appears in ``description``
    (case-insensitive, first match wins):

    * ``"rain"`` - Gaussian noise.
    * ``"wind"`` - a sine wave multiplied by Gaussian noise centred on 1.
    * ``"bird"`` - a sine wave with an exponentially decaying envelope.
    * anything else - a plain sine wave.

    Args:
        description: Free-text description of the sound.
        duration: Length of the sound in seconds.
        frequency: Sine frequency in Hz (unused for ``"rain"``).

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the temporary
        ``.wav`` file; on failure ``path`` is ``None`` and ``message``
        describes the error.
    """
    output_path = None
    try:
        sample_rate = 44100
        t = np.linspace(0, duration, int(sample_rate * duration), False)
        if t.size == 0:
            return None, "Error in sound generation: duration is too short to produce any samples"

        kind = description.lower()
        if "rain" in kind:
            audio = np.random.normal(0, 0.1, len(t))
        elif "wind" in kind:
            audio = np.sin(2 * np.pi * frequency * t) * np.random.normal(1, 0.1, len(t))
        elif "bird" in kind:
            # The former np.tile(...)[:len(t)] step returned the same samples
            # for durations >= 0.5 s and an empty array below that, so it is
            # dropped; the decaying tone itself is unchanged.
            audio = np.sin(2 * np.pi * frequency * t) * np.exp(-0.5 * t)
        else:
            audio = np.sin(2 * np.pi * frequency * t)

        # Normalise to a peak of 1, skipping silent signals to avoid 0/0.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        # mktemp() only returns a name and is race-prone; create the file
        # for real, then close the handle before the writer reopens it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            output_path = temp_audio.name
        sf.write(output_path, audio, sample_rate)

        return output_path, f"Simple sound generated for '{description}'"
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Error in sound generation: {str(e)}"
 # Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# OpenAI TTS, ChatTTS, and Simple Sound Generation Tool")
-    with gr.Tab("Text-to-Speech"):
-        text_input = gr.Textbox(label="Enter text for speech generation")
-        tts_method = gr.Radio(["OpenAI TTS", "ChatTTS"], label="TTS Method", value="OpenAI TTS")
-        with gr.Group():
-            gr.Markdown("OpenAI TTS Options")
-            openai_voice_input = gr.Dropdown(["alloy", "echo", "fable", "onyx", "nova", "shimmer"], label="Select Voice", value="nova")
-            openai_model_input = gr.Dropdown(["tts-1", "tts-1-hd"], label="Select Model", value="tts-1")
-        with gr.Group():
-            gr.Markdown("ChatTTS Options")
-            chattts_voice_input = gr.Dropdown(["en-US-1", "en-US-2", "en-GB-1", "en-GB-2"], label="Select Voice", value="en-US-1")
-            chattts_style_input = gr.Dropdown(["neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"], label="Select Style", value="neutral")
-        speech_button = gr.Button("Generate Speech")
-        speech_output = gr.Audio(label="Generated Speech")
-        speech_message = gr.Textbox(label="Message")
-    with gr.Tab("Simple Sound Generation"):
-        prompt_input = gr.Textbox(label="Sound Description", placeholder="Describe the sound (e.g., rain, wind, bird)...")
-        duration_input = gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, step=0.5, value=5.0)
-        frequency_input = gr.Slider(label="Base Frequency (Hz)", minimum=20, maximum=2000, step=10, value=440)
-        sound_button = gr.Button("Generate Sound")
-        sound_output = gr.Audio(label="Generated Sound")
-        sound_message = gr.Textbox(label="Message")
-    def generate_speech(text, method, openai_voice, openai_model, chattts_voice, chattts_style):
         if method == "OpenAI TTS":
             return openai_tts(text, openai_voice, openai_model)
         else:
-            return chattts(text, chattts_voice, chattts_style)
     speech_button.click(generate_speech,
-                        inputs=[text_input, tts_method, openai_voice_input, openai_model_input, chattts_voice_input, chattts_style_input],
                         outputs=[speech_output, speech_message])
-    sound_button.click(generate_simple_sound,
-                       inputs=[prompt_input, duration_input, frequency_input],
-                       outputs=[sound_output, sound_message])
 iface.launch()

 import gradio as gr
 import tempfile
 import os
 from openai import OpenAI
+from gtts import gTTS
+import base64
 # Initialize OpenAI client
 client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 def openai_tts(text, voice, model):
     try:
         response = client.audio.speech.create(
     except Exception as e:
         return None, f"Error in OpenAI TTS speech generation: {str(e)}"
def google_tts(text, lang, tld):
    """Generate speech with gTTS and save it to a temporary MP3 file.

    Args:
        text: Text to synthesise.
        lang: Language code passed to ``gTTS``.
        tld: Google domain suffix passed to ``gTTS`` (the UI labels it
            as the accent selector).

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the temporary
        ``.mp3`` file; on any failure ``path`` is ``None`` and ``message``
        describes the error.
    """
    temp_audio_path = None
    try:
        tts = gTTS(text=text, lang=lang, tld=tld, slow=False)

        # Reserve a unique path, then close the handle BEFORE saving:
        # tts.save() reopens the file by name, which fails on Windows while
        # the NamedTemporaryFile handle is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio_path = temp_audio.name
        tts.save(temp_audio_path)

        return temp_audio_path, f"Speech generated with Google TTS using {lang} language and {tld} TLD"
    except Exception as e:
        # delete=False means nothing else removes the file; do not leave an
        # empty or partial MP3 behind when synthesis fails.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
        return None, f"Error in Google TTS speech generation: {str(e)}"
 # Gradio interface
 with gr.Blocks() as iface:
+    gr.Markdown("# OpenAI TTS and Google TTS Tool")
+    text_input = gr.Textbox(label="Enter text for speech generation")
+    tts_method = gr.Radio(["OpenAI TTS", "Google TTS"], label="TTS Method", value="OpenAI TTS")
+    with gr.Group() as openai_options:
+        gr.Markdown("OpenAI TTS Options")
+        openai_voice_input = gr.Dropdown(["alloy", "echo", "fable", "onyx", "nova", "shimmer"], label="Select Voice", value="nova")
+        openai_model_input = gr.Dropdown(["tts-1", "tts-1-hd"], label="Select Model", value="tts-1")
+    with gr.Group() as google_options:
+        gr.Markdown("Google TTS Options")
+        google_lang_input = gr.Dropdown(["en", "es", "fr", "de", "it", "ja", "ko", "pt", "ru", "zh-CN"], label="Select Language", value="en")
+        google_tld_input = gr.Dropdown(["com", "co.uk", "com.au", "co.in", "ca", "ie", "co.za"], label="Select TLD (Accent)", value="com")
+    speech_button = gr.Button("Generate Speech")
+    speech_output = gr.Audio(label="Generated Speech")
+    speech_message = gr.Textbox(label="Message")
+    def generate_speech(text, method, openai_voice, openai_model, google_lang, google_tld):
         if method == "OpenAI TTS":
             return openai_tts(text, openai_voice, openai_model)
         else:
+            return google_tts(text, google_lang, google_tld)
+    def update_visible_options(method):
+        return (
+            gr.Group.update(visible=(method == "OpenAI TTS")),
+            gr.Group.update(visible=(method == "Google TTS"))
+        )
     speech_button.click(generate_speech,
+                        inputs=[text_input, tts_method, openai_voice_input, openai_model_input, google_lang_input, google_tld_input],
                         outputs=[speech_output, speech_message])
+    tts_method.change(update_visible_options, inputs=[tts_method], outputs=[openai_options, google_options])
 iface.launch()