Spaces:

capradeepgujaran
/

VoiceOversV3

Running

App Files Files Community

capradeepgujaran committed on Oct 7, 2024

Commit

05609a5

verified ·

1 Parent(s): 019e308

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -42

app.py CHANGED Viewed

@@ -1,70 +1,46 @@
 import gradio as gr
 from transformers import pipeline
 import torch
-import numpy as np
-import os
-from functools import lru_cache
-# Initialize TTS pipeline
 try:
     device = 0 if torch.cuda.is_available() else -1
-    tts_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts", device=device)
-except Exception as e:
-    print(f"Error initializing TTS pipeline: {e}")
-    tts_pipeline = None
-# Initialize text-to-audio pipeline
-try:
     text_to_audio = pipeline("text-to-audio", model="facebook/musicgen-small", device=device)
 except Exception as e:
     print(f"Error initializing text-to-audio pipeline: {e}")
     text_to_audio = None
-@lru_cache(maxsize=32)
-def generate_speech_cached(text, emotion):
     try:
-        if tts_pipeline is not None:
-            # Note: emotion is not used in this basic implementation
-            speech = tts_pipeline(text, forward_params={"vocoder_kwargs": {"do_denormalize": True}})
-            return (speech['audio'], speech['sampling_rate']), "Speech generated successfully"
-        else:
-            return None, "TTS pipeline not available. Check logs for initialization error."
     except Exception as e:
         return None, f"Error in speech generation: {str(e)}"
-@lru_cache(maxsize=32)
-def generate_sound_cached(text):
     try:
         if text_to_audio is not None:
             audio = text_to_audio(text, forward_params={"do_sample": True, "max_new_tokens": 256})
-            return audio['audio'], audio['sampling_rate'], "Sound generated successfully"
         else:
-            return None, None, "Text-to-audio pipeline not available. Check logs for initialization error."
     except Exception as e:
-        return None, None, f"Error in sound generation: {str(e)}"
-def generate_speech(text, emotion):
-    result, message = generate_speech_cached(text, emotion)
-    if result:
-        audio, sampling_rate = result
-        return (gr.Audio(value=(sampling_rate, audio)), message)
-    else:
-        return (None, message)
-def generate_sound(text):
-    audio, sampling_rate, message = generate_sound_cached(text)
-    if audio is not None:
-        return (gr.Audio(value=(sampling_rate, audio)), message)
-    else:
-        return (None, message)
 # Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# Quick Text-to-Speech and Text-to-Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
-        emotion_input = gr.Dropdown(["Neutral", "Happy", "Sad", "Angry"], label="Select Emotion (Not implemented)")
         speech_button = gr.Button("Generate Speech")
         speech_output = gr.Audio(label="Generated Speech")
         speech_message = gr.Textbox(label="Message")
@@ -75,7 +51,7 @@ with gr.Blocks() as iface:
         sound_output = gr.Audio(label="Generated Sound")
         sound_message = gr.Textbox(label="Message")
-    speech_button.click(generate_speech, inputs=[text_input, emotion_input], outputs=[speech_output, speech_message])
     sound_button.click(generate_sound, inputs=[sound_input], outputs=[sound_output, sound_message])
 iface.launch()

 import gradio as gr
+from gtts import gTTS
+import os
+import tempfile
 from transformers import pipeline
 import torch
# Build the text-to-audio pipeline used for sound generation.
# Any failure is printed and leaves `text_to_audio` set to None, which
# generate_sound() checks before trying to use the pipeline.
try:
    # Device index handed to `pipeline`: 0 when CUDA is available, else -1.
    if torch.cuda.is_available():
        device = 0
    else:
        device = -1
    text_to_audio = pipeline("text-to-audio", model="facebook/musicgen-small", device=device)
except Exception as init_error:
    print(f"Error initializing text-to-audio pipeline: {init_error}")
    text_to_audio = None
def generate_speech(text, language):
    """Synthesize speech for *text* with gTTS and hand Gradio an MP3 path.

    Args:
        text: Text to speak.
        language: Language code passed to gTTS as ``lang`` (the UI dropdown
            offers "en", "es", "fr", "de" and "it").

    Returns:
        A ``(audio, message)`` pair for the two Gradio outputs: a
        ``gr.Audio`` pointing at the generated MP3 plus a success message,
        or ``(None, <error message>)`` when generation fails.
    """
    # Reject blank input up front with a clear message instead of relying
    # on whatever error the TTS backend raises for empty text.
    if not text or not text.strip():
        return None, "Error in speech generation: No text provided"

    mp3_path = None
    try:
        tts = gTTS(text=text, lang=language)
        # Only reserve a unique file name here. The handle is closed before
        # gTTS writes to the path, because reopening a still-open named
        # temporary file by name is not possible on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            mp3_path = fp.name
        tts.save(mp3_path)
        return gr.Audio(value=mp3_path, type="filepath"), "Speech generated successfully"
    except Exception as e:
        # delete=False means nothing else removes a partial file, so clean
        # it up here; cleanup problems must not mask the original error.
        if mp3_path is not None:
            try:
                os.remove(mp3_path)
            except OSError:
                pass
        return None, f"Error in speech generation: {str(e)}"
def generate_sound(text):
    """Generate audio from a text prompt with the module-level pipeline.

    Args:
        text: Prompt passed to ``text_to_audio``.

    Returns:
        A ``(audio, message)`` pair for the two Gradio outputs. On success
        ``audio`` is ``(sampling_rate, waveform)``; otherwise it is ``None``
        and ``message`` explains what went wrong.
    """
    # Reject blank prompts before touching the model.
    if not text or not text.strip():
        return None, "Error in sound generation: No text provided"
    try:
        if text_to_audio is None:
            return None, "Text-to-audio pipeline not available. Check logs for initialization error."
        audio = text_to_audio(text, forward_params={"do_sample": True, "max_new_tokens": 256})
        # The pipeline result is read via its 'audio' and 'sampling_rate'
        # keys. The waveform is array data rather than an object with a
        # .save() method, so it is handed to Gradio as a
        # (sampling_rate, samples) pair instead of via a temp file.
        # NOTE(review): squeeze() assumes a single mono clip whose batch and
        # channel axes have length 1 - confirm for the configured model.
        waveform = audio["audio"].squeeze()
        return (audio["sampling_rate"], waveform), "Sound generated successfully"
    except Exception as e:
        return None, f"Error in sound generation: {str(e)}"
 # Gradio interface
 with gr.Blocks() as iface:
+    gr.Markdown("# Lightweight Text-to-Speech and Text-to-Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
+        language_input = gr.Dropdown(["en", "es", "fr", "de", "it"], label="Select Language", value="en")
         speech_button = gr.Button("Generate Speech")
         speech_output = gr.Audio(label="Generated Speech")
         speech_message = gr.Textbox(label="Message")
         sound_output = gr.Audio(label="Generated Sound")
         sound_message = gr.Textbox(label="Message")
+    speech_button.click(generate_speech, inputs=[text_input, language_input], outputs=[speech_output, speech_message])
     sound_button.click(generate_sound, inputs=[sound_input], outputs=[sound_output, sound_message])
 iface.launch()