capradeepgujaran committed on
Commit
05609a5
·
verified ·
1 Parent(s): 019e308

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -42
app.py CHANGED
@@ -1,70 +1,46 @@
1
  import gradio as gr
 
 
 
2
  from transformers import pipeline
3
  import torch
4
- import numpy as np
5
- import os
6
- from functools import lru_cache
7
 
8
- # Initialize TTS pipeline
9
  try:
10
  device = 0 if torch.cuda.is_available() else -1
11
- tts_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts", device=device)
12
- except Exception as e:
13
- print(f"Error initializing TTS pipeline: {e}")
14
- tts_pipeline = None
15
-
16
- # Initialize text-to-audio pipeline
17
- try:
18
  text_to_audio = pipeline("text-to-audio", model="facebook/musicgen-small", device=device)
19
  except Exception as e:
20
  print(f"Error initializing text-to-audio pipeline: {e}")
21
  text_to_audio = None
22
 
23
- @lru_cache(maxsize=32)
24
- def generate_speech_cached(text, emotion):
25
  try:
26
- if tts_pipeline is not None:
27
- # Note: emotion is not used in this basic implementation
28
- speech = tts_pipeline(text, forward_params={"vocoder_kwargs": {"do_denormalize": True}})
29
- return (speech['audio'], speech['sampling_rate']), "Speech generated successfully"
30
- else:
31
- return None, "TTS pipeline not available. Check logs for initialization error."
32
  except Exception as e:
33
  return None, f"Error in speech generation: {str(e)}"
34
 
35
- @lru_cache(maxsize=32)
36
- def generate_sound_cached(text):
37
  try:
38
  if text_to_audio is not None:
39
  audio = text_to_audio(text, forward_params={"do_sample": True, "max_new_tokens": 256})
40
- return audio['audio'], audio['sampling_rate'], "Sound generated successfully"
 
 
41
  else:
42
- return None, None, "Text-to-audio pipeline not available. Check logs for initialization error."
43
  except Exception as e:
44
- return None, None, f"Error in sound generation: {str(e)}"
45
-
46
- def generate_speech(text, emotion):
47
- result, message = generate_speech_cached(text, emotion)
48
- if result:
49
- audio, sampling_rate = result
50
- return (gr.Audio(value=(sampling_rate, audio)), message)
51
- else:
52
- return (None, message)
53
-
54
- def generate_sound(text):
55
- audio, sampling_rate, message = generate_sound_cached(text)
56
- if audio is not None:
57
- return (gr.Audio(value=(sampling_rate, audio)), message)
58
- else:
59
- return (None, message)
60
 
61
  # Gradio interface
62
  with gr.Blocks() as iface:
63
- gr.Markdown("# Quick Text-to-Speech and Text-to-Sound Generation Tool")
64
 
65
  with gr.Tab("Text-to-Speech"):
66
  text_input = gr.Textbox(label="Enter text for speech generation")
67
- emotion_input = gr.Dropdown(["Neutral", "Happy", "Sad", "Angry"], label="Select Emotion (Not implemented)")
68
  speech_button = gr.Button("Generate Speech")
69
  speech_output = gr.Audio(label="Generated Speech")
70
  speech_message = gr.Textbox(label="Message")
@@ -75,7 +51,7 @@ with gr.Blocks() as iface:
75
  sound_output = gr.Audio(label="Generated Sound")
76
  sound_message = gr.Textbox(label="Message")
77
 
78
- speech_button.click(generate_speech, inputs=[text_input, emotion_input], outputs=[speech_output, speech_message])
79
  sound_button.click(generate_sound, inputs=[sound_input], outputs=[sound_output, sound_message])
80
 
81
  iface.launch()
 
1
  import gradio as gr
2
+ from gtts import gTTS
3
+ import os
4
+ import tempfile
5
  from transformers import pipeline
6
  import torch
 
 
 
7
 
8
# Build the MusicGen text-to-audio pipeline once at import time. Any failure
# is printed and leaves text_to_audio as None so the app can still start.
try:
    if torch.cuda.is_available():
        device = 0
    else:
        device = -1
    text_to_audio = pipeline(
        "text-to-audio",
        model="facebook/musicgen-small",
        device=device,
    )
except Exception as e:
    print(f"Error initializing text-to-audio pipeline: {e}")
    text_to_audio = None
15
 
16
def generate_speech(text, language):
    """Synthesize speech for ``text`` with gTTS and return it as an MP3 file.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang`` (e.g. "en").

    Returns:
        A ``(audio, message)`` pair: ``audio`` is a ``gr.Audio`` pointing at
        the generated MP3 file, or ``None`` if generation failed; ``message``
        is a status string for the UI.
    """
    try:
        tts = gTTS(text=text, lang=language)
        # Reserve a unique path, then close the handle before gTTS writes to
        # it: reopening a NamedTemporaryFile by name while it is still open
        # is platform-dependent (it fails on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            mp3_path = fp.name
        tts.save(mp3_path)
        return gr.Audio(value=mp3_path, type="filepath"), "Speech generated successfully"
    except Exception as e:
        return None, f"Error in speech generation: {str(e)}"
24
 
25
def generate_sound(text):
    """Generate an audio clip from a text prompt with the text-to-audio pipeline.

    Args:
        text: Prompt describing the sound to generate.

    Returns:
        A ``(audio, message)`` pair: ``audio`` is a ``gr.Audio`` holding the
        generated waveform, or ``None`` when the pipeline is unavailable or
        generation failed; ``message`` is a status string for the UI.
    """
    try:
        if text_to_audio is not None:
            audio = text_to_audio(text, forward_params={"do_sample": True, "max_new_tokens": 256})
            # The pipeline result carries the waveform under "audio" and its
            # rate under "sampling_rate". The waveform is a NumPy array, which
            # has no .save(), so pass Gradio the (rate, samples) pair directly
            # instead of writing a temp file.
            # squeeze() drops singleton batch/channel axes if present
            # (assumes mono output -- TODO confirm for other models).
            waveform = audio["audio"].squeeze()
            return gr.Audio(value=(audio["sampling_rate"], waveform)), "Sound generated successfully"
        else:
            return None, "Text-to-audio pipeline not available. Check logs for initialization error."
    except Exception as e:
        return None, f"Error in sound generation: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Gradio interface
38
  with gr.Blocks() as iface:
39
+ gr.Markdown("# Lightweight Text-to-Speech and Text-to-Sound Generation Tool")
40
 
41
  with gr.Tab("Text-to-Speech"):
42
  text_input = gr.Textbox(label="Enter text for speech generation")
43
+ language_input = gr.Dropdown(["en", "es", "fr", "de", "it"], label="Select Language", value="en")
44
  speech_button = gr.Button("Generate Speech")
45
  speech_output = gr.Audio(label="Generated Speech")
46
  speech_message = gr.Textbox(label="Message")
 
51
  sound_output = gr.Audio(label="Generated Sound")
52
  sound_message = gr.Textbox(label="Message")
53
 
54
+ speech_button.click(generate_speech, inputs=[text_input, language_input], outputs=[speech_output, speech_message])
55
  sound_button.click(generate_sound, inputs=[sound_input], outputs=[sound_output, sound_message])
56
 
57
  iface.launch()