Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,20 +12,23 @@ model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parle
|
|
12 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
13 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
14 |
|
15 |
-
def generate_audio(
|
16 |
"""
|
17 |
-
Generate synthesized speech audio based on the input
|
18 |
|
19 |
Args:
|
20 |
-
|
21 |
-
description (str): A description to guide the voice characteristics.
|
22 |
|
23 |
Returns:
|
24 |
tuple: A tuple containing the audio numpy array and the sampling rate.
|
25 |
"""
|
26 |
-
#
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
29 |
|
30 |
# Generate the audio tensor using the model
|
31 |
generation = model.generate(
|
@@ -42,13 +45,10 @@ def generate_audio(prompt: str, description: str):
|
|
42 |
sampling_rate = model.config.sampling_rate
|
43 |
return (audio_arr, sampling_rate)
|
44 |
|
45 |
-
# Build the Gradio interface
|
46 |
iface = gr.Interface(
|
47 |
fn=generate_audio,
|
48 |
-
inputs=
|
49 |
-
gr.Textbox(label="Prompt", value="अरे, तुम आज कैसे हो?"),
|
50 |
-
gr.Textbox(label="Description", value="Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.")
|
51 |
-
],
|
52 |
outputs=gr.Audio(label="Generated Audio"),
|
53 |
title="Indic Parler TTS",
|
54 |
description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."
|
|
|
12 |
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
|
13 |
description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
|
14 |
|
15 |
+
def generate_audio(text: str):
|
16 |
"""
|
17 |
+
Generate synthesized speech audio based on the input text.
|
18 |
|
19 |
Args:
|
20 |
+
text (str): The text prompt to be spoken.
|
|
|
21 |
|
22 |
Returns:
|
23 |
tuple: A tuple containing the audio numpy array and the sampling rate.
|
24 |
"""
|
25 |
+
# Set a default voice description
|
26 |
+
default_description = ("Divya's voice is monotone yet slightly fast in delivery, with a very close recording "
|
27 |
+
"that almost has no background noise.")
|
28 |
+
|
29 |
+
# Tokenize the default description and the input text
|
30 |
+
description_tokens = description_tokenizer(default_description, return_tensors="pt").to(device)
|
31 |
+
prompt_tokens = tokenizer(text, return_tensors="pt").to(device)
|
32 |
|
33 |
# Generate the audio tensor using the model
|
34 |
generation = model.generate(
|
|
|
45 |
sampling_rate = model.config.sampling_rate
|
46 |
return (audio_arr, sampling_rate)
|
47 |
|
48 |
+
# Build the Gradio interface with a single text input
|
49 |
iface = gr.Interface(
|
50 |
fn=generate_audio,
|
51 |
+
inputs=gr.Textbox(label="Enter Text", value="अरे, तुम आज कैसे हो?"),
|
|
|
|
|
|
|
52 |
outputs=gr.Audio(label="Generated Audio"),
|
53 |
title="Indic Parler TTS",
|
54 |
description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."
|