NLPV committed on
Commit
3e166ec
·
verified ·
1 Parent(s): 8c08a70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -12,20 +12,23 @@ model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parle
12
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
13
  description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
14
 
15
- def generate_audio(prompt: str, description: str):
16
  """
17
- Generate synthesized speech audio based on the input prompt and description.
18
 
19
  Args:
20
- prompt (str): The text prompt to be spoken.
21
- description (str): A description to guide the voice characteristics.
22
 
23
  Returns:
24
  tuple: A tuple containing the audio numpy array and the sampling rate.
25
  """
26
- # Tokenize inputs for the description and prompt
27
- description_tokens = description_tokenizer(description, return_tensors="pt").to(device)
28
- prompt_tokens = tokenizer(prompt, return_tensors="pt").to(device)
 
 
 
 
29
 
30
  # Generate the audio tensor using the model
31
  generation = model.generate(
@@ -42,13 +45,10 @@ def generate_audio(prompt: str, description: str):
42
  sampling_rate = model.config.sampling_rate
43
  return (audio_arr, sampling_rate)
44
 
45
- # Build the Gradio interface
46
  iface = gr.Interface(
47
  fn=generate_audio,
48
- inputs=[
49
- gr.Textbox(label="Prompt", value="अरे, तुम आज कैसे हो?"),
50
- gr.Textbox(label="Description", value="Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.")
51
- ],
52
  outputs=gr.Audio(label="Generated Audio"),
53
  title="Indic Parler TTS",
54
  description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."
 
12
  tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
13
  description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
14
 
15
+ def generate_audio(text: str):
16
  """
17
+ Generate synthesized speech audio based on the input text.
18
 
19
  Args:
20
+ text (str): The text prompt to be spoken.
 
21
 
22
  Returns:
23
  tuple: A tuple containing the audio numpy array and the sampling rate.
24
  """
25
+ # Set a default voice description
26
+ default_description = ("Divya's voice is monotone yet slightly fast in delivery, with a very close recording "
27
+ "that almost has no background noise.")
28
+
29
+ # Tokenize the default description and the input text
30
+ description_tokens = description_tokenizer(default_description, return_tensors="pt").to(device)
31
+ prompt_tokens = tokenizer(text, return_tensors="pt").to(device)
32
 
33
  # Generate the audio tensor using the model
34
  generation = model.generate(
 
45
  sampling_rate = model.config.sampling_rate
46
  return (audio_arr, sampling_rate)
47
 
48
+ # Build the Gradio interface with a single text input
49
  iface = gr.Interface(
50
  fn=generate_audio,
51
+ inputs=gr.Textbox(label="Enter Text", value="अरे, तुम आज कैसे हो?"),
 
 
 
52
  outputs=gr.Audio(label="Generated Audio"),
53
  title="Indic Parler TTS",
54
  description="Generate synthesized speech using the Indic Parler TTS model from ai4bharat."