Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -42,7 +42,10 @@ voices = {
|
|
42 |
"Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
|
43 |
"Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
|
44 |
"Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
|
45 |
-
"Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue."
|
|
|
|
|
|
|
46 |
}
|
47 |
|
48 |
# Sidebar for voice selection
|
@@ -64,7 +67,17 @@ try:
|
|
64 |
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
65 |
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
66 |
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
audio_arr = generation.cpu().numpy().squeeze()
|
69 |
|
70 |
# Save the audio file
|
|
|
42 |
"Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
|
43 |
"Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
|
44 |
"Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
|
45 |
+
"Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue.",
|
46 |
+
"Bass-Heavy Male": "A deep, resonant male voice with a strong bass, ideal for dramatic and powerful delivery.",
|
47 |
+
"Actor Voice 1": "An actor's voice with a dynamic range, capable of various emotional tones and expressions.",
|
48 |
+
"Actor Voice 2": "A distinct and engaging actor's voice, providing a unique flair and character to the speech."
|
49 |
}
|
50 |
|
51 |
# Sidebar for voice selection
|
|
|
67 |
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
68 |
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
69 |
|
70 |
+
# Create attention masks
|
71 |
+
attention_mask = tokenizer(description, return_tensors="pt").attention_mask.to(device)
|
72 |
+
prompt_attention_mask = tokenizer(prompt, return_tensors="pt").attention_mask.to(device)
|
73 |
+
|
74 |
+
# Generate speech
|
75 |
+
generation = model.generate(
|
76 |
+
input_ids=input_ids,
|
77 |
+
prompt_input_ids=prompt_input_ids,
|
78 |
+
attention_mask=attention_mask,
|
79 |
+
prompt_attention_mask=prompt_attention_mask
|
80 |
+
)
|
81 |
audio_arr = generation.cpu().numpy().squeeze()
|
82 |
|
83 |
# Save the audio file
|