Spaces:

ibrahim313
/

TextToAudio

Build error

ibrahim313 committed on Aug 28, 2024

Commit

a891c75

verified ·

1 Parent(s): 13175c1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,7 +42,10 @@ voices = {
     "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
     "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
     "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
-    "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue."
 }
 # Sidebar for voice selection
@@ -64,7 +67,17 @@ try:
         input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
         prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-        generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
         audio_arr = generation.cpu().numpy().squeeze()
         # Save the audio file

     "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
     "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
     "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
+    "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue.",
+    "Bass-Heavy Male": "A deep, resonant male voice with a strong bass, ideal for dramatic and powerful delivery.",
+    "Actor Voice 1": "An actor's voice with a dynamic range, capable of various emotional tones and expressions.",
+    "Actor Voice 2": "A distinct and engaging actor's voice, providing a unique flair and character to the speech."
 }
 # Sidebar for voice selection
         input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
         prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+        # Create attention masks
+        attention_mask = tokenizer(description, return_tensors="pt").attention_mask.to(device)
+        prompt_attention_mask = tokenizer(prompt, return_tensors="pt").attention_mask.to(device)
+        # Generate speech
+        generation = model.generate(
+            input_ids=input_ids,
+            prompt_input_ids=prompt_input_ids,
+            attention_mask=attention_mask,
+            prompt_attention_mask=prompt_attention_mask
+        )
         audio_arr = generation.cpu().numpy().squeeze()
         # Save the audio file