ibrahim313 committed on
Commit
a891c75
·
verified ·
1 Parent(s): 13175c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -42,7 +42,10 @@ voices = {
42
  "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
43
  "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
44
  "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
45
- "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue."
 
 
 
46
  }
47
 
48
  # Sidebar for voice selection
@@ -64,7 +67,17 @@ try:
64
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
65
  prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
66
 
67
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
 
 
 
 
 
 
 
 
 
 
68
  audio_arr = generation.cpu().numpy().squeeze()
69
 
70
  # Save the audio file
 
42
  "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
43
  "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
44
  "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
45
+ "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue.",
46
+ "Bass-Heavy Male": "A deep, resonant male voice with a strong bass, ideal for dramatic and powerful delivery.",
47
+ "Actor Voice 1": "An actor's voice with a dynamic range, capable of various emotional tones and expressions.",
48
+ "Actor Voice 2": "A distinct and engaging actor's voice, providing a unique flair and character to the speech."
49
  }
50
 
51
  # Sidebar for voice selection
 
67
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
68
  prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
69
 
70
+ # Create attention masks
71
+ attention_mask = tokenizer(description, return_tensors="pt").attention_mask.to(device)
72
+ prompt_attention_mask = tokenizer(prompt, return_tensors="pt").attention_mask.to(device)
73
+
74
+ # Generate speech
75
+ generation = model.generate(
76
+ input_ids=input_ids,
77
+ prompt_input_ids=prompt_input_ids,
78
+ attention_mask=attention_mask,
79
+ prompt_attention_mask=prompt_attention_mask
80
+ )
81
  audio_arr = generation.cpu().numpy().squeeze()
82
 
83
  # Save the audio file