saboor345 committed on
Commit
e9f567e
·
1 Parent(s): 42c5848

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -28
app.py CHANGED
@@ -6,6 +6,7 @@ import numpy as np
6
  from bark.generation import preload_models, SAMPLE_RATE
7
  from bark import generate_audio
8
  from scipy.io import wavfile
 
9
  import gradio as gr
10
  nltk.download('punkt')
11
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
@@ -13,47 +14,179 @@ preload_models()
13
 
14
 
15
 
16
- def generate_audio_from_text(text,language_prompt,gender_prompt):
17
  if language_prompt == "english":
18
- if gender_prompt == "male":
19
- history_prompt = "v2/en_speaker_8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  else:
21
  history_prompt = "v2/en_speaker_9"
 
22
  elif language_prompt == "french":
23
- if gender_prompt == "male":
24
- history_prompt = "v2/fr_speaker_0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  else:
26
- history_prompt = "v2/fr_speaker_1"
 
27
  elif language_prompt =="german":
28
- if gender_prompt=="male":
29
- history_prompt = "v2/de_speaker_2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  else:
31
- history_prompt="v2/de_speaker_3"
 
32
  elif language_prompt =="hindi":
33
- if gender_prompt=="male":
34
- history_prompt = "v2/hi_speaker_8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  else:
36
- history_prompt="v2/hi_speaker_3"
 
37
  elif language_prompt =="chinese":
38
- if gender_prompt=="male":
39
- history_prompt = "v2/zh_speaker_1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  else:
41
- history_prompt="v2/zh_speaker_4"
 
42
  elif language_prompt =="italian":
43
- if gender_prompt=="male":
44
- history_prompt = "v2/it_speaker_4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  else:
46
- history_prompt="v2/it_speaker_7"
 
47
  elif language_prompt =="japanese":
48
- if gender_prompt=="male":
49
- history_prompt = "v2/ja_speaker_2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  else:
51
- history_prompt="v2/ja_speaker_0"
52
  else:
53
  raise ValueError("Invalid language or gender selection")
54
 
55
  sentences = nltk.sent_tokenize(text)
56
- # silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence
57
 
58
  pieces = []
59
  for sentence in sentences:
@@ -83,18 +216,25 @@ language_options = [
83
 
84
  ]
85
 
86
- gender_options = [
87
- "male",
88
- "female",
 
 
 
 
 
 
 
 
89
  ]
90
-
91
  # Create a Gradio interface with text input and dropdown menus for language and gender
92
  iface = gr.Interface(
93
  fn=generate_audio_from_text,
94
  inputs=[
95
  gr.Textbox(text="Enter text to convert to speech:"),
96
  gr.Dropdown(choices=language_options, label="Select language:"),
97
- gr.Dropdown(choices=gender_options, label="Select gender:"),
98
  ],
99
  outputs=gr.outputs.File(label="Download WAV File"),
100
  title="Text-to-Speech App Vertical Solution",
@@ -102,4 +242,4 @@ iface = gr.Interface(
102
  )
103
 
104
  # Launch the Gradio app with sharing enabled
105
- iface.launch(debug=True, enable_queue=True)
 
6
  from bark.generation import preload_models, SAMPLE_RATE
7
  from bark import generate_audio
8
  from scipy.io import wavfile
9
+ from IPython.display import Audio
10
  import gradio as gr
11
  nltk.download('punkt')
12
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
14
 
15
 
16
 
17
+ def generate_audio_from_text(text,language_prompt,speaker_prompt):
18
  if language_prompt == "english":
19
+ if speaker_prompt=="speaker 1":
20
+ history_prompt = "v2/en_speaker_0"
21
+ elif speaker_prompt=="speaker 2":
22
+ history_prompt = "v2/en_speaker_1"
23
+ elif speaker_prompt=="speaker 3":
24
+ history_prompt = "v2/en_speaker_2"
25
+ elif speaker_prompt=="speaker 4":
26
+ history_prompt = "v2/en_speaker_3"
27
+ elif speaker_prompt=="speaker 5":
28
+ history_prompt = "v2/en_speaker_4"
29
+ elif speaker_prompt=="speaker 6":
30
+ history_prompt = "v2/en_speaker_5"
31
+ elif speaker_prompt=="speaker 7":
32
+ history_prompt = "v2/en_speaker_6"
33
+ elif speaker_prompt=="speaker 8":
34
+ history_prompt = "v2/en_speaker_7"
35
+ elif speaker_prompt=="speaker 9":
36
+ history_prompt = "v2/en_speaker_8"
37
+ elif speaker_prompt=="speaker 10":
38
+ history_prompt = "v2/en_speaker_9"
39
  else:
40
  history_prompt = "v2/en_speaker_9"
41
+
42
  elif language_prompt == "french":
43
+ if speaker_prompt=="speaker 1":
44
+ history_prompt = "v2/fr_speaker_0"
45
+ elif speaker_prompt=="speaker 2":
46
+ history_prompt = "v2/fr_speaker_1"
47
+ elif speaker_prompt=="speaker 3":
48
+ history_prompt = "v2/fr_speaker_2"
49
+ elif speaker_prompt=="speaker 4":
50
+ history_prompt = "v2/fr_speaker_3"
51
+ elif speaker_prompt=="speaker 5":
52
+ history_prompt = "v2/fr_speaker_4"
53
+ elif speaker_prompt=="speaker 6":
54
+ history_prompt = "v2/fr_speaker_5"
55
+ elif speaker_prompt=="speaker 7":
56
+ history_prompt = "v2/fr_speaker_6"
57
+ elif speaker_prompt=="speaker 8":
58
+ history_prompt = "v2/fr_speaker_7"
59
+ elif speaker_prompt=="speaker 9":
60
+ history_prompt = "v2/fr_speaker_8"
61
+ elif speaker_prompt=="speaker 10":
62
+ history_prompt = "v2/fr_speaker_9"
63
  else:
64
+ history_prompt = "v2/fr_speaker_9"
65
+
66
  elif language_prompt =="german":
67
+ if speaker_prompt=="speaker 1":
68
+ history_prompt = "v2/de_speaker_0"
69
+ elif speaker_prompt=="speaker 2":
70
+ history_prompt = "v2/de_speaker_1"
71
+ elif speaker_prompt=="speaker 3":
72
+ history_prompt = "v2/de_speaker_2"
73
+ elif speaker_prompt=="speaker 4":
74
+ history_prompt = "v2/de_speaker_3"
75
+ elif speaker_prompt=="speaker 5":
76
+ history_prompt = "v2/de_speaker_4"
77
+ elif speaker_prompt=="speaker 6":
78
+ history_prompt = "v2/de_speaker_5"
79
+ elif speaker_prompt=="speaker 7":
80
+ history_prompt = "v2/de_speaker_6"
81
+ elif speaker_prompt=="speaker 8":
82
+ history_prompt = "v2/de_speaker_7"
83
+ elif speaker_prompt=="speaker 9":
84
+ history_prompt = "v2/de_speaker_8"
85
+ elif speaker_prompt=="speaker 10":
86
+ history_prompt = "v2/de_speaker_9"
87
  else:
88
+ history_prompt = "v2/de_speaker_9"
89
+
90
  elif language_prompt =="hindi":
91
+ if speaker_prompt=="speaker 1":
92
+ history_prompt = "v2/hi_speaker_0"
93
+ elif speaker_prompt=="speaker 2":
94
+ history_prompt = "v2/hi_speaker_1"
95
+ elif speaker_prompt=="speaker 3":
96
+ history_prompt = "v2/hi_speaker_2"
97
+ elif speaker_prompt=="speaker 4":
98
+ history_prompt = "v2/hi_speaker_3"
99
+ elif speaker_prompt=="speaker 5":
100
+ history_prompt = "v2/hi_speaker_4"
101
+ elif speaker_prompt=="speaker 6":
102
+ history_prompt = "v2/hi_speaker_5"
103
+ elif speaker_prompt=="speaker 7":
104
+ history_prompt = "v2/hi_speaker_6"
105
+ elif speaker_prompt=="speaker 8":
106
+ history_prompt = "v2/hi_speaker_7"
107
+ elif speaker_prompt=="speaker 9":
108
+ history_prompt = "v2/hi_speaker_8"
109
+ elif speaker_prompt=="speaker 10":
110
+ history_prompt = "v2/hi_speaker_9"
111
  else:
112
+ history_prompt = "v2/hi_speaker_9"
113
+
114
  elif language_prompt =="chinese":
115
+ if speaker_prompt=="speaker 1":
116
+ history_prompt = "v2/zh_speaker_0"
117
+ elif speaker_prompt=="speaker 2":
118
+ history_prompt = "v2/zh_speaker_1"
119
+ elif speaker_prompt=="speaker 3":
120
+ history_prompt = "v2/zh_speaker_2"
121
+ elif speaker_prompt=="speaker 4":
122
+ history_prompt = "v2/zh_speaker_3"
123
+ elif speaker_prompt=="speaker 5":
124
+ history_prompt = "v2/zh_speaker_4"
125
+ elif speaker_prompt=="speaker 6":
126
+ history_prompt = "v2/zh_speaker_5"
127
+ elif speaker_prompt=="speaker 7":
128
+ history_prompt = "v2/zh_speaker_6"
129
+ elif speaker_prompt=="speaker 8":
130
+ history_prompt = "v2/zh_speaker_7"
131
+ elif speaker_prompt=="speaker 9":
132
+ history_prompt = "v2/zh_speaker_8"
133
+ elif speaker_prompt=="speaker 10":
134
+ history_prompt = "v2/zh_speaker_9"
135
  else:
136
+ history_prompt = "v2/zh_speaker_9"
137
+
138
  elif language_prompt =="italian":
139
+ if speaker_prompt=="speaker 1":
140
+ history_prompt = "v2/it_speaker_0"
141
+ elif speaker_prompt=="speaker 2":
142
+ history_prompt = "v2/it_speaker_1"
143
+ elif speaker_prompt=="speaker 3":
144
+ history_prompt = "v2/it_speaker_2"
145
+ elif speaker_prompt=="speaker 4":
146
+ history_prompt = "v2/it_speaker_3"
147
+ elif speaker_prompt=="speaker 5":
148
+ history_prompt = "v2/it_speaker_4"
149
+ elif speaker_prompt=="speaker 6":
150
+ history_prompt = "v2/it_speaker_5"
151
+ elif speaker_prompt=="speaker 7":
152
+ history_prompt = "v2/it_speaker_6"
153
+ elif speaker_prompt=="speaker 8":
154
+ history_prompt = "v2/it_speaker_7"
155
+ elif speaker_prompt=="speaker 9":
156
+ history_prompt = "v2/it_speaker_8"
157
+ elif speaker_prompt=="speaker 10":
158
+ history_prompt = "v2/it_speaker_9"
159
  else:
160
+ history_prompt = "v2/it_speaker_9"
161
+
162
  elif language_prompt =="japanese":
163
+ if speaker_prompt=="speaker 1":
164
+ history_prompt = "v2/ja_speaker_0"
165
+ elif speaker_prompt=="speaker 2":
166
+ history_prompt = "v2/ja_speaker_1"
167
+ elif speaker_prompt=="speaker 3":
168
+ history_prompt = "v2/ja_speaker_2"
169
+ elif speaker_prompt=="speaker 4":
170
+ history_prompt = "v2/ja_speaker_3"
171
+ elif speaker_prompt=="speaker 5":
172
+ history_prompt = "v2/ja_speaker_4"
173
+ elif speaker_prompt=="speaker 6":
174
+ history_prompt = "v2/ja_speaker_5"
175
+ elif speaker_prompt=="speaker 7":
176
+ history_prompt = "v2/ja_speaker_6"
177
+ elif speaker_prompt=="speaker 8":
178
+ history_prompt = "v2/ja_speaker_7"
179
+ elif speaker_prompt=="speaker 9":
180
+ history_prompt = "v2/ja_speaker_8"
181
+ elif speaker_prompt=="speaker 10":
182
+ history_prompt = "v2/ja_speaker_9"
183
  else:
184
+ history_prompt = "v2/ja_speaker_9"
185
  else:
186
  raise ValueError("Invalid language or gender selection")
187
 
188
  sentences = nltk.sent_tokenize(text)
189
+ silence = np.zeros(int(0.25 * SAMPLE_RATE)) # quarter second of silence
190
 
191
  pieces = []
192
  for sentence in sentences:
 
216
 
217
  ]
218
 
219
+ speaker_options=[
220
+ "speaker 1",
221
+ "speaker 2",
222
+ "speaker 3",
223
+ "speaker 4",
224
+ "speaker 5",
225
+ "speaker 6",
226
+ "speaker 7",
227
+ "speaker 8",
228
+ "speaker 9",
229
+ "speaker 10",
230
  ]
 
231
  # Create a Gradio interface with text input and dropdown menus for language and speaker
232
  iface = gr.Interface(
233
  fn=generate_audio_from_text,
234
  inputs=[
235
  gr.Textbox(text="Enter text to convert to speech:"),
236
  gr.Dropdown(choices=language_options, label="Select language:"),
237
+ gr.Dropdown(choices=speaker_options, label="Select speaker:"),
238
  ],
239
  outputs=gr.outputs.File(label="Download WAV File"),
240
  title="Text-to-Speech App Vertical Solution",
 
242
  )
243
 
244
  # Launch the Gradio app with sharing enabled
245
+ iface.launch(share=True, debug=True, enable_queue=True)