Wismut committed on
Commit
4d3cdf4
·
1 Parent(s): 2d36cf6

fixed saved voice persistence on reload

Browse files
Files changed (2) hide show
  1. app.py +95 -122
  2. voices.json +1 -1
app.py CHANGED
@@ -16,6 +16,10 @@ import nltk
16
  matplotlib.use("Agg") # Use non-interactive backend
17
  import matplotlib.pyplot as plt
18
 
 
 
 
 
19
  from text2speech import tts_randomized, parse_speed, tts_with_style_vector
20
 
21
  # Constants and Paths
@@ -47,7 +51,6 @@ print(f"Using device: {device}")
47
  # LOAD PCA MODEL AND ANNOTATED FEATURES
48
  ##############################################################################
49
 
50
- # Load PCA model and annotated features
51
  try:
52
  pca = joblib.load(PCA_MODEL_PATH)
53
  print("PCA model loaded successfully.")
@@ -90,12 +93,7 @@ def save_voices_json(data, path=VOICES_JSON_PATH):
90
  def update_sliders(voice_name):
91
  """
92
  Update slider values based on the selected predefined voice using reverse PCA.
93
-
94
- Args:
95
- voice_name (str): The name of the selected voice.
96
-
97
- Returns:
98
- list: A list of PCA component values to set the sliders.
99
  """
100
  if not voice_name:
101
  # Return default slider values (e.g., zeros) if no voice is selected
@@ -124,24 +122,16 @@ def update_sliders(voice_name):
124
  def generate_audio_with_voice(text, voice_key, speed_val):
125
  """
126
  Generate audio using the style vector of the selected predefined voice.
127
-
128
- Args:
129
- text (str): The text to synthesize.
130
- voice_key (str): The name of the selected voice.
131
- speed_val (float): The speed multiplier.
132
-
133
- Returns:
134
- tuple: (audio_tuple, style_vector)
135
  """
136
  try:
137
  # Load voices data
138
  voices_data = load_voices_json()
139
-
140
  if voice_key not in voices_data:
141
- print(f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}.")
142
- return None, None, "Selected voice not found."
 
143
 
144
- # Retrieve the style vector for the selected voice
145
  style_vector = np.array(voices_data[voice_key], dtype=np.float32).reshape(1, -1)
146
  print(f"Selected Voice: {voice_key}")
147
  print(f"Style Vector (First 6): {style_vector[0][:6]}")
@@ -149,7 +139,7 @@ def generate_audio_with_voice(text, voice_key, speed_val):
149
  # Convert to torch tensor and move to device
150
  style_vec_torch = torch.from_numpy(style_vector).float().to(device)
151
 
152
- # Generate audio using the TTS model
153
  audio_np = tts_with_style_vector(
154
  text,
155
  style_vec=style_vec_torch,
@@ -161,14 +151,12 @@ def generate_audio_with_voice(text, voice_key, speed_val):
161
  )
162
 
163
  if audio_np is None:
164
- print("Audio generation failed.")
165
- return None, None, "Audio generation failed."
 
166
 
167
- # Prepare audio for Gradio
168
- sr = 24000 # Adjust based on your actual sampling rate
169
  audio_tuple = (sr, audio_np)
170
-
171
- # Return audio, image, and style vector
172
  return audio_tuple, style_vector.tolist()
173
 
174
  except Exception as e:
@@ -177,7 +165,7 @@ def generate_audio_with_voice(text, voice_key, speed_val):
177
 
178
 
179
  def build_modified_vector(voice_key, top6_values):
180
- """Build a modified style vector by updating top 6 PCA components."""
181
  voices_data = load_voices_json()
182
  if voice_key not in voices_data:
183
  print(f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}.")
@@ -189,7 +177,6 @@ def build_modified_vector(voice_key, top6_values):
189
  return None
190
 
191
  try:
192
- # Reconstruct the style vector using inverse PCA
193
  pca_components = np.array(top6_values).reshape(1, -1)
194
  reconstructed_vec = pca.inverse_transform(pca_components)[0]
195
  return reconstructed_vec
@@ -198,39 +185,18 @@ def build_modified_vector(voice_key, top6_values):
198
  return None
199
 
200
 
201
- def reconstruct_style_vector(pca_components):
202
- """
203
- Reconstruct the 256-dimensional style vector from PCA components.
204
- """
205
- if pca is None:
206
- print("PCA model is not loaded.")
207
- return None
208
- try:
209
- return pca.inverse_transform([pca_components])[0]
210
- except Exception as e:
211
- print(f"Error during inverse PCA transform: {e}")
212
- return None
213
-
214
-
215
- def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values):
216
  """
217
- Generate audio and produce a matplotlib plot of the style vector.
218
- Returns:
219
- - audio tuple (sr, np_array) for Gradio's Audio
220
- - a PIL Image representing the style vector plot
221
- - the final style vector as a list for State
222
  """
223
  try:
224
- speed_val = parse_speed(speed_str)
225
- print(f"Parsed speed: {speed_val}")
226
-
227
  if randomize:
228
  # Generate randomized style vector
229
  audio_np, random_style_vec = tts_randomized(text, speed=speed_val)
230
  if random_style_vec is None:
231
  print("Failed to generate randomized style vector.")
232
- return None, None, None
233
- # Ensure the style vector is flat and on device
234
  final_vec = (
235
  random_style_vec.cpu().numpy().flatten()
236
  if isinstance(random_style_vec, torch.Tensor)
@@ -238,20 +204,15 @@ def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values)
238
  )
239
  print("Randomized Style Vector (First 6):", final_vec[:6])
240
  else:
241
- # Reconstruct the style vector from slider values using inverse PCA
242
  reconstructed_vec = build_modified_vector(voice_key, slider_values)
243
  if reconstructed_vec is None:
244
- print(
245
- "No reconstructed vector could be constructed, skipping audio generation."
246
- )
247
- return None, None, None
248
 
249
- # Convert to torch tensor and move to device
250
  style_vec_torch = (
251
  torch.from_numpy(reconstructed_vec).float().unsqueeze(0).to(device)
252
  )
253
-
254
- # Generate audio with the reconstructed style vector
255
  audio_np = tts_with_style_vector(
256
  text,
257
  style_vec=style_vec_torch,
@@ -266,22 +227,22 @@ def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values)
266
 
267
  if audio_np is None:
268
  print("Audio generation failed.")
269
- return None, None, None
270
 
271
- # Prepare audio for Gradio
272
- sr = 24000 # Adjust based on your actual sampling rate
273
  audio_tuple = (sr, audio_np)
274
-
275
- # Return audio, image, and style vector
276
  return audio_tuple, final_vec.tolist()
277
 
278
  except Exception as e:
279
- print(f"Error generating audio and style plot: {e}")
280
- return None, None, None
281
 
282
 
283
  def save_style_to_json(style_data, style_name):
284
- """Saves the provided style_data (list of floats) into voices.json under style_name."""
 
 
 
285
  if not style_name.strip():
286
  return "Please enter a new style name before saving."
287
 
@@ -291,37 +252,37 @@ def save_style_to_json(style_data, style_name):
291
  f"Style name '{style_name}' already exists. Please choose a different name."
292
  )
293
 
294
- # Ensure the style_data has the correct length
295
  if len(style_data) != VECTOR_DIMENSION:
296
  return f"Style vector length mismatch. Expected {VECTOR_DIMENSION}, got {len(style_data)}."
297
 
298
- # Save the style vector
299
  voices_data[style_name] = style_data
300
  save_voices_json(voices_data)
301
  return f"Saved style as '{style_name}' in {VOICES_JSON_PATH}."
302
 
303
 
304
- # Gradio Interface Functions
305
-
306
-
307
  def rearrange_voices(new_order):
308
- """Rearrange the voices based on the new_order list."""
 
 
 
309
  voices_data = load_voices_json()
310
  new_order_list = [name.strip() for name in new_order.split(",")]
311
  if not all(name in voices_data for name in new_order_list):
312
  return "Error: New order contains invalid voice names.", list(
313
  voices_data.keys()
314
  )
 
315
  ordered_data = OrderedDict()
316
  for name in new_order_list:
317
  ordered_data[name] = voices_data[name]
 
318
  save_voices_json(ordered_data)
319
  print(f"Voices rearranged: {list(ordered_data.keys())}")
320
  return "Voices rearranged successfully.", list(ordered_data.keys())
321
 
322
 
323
  def delete_voice(selected):
324
- """Delete voices from the voices.json."""
325
  if not selected:
326
  return "No voices selected for deletion.", list(load_voices_json().keys())
327
  voices_data = load_voices_json()
@@ -334,14 +295,15 @@ def delete_voice(selected):
334
 
335
 
336
  def upload_new_voices(uploaded_file):
337
- """Upload new voices from a JSON file."""
338
  if uploaded_file is None:
339
  return "No file uploaded.", list(load_voices_json().keys())
340
  try:
341
  uploaded_data = json.load(uploaded_file)
342
  if not isinstance(uploaded_data, dict):
343
- return "Invalid JSON format. Expected a dictionary of voices.", list(
344
- load_voices_json().keys()
 
345
  )
346
  voices_data = load_voices_json()
347
  voices_data.update(uploaded_data)
@@ -352,10 +314,13 @@ def upload_new_voices(uploaded_file):
352
  return "Uploaded file is not valid JSON.", list(load_voices_json().keys())
353
 
354
 
355
- # Create Gradio Interface with Tabs
 
 
356
 
357
 
358
  def create_combined_interface():
 
359
  voices_data = load_voices_json()
360
  voice_choices = list(voices_data.keys())
361
  default_voice = voice_choices[0] if voice_choices else None
@@ -367,16 +332,12 @@ def create_combined_interface():
367
  }
368
  """
369
 
370
- def refresh_voices():
371
- """Refresh the voices by reloading the JSON."""
372
- new_choices = list(load_voices_json().keys())
373
- print(f"Voices refreshed: {new_choices}")
374
- return gr.Dropdown(choices=new_choices)
375
-
376
  with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
377
  gr.Markdown("# StyleTTS2 Studio - Build custom voices")
378
 
379
- # ----------- Text-to-Speech Tab -----------
 
 
380
  with gr.Tab("Text-to-Speech"):
381
  gr.Markdown("### Generate Speech with Predefined Voices")
382
 
@@ -399,28 +360,29 @@ def create_combined_interface():
399
  label="Speed (%)",
400
  value=120,
401
  )
402
- with gr.Row():
403
- generate_btn = gr.Button("Generate Audio")
404
-
405
  audio_output = gr.Audio(label="Synthesized Audio")
406
 
407
- # Generate button functionality
408
  def on_generate_tts(text, voice, speed):
409
  if not voice:
410
  return None, "No voice selected."
411
  speed_val = speed / 100 # Convert percentage to multiplier
412
- audio, style_vector = generate_audio_with_voice(text, voice, speed_val)
413
- if audio is None:
414
- return None, style_vector # style_vector contains the error message
415
- return audio, "Audio generated successfully."
416
 
417
  generate_btn.click(
418
  fn=on_generate_tts,
419
  inputs=[text_input, voice_dropdown, speed_slider],
420
- outputs=[audio_output, gr.Textbox(label="Status", visible=False)],
421
  )
422
 
423
- # ----------- Voice Studio Tab -----------
 
 
424
  with gr.Tab("Voice Studio"):
425
  gr.Markdown("### Customize and Create New Voices")
426
 
@@ -463,18 +425,16 @@ def create_combined_interface():
463
  # State to hold the last style vector
464
  style_vector_state_studio = gr.State()
465
 
466
- # Generate button functionality
467
  def on_generate_studio(text, voice, speed, *pca_values):
468
  if not voice:
469
  return None, "No voice selected.", None
470
- speed_val = speed / 100 # Convert percentage to multiplier
471
- result = generate_custom_audio(
472
  text, voice, False, speed_val, *pca_values
473
  )
474
- if result is None:
475
  return None, "Failed to generate audio.", None
476
- audio_tuple, style_vector = result
477
- style_vector_state_studio.value = style_vector
478
  return audio_tuple, "Audio generated successfully.", style_vector
479
 
480
  generate_btn_studio.click(
@@ -484,43 +444,56 @@ def create_combined_interface():
484
  outputs=[audio_output_studio, status_text, style_vector_state_studio],
485
  )
486
 
 
487
  def on_save_style_studio(style_vector, style_name):
488
- if not style_name:
 
489
  return (
490
- "Please enter a name for the new voice!",
491
- gr.Dropdown(
492
- choices=[]
493
- ), # Return a new Dropdown instance with empty choices
494
- gr.Dropdown(
495
- choices=[]
496
- ), # Return a new Dropdown instance with empty choices
497
  )
 
498
  result = save_style_to_json(style_vector, style_name)
 
499
  new_choices = list(load_voices_json().keys())
500
- # Return multiple values to update both dropdowns and show status
 
501
  return (
502
- gr.Dropdown(
503
- choices=new_choices
504
- ), # Return a new Dropdown instance with updated choices
505
- gr.Dropdown(
506
- choices=new_choices
507
- ), # Return a new Dropdown instance with updated choices
508
- result, # Status message
509
  )
510
 
511
  save_btn_studio.click(
512
  fn=on_save_style_studio,
513
  inputs=[style_vector_state_studio, new_style_name],
514
- outputs=[voice_dropdown, voice_dropdown_studio, status_text],
 
515
  )
516
 
517
- # Add callback to update sliders when a voice is selected
518
  voice_dropdown_studio.change(
519
  fn=update_sliders,
520
  inputs=voice_dropdown_studio,
521
  outputs=pca_sliders,
522
  )
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  gr.Markdown(
525
  "#### Based on [StyleTTS2](https://github.com/yl4579/StyleTTS2) and [artificial StyleTTS2](https://huggingface.co/dkounadis/artificial-styletts2/tree/main)"
526
  )
@@ -531,6 +504,6 @@ def create_combined_interface():
531
  if __name__ == "__main__":
532
  try:
533
  interface = create_combined_interface()
534
- interface.launch(share=False)
535
  except Exception as e:
536
  print(f"An error occurred while launching the interface: {e}")
 
16
  matplotlib.use("Agg") # Use non-interactive backend
17
  import matplotlib.pyplot as plt
18
 
19
+ # -------------------------------------------------------------------
20
+ # IMPORT OR DEFINE YOUR TEXT-TO-SPEECH FUNCTIONS
21
+ # (Adjust these imports to match your local TTS code)
22
+ # -------------------------------------------------------------------
23
  from text2speech import tts_randomized, parse_speed, tts_with_style_vector
24
 
25
  # Constants and Paths
 
51
  # LOAD PCA MODEL AND ANNOTATED FEATURES
52
  ##############################################################################
53
 
 
54
  try:
55
  pca = joblib.load(PCA_MODEL_PATH)
56
  print("PCA model loaded successfully.")
 
93
  def update_sliders(voice_name):
94
  """
95
  Update slider values based on the selected predefined voice using reverse PCA.
96
+ Returns a list of PCA component values to set the sliders.
 
 
 
 
 
97
  """
98
  if not voice_name:
99
  # Return default slider values (e.g., zeros) if no voice is selected
 
122
  def generate_audio_with_voice(text, voice_key, speed_val):
123
  """
124
  Generate audio using the style vector of the selected predefined voice.
125
+ Returns (audio_tuple, style_vector) or (None, error_message).
 
 
 
 
 
 
 
126
  """
127
  try:
128
  # Load voices data
129
  voices_data = load_voices_json()
 
130
  if voice_key not in voices_data:
131
+ msg = f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}."
132
+ print(msg)
133
+ return None, msg
134
 
 
135
  style_vector = np.array(voices_data[voice_key], dtype=np.float32).reshape(1, -1)
136
  print(f"Selected Voice: {voice_key}")
137
  print(f"Style Vector (First 6): {style_vector[0][:6]}")
 
139
  # Convert to torch tensor and move to device
140
  style_vec_torch = torch.from_numpy(style_vector).float().to(device)
141
 
142
+ # Generate audio
143
  audio_np = tts_with_style_vector(
144
  text,
145
  style_vec=style_vec_torch,
 
151
  )
152
 
153
  if audio_np is None:
154
+ msg = "Audio generation failed."
155
+ print(msg)
156
+ return None, msg
157
 
158
+ sr = 24000
 
159
  audio_tuple = (sr, audio_np)
 
 
160
  return audio_tuple, style_vector.tolist()
161
 
162
  except Exception as e:
 
165
 
166
 
167
  def build_modified_vector(voice_key, top6_values):
168
+ """Reconstruct a style vector by applying inverse PCA on the given 6 slider values."""
169
  voices_data = load_voices_json()
170
  if voice_key not in voices_data:
171
  print(f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}.")
 
177
  return None
178
 
179
  try:
 
180
  pca_components = np.array(top6_values).reshape(1, -1)
181
  reconstructed_vec = pca.inverse_transform(pca_components)[0]
182
  return reconstructed_vec
 
185
  return None
186
 
187
 
188
+ def generate_custom_audio(text, voice_key, randomize, speed_val, *slider_values):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  """
190
+ Generate audio with either a random style vector or a reconstructed vector
191
+ from the 6 PCA sliders. Returns (audio_tuple, style_vector) or (None, None).
 
 
 
192
  """
193
  try:
 
 
 
194
  if randomize:
195
  # Generate randomized style vector
196
  audio_np, random_style_vec = tts_randomized(text, speed=speed_val)
197
  if random_style_vec is None:
198
  print("Failed to generate randomized style vector.")
199
+ return None, None
 
200
  final_vec = (
201
  random_style_vec.cpu().numpy().flatten()
202
  if isinstance(random_style_vec, torch.Tensor)
 
204
  )
205
  print("Randomized Style Vector (First 6):", final_vec[:6])
206
  else:
207
+ # Reconstruct vector from PCA sliders
208
  reconstructed_vec = build_modified_vector(voice_key, slider_values)
209
  if reconstructed_vec is None:
210
+ print("No reconstructed vector. Skipping audio generation.")
211
+ return None, None
 
 
212
 
 
213
  style_vec_torch = (
214
  torch.from_numpy(reconstructed_vec).float().unsqueeze(0).to(device)
215
  )
 
 
216
  audio_np = tts_with_style_vector(
217
  text,
218
  style_vec=style_vec_torch,
 
227
 
228
  if audio_np is None:
229
  print("Audio generation failed.")
230
+ return None, None
231
 
232
+ sr = 24000
 
233
  audio_tuple = (sr, audio_np)
 
 
234
  return audio_tuple, final_vec.tolist()
235
 
236
  except Exception as e:
237
+ print(f"Error generating audio and style: {e}")
238
+ return None, None
239
 
240
 
241
  def save_style_to_json(style_data, style_name):
242
+ """
243
+ Saves the provided style_data (list of floats) into voices.json under style_name.
244
+ Returns a status message.
245
+ """
246
  if not style_name.strip():
247
  return "Please enter a new style name before saving."
248
 
 
252
  f"Style name '{style_name}' already exists. Please choose a different name."
253
  )
254
 
 
255
  if len(style_data) != VECTOR_DIMENSION:
256
  return f"Style vector length mismatch. Expected {VECTOR_DIMENSION}, got {len(style_data)}."
257
 
 
258
  voices_data[style_name] = style_data
259
  save_voices_json(voices_data)
260
  return f"Saved style as '{style_name}' in {VOICES_JSON_PATH}."
261
 
262
 
 
 
 
263
  def rearrange_voices(new_order):
264
+ """
265
+ Rearrange the voices in voices.json based on the comma-separated `new_order`.
266
+ Returns (status_msg, updated_list_of_voices).
267
+ """
268
  voices_data = load_voices_json()
269
  new_order_list = [name.strip() for name in new_order.split(",")]
270
  if not all(name in voices_data for name in new_order_list):
271
  return "Error: New order contains invalid voice names.", list(
272
  voices_data.keys()
273
  )
274
+
275
  ordered_data = OrderedDict()
276
  for name in new_order_list:
277
  ordered_data[name] = voices_data[name]
278
+
279
  save_voices_json(ordered_data)
280
  print(f"Voices rearranged: {list(ordered_data.keys())}")
281
  return "Voices rearranged successfully.", list(ordered_data.keys())
282
 
283
 
284
  def delete_voice(selected):
285
+ """Delete voices from the voices.json. Returns (status_msg, updated_list_of_voices)."""
286
  if not selected:
287
  return "No voices selected for deletion.", list(load_voices_json().keys())
288
  voices_data = load_voices_json()
 
295
 
296
 
297
  def upload_new_voices(uploaded_file):
298
+ """Upload new voices from a JSON file. Returns (status_msg, updated_list_of_voices)."""
299
  if uploaded_file is None:
300
  return "No file uploaded.", list(load_voices_json().keys())
301
  try:
302
  uploaded_data = json.load(uploaded_file)
303
  if not isinstance(uploaded_data, dict):
304
+ return (
305
+ "Invalid JSON format. Expected a dictionary of voices.",
306
+ list(load_voices_json().keys()),
307
  )
308
  voices_data = load_voices_json()
309
  voices_data.update(uploaded_data)
 
314
  return "Uploaded file is not valid JSON.", list(load_voices_json().keys())
315
 
316
 
317
+ # -------------------------------------------------------------------
318
+ # GRADIO INTERFACE
319
+ # -------------------------------------------------------------------
320
 
321
 
322
  def create_combined_interface():
323
+ # We'll initially load the voices to get a default set for the dropdown
324
  voices_data = load_voices_json()
325
  voice_choices = list(voices_data.keys())
326
  default_voice = voice_choices[0] if voice_choices else None
 
332
  }
333
  """
334
 
 
 
 
 
 
 
335
  with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
336
  gr.Markdown("# StyleTTS2 Studio - Build custom voices")
337
 
338
+ # -------------------------------------------------------
339
+ # 1) Text-to-Speech Tab
340
+ # -------------------------------------------------------
341
  with gr.Tab("Text-to-Speech"):
342
  gr.Markdown("### Generate Speech with Predefined Voices")
343
 
 
360
  label="Speed (%)",
361
  value=120,
362
  )
363
+ generate_btn = gr.Button("Generate Audio")
364
+ status_tts = gr.Textbox(label="Status", visible=False)
 
365
  audio_output = gr.Audio(label="Synthesized Audio")
366
 
367
+ # Generate TTS callback
368
  def on_generate_tts(text, voice, speed):
369
  if not voice:
370
  return None, "No voice selected."
371
  speed_val = speed / 100 # Convert percentage to multiplier
372
+ audio_result, msg = generate_audio_with_voice(text, voice, speed_val)
373
+ if audio_result is None:
374
+ return None, msg
375
+ return audio_result, "Audio generated successfully."
376
 
377
  generate_btn.click(
378
  fn=on_generate_tts,
379
  inputs=[text_input, voice_dropdown, speed_slider],
380
+ outputs=[audio_output, status_tts],
381
  )
382
 
383
+ # -------------------------------------------------------
384
+ # 2) Voice Studio Tab
385
+ # -------------------------------------------------------
386
  with gr.Tab("Voice Studio"):
387
  gr.Markdown("### Customize and Create New Voices")
388
 
 
425
  # State to hold the last style vector
426
  style_vector_state_studio = gr.State()
427
 
428
+ # Generate customized audio callback
429
  def on_generate_studio(text, voice, speed, *pca_values):
430
  if not voice:
431
  return None, "No voice selected.", None
432
+ speed_val = speed / 100
433
+ audio_tuple, style_vector = generate_custom_audio(
434
  text, voice, False, speed_val, *pca_values
435
  )
436
+ if audio_tuple is None:
437
  return None, "Failed to generate audio.", None
 
 
438
  return audio_tuple, "Audio generated successfully.", style_vector
439
 
440
  generate_btn_studio.click(
 
444
  outputs=[audio_output_studio, status_text, style_vector_state_studio],
445
  )
446
 
447
+ # Save customized voice callback
448
  def on_save_style_studio(style_vector, style_name):
449
+ """Save the new style, then update the dropdown choices."""
450
+ if not style_vector or not style_name:
451
  return (
452
+ gr.update(value="Please enter a name for the new voice!"),
453
+ gr.update(),
454
+ gr.update(),
 
 
 
 
455
  )
456
+ # Save the style
457
  result = save_style_to_json(style_vector, style_name)
458
+ # Reload the voices to get the new list
459
  new_choices = list(load_voices_json().keys())
460
+
461
+ # Return dictionary updates to existing components
462
  return (
463
+ gr.update(value=result),
464
+ gr.update(choices=new_choices),
465
+ gr.update(choices=new_choices),
 
 
 
 
466
  )
467
 
468
  save_btn_studio.click(
469
  fn=on_save_style_studio,
470
  inputs=[style_vector_state_studio, new_style_name],
471
+ # We update: status_text, voice_dropdown, voice_dropdown_studio
472
+ outputs=[status_text, voice_dropdown, voice_dropdown_studio],
473
  )
474
 
475
+ # Update sliders callback
476
  voice_dropdown_studio.change(
477
  fn=update_sliders,
478
  inputs=voice_dropdown_studio,
479
  outputs=pca_sliders,
480
  )
481
 
482
+ # -------------------------------------------------------
483
+ # Optionally: Reload voices on page load
484
+ # -------------------------------------------------------
485
+ def on_page_load():
486
+ new_choices = list(load_voices_json().keys())
487
+ return {
488
+ voice_dropdown: gr.update(choices=new_choices),
489
+ voice_dropdown_studio: gr.update(choices=new_choices),
490
+ }
491
+
492
+ # This automatically refreshes dropdowns every time the user loads/refreshes the page
493
+ demo.load(
494
+ on_page_load, inputs=None, outputs=[voice_dropdown, voice_dropdown_studio]
495
+ )
496
+
497
  gr.Markdown(
498
  "#### Based on [StyleTTS2](https://github.com/yl4579/StyleTTS2) and [artificial StyleTTS2](https://huggingface.co/dkounadis/artificial-styletts2/tree/main)"
499
  )
 
504
  if __name__ == "__main__":
505
  try:
506
  interface = create_combined_interface()
507
+ interface.launch(share=False) # or share=True if you want a public share link
508
  except Exception as e:
509
  print(f"An error occurred while launching the interface: {e}")
voices.json CHANGED
@@ -2837,4 +2837,4 @@
2837
  0.057131367030820654,
2838
  -0.0762246848122452
2839
  ]
2840
- }
 
2837
  0.057131367030820654,
2838
  -0.0762246848122452
2839
  ]
2840
+ }