Wismut committed on
Commit
34ab4db
·
1 Parent(s): c357cba

fixed missing cuda option

Browse files
Files changed (4) hide show
  1. Logo.ai +0 -0
  2. Logo.png +0 -0
  3. app.py +2 -2
  4. text2speech.py +16 -4
Logo.ai ADDED
The diff for this file is too large to render. See raw diff
 
Logo.png ADDED
app.py CHANGED
@@ -367,7 +367,7 @@ def create_combined_interface():
367
  with gr.Column():
368
  text_input = gr.Textbox(
369
  label="Text to Synthesize",
370
- value="Hello world from the Gradio + TTS pipeline!",
371
  lines=3,
372
  )
373
  voice_dropdown = gr.Dropdown(
@@ -411,7 +411,7 @@ def create_combined_interface():
411
  with gr.Column():
412
  text_input_studio = gr.Textbox(
413
  label="Text to Synthesize",
414
- value="Customize your voice here!",
415
  lines=3,
416
  )
417
  voice_dropdown_studio = gr.Dropdown(
 
367
  with gr.Column():
368
  text_input = gr.Textbox(
369
  label="Text to Synthesize",
370
+ value="Did you know that you can just do stuff?",
371
  lines=3,
372
  )
373
  voice_dropdown = gr.Dropdown(
 
411
  with gr.Column():
412
  text_input_studio = gr.Textbox(
413
  label="Text to Synthesize",
414
+ value="Use the sliders to customize a voice!",
415
  lines=3,
416
  )
417
  voice_dropdown_studio = gr.Dropdown(
text2speech.py CHANGED
@@ -18,6 +18,13 @@ from typing import Optional, Tuple, List
18
  VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
19
  RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
20
 
 
 
 
 
 
 
 
21
 
22
  ##############################################################################
23
  # JSON LOAD/SAVE
@@ -131,7 +138,7 @@ def sample_random_style(mean: np.ndarray, cov: np.ndarray) -> torch.Tensor:
131
  # Sample from multivariate normal distribution
132
  z = np.random.multivariate_normal(mean, cov)
133
  # Convert to torch tensor
134
- style_tensor = torch.tensor(z, dtype=torch.float32)
135
  # Unsqueeze to shape (1, D)
136
  style_tensor = style_tensor.unsqueeze(0)
137
  print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
@@ -354,7 +361,9 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
354
  """
355
  if key_or_path in voices_data:
356
  print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
357
- style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32)
 
 
358
  elif os.path.isfile(key_or_path):
359
  print(
360
  f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
@@ -362,6 +371,7 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
362
  style_vec = inference.compute_style(key_or_path)
363
  if style_vec is None:
364
  raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
 
365
  voices_data[key_or_path] = style_vec.squeeze(0).tolist()
366
  save_json(voices_data, VOICES_JSON_PATH)
367
  print(
@@ -377,9 +387,10 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
377
  # Ensure style_vec is 2D: (1, D)
378
  if style_vec.dim() == 1:
379
  style_vec = style_vec.unsqueeze(0)
 
380
  print(f"Unsqueezed style vector to shape: {style_vec.shape}")
381
  elif style_vec.dim() == 3:
382
- style_vec = style_vec.squeeze(1)
383
  print(f"Squeezed style vector to shape: {style_vec.shape}")
384
  elif style_vec.dim() != 2:
385
  raise ValueError(
@@ -495,9 +506,10 @@ def tts_with_style_vector(
495
  # Ensure style_vec has shape (1, D)
496
  if style_vec.dim() == 1:
497
  style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
 
498
  print(f"Unsqueezed style vector to shape: {style_vec.shape}")
499
  elif style_vec.dim() == 3:
500
- style_vec = style_vec.squeeze(1)
501
  print(f"Squeezed style vector to shape: {style_vec.shape}")
502
  elif style_vec.dim() != 2:
503
  print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
 
18
  VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
19
  RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
20
 
21
+ ##############################################################################
22
+ # DEVICE CONFIGURATION
23
+ ##############################################################################
24
+ # Detect if CUDA is available and set the device accordingly
25
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
+ print(f"Using device: {device}")
27
+
28
 
29
  ##############################################################################
30
  # JSON LOAD/SAVE
 
138
  # Sample from multivariate normal distribution
139
  z = np.random.multivariate_normal(mean, cov)
140
  # Convert to torch tensor
141
+ style_tensor = torch.tensor(z, dtype=torch.float32).to(device) # Move to device
142
  # Unsqueeze to shape (1, D)
143
  style_tensor = style_tensor.unsqueeze(0)
144
  print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
 
361
  """
362
  if key_or_path in voices_data:
363
  print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
364
+ style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32).to(
365
+ device
366
+ ) # Move to device
367
  elif os.path.isfile(key_or_path):
368
  print(
369
  f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
 
371
  style_vec = inference.compute_style(key_or_path)
372
  if style_vec is None:
373
  raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
374
+ style_vec = style_vec.to(device) # Move to device
375
  voices_data[key_or_path] = style_vec.squeeze(0).tolist()
376
  save_json(voices_data, VOICES_JSON_PATH)
377
  print(
 
387
  # Ensure style_vec is 2D: (1, D)
388
  if style_vec.dim() == 1:
389
  style_vec = style_vec.unsqueeze(0)
390
+ style_vec = style_vec.to(device) # Ensure it's on the correct device
391
  print(f"Unsqueezed style vector to shape: {style_vec.shape}")
392
  elif style_vec.dim() == 3:
393
+ style_vec = style_vec.squeeze(1).to(device)
394
  print(f"Squeezed style vector to shape: {style_vec.shape}")
395
  elif style_vec.dim() != 2:
396
  raise ValueError(
 
506
  # Ensure style_vec has shape (1, D)
507
  if style_vec.dim() == 1:
508
  style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
509
+ style_vec = style_vec.to(device) # Move to device
510
  print(f"Unsqueezed style vector to shape: {style_vec.shape}")
511
  elif style_vec.dim() == 3:
512
+ style_vec = style_vec.squeeze(1).to(device)
513
  print(f"Squeezed style vector to shape: {style_vec.shape}")
514
  elif style_vec.dim() != 2:
515
  print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")