Spaces:
Running
Running
Fixed missing CUDA option
Browse files
Logo.ai
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Logo.png
ADDED
app.py
CHANGED
@@ -367,7 +367,7 @@ def create_combined_interface():
|
|
367 |
with gr.Column():
|
368 |
text_input = gr.Textbox(
|
369 |
label="Text to Synthesize",
|
370 |
-
value="
|
371 |
lines=3,
|
372 |
)
|
373 |
voice_dropdown = gr.Dropdown(
|
@@ -411,7 +411,7 @@ def create_combined_interface():
|
|
411 |
with gr.Column():
|
412 |
text_input_studio = gr.Textbox(
|
413 |
label="Text to Synthesize",
|
414 |
-
value="
|
415 |
lines=3,
|
416 |
)
|
417 |
voice_dropdown_studio = gr.Dropdown(
|
|
|
367 |
with gr.Column():
|
368 |
text_input = gr.Textbox(
|
369 |
label="Text to Synthesize",
|
370 |
+
value="Did you know that you can just do stuff?",
|
371 |
lines=3,
|
372 |
)
|
373 |
voice_dropdown = gr.Dropdown(
|
|
|
411 |
with gr.Column():
|
412 |
text_input_studio = gr.Textbox(
|
413 |
label="Text to Synthesize",
|
414 |
+
value="Use the sliders to customize a voice!",
|
415 |
lines=3,
|
416 |
)
|
417 |
voice_dropdown_studio = gr.Dropdown(
|
text2speech.py
CHANGED
@@ -18,6 +18,13 @@ from typing import Optional, Tuple, List
|
|
18 |
VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
|
19 |
RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
##############################################################################
|
23 |
# JSON LOAD/SAVE
|
@@ -131,7 +138,7 @@ def sample_random_style(mean: np.ndarray, cov: np.ndarray) -> torch.Tensor:
|
|
131 |
# Sample from multivariate normal distribution
|
132 |
z = np.random.multivariate_normal(mean, cov)
|
133 |
# Convert to torch tensor
|
134 |
-
style_tensor = torch.tensor(z, dtype=torch.float32)
|
135 |
# Unsqueeze to shape (1, D)
|
136 |
style_tensor = style_tensor.unsqueeze(0)
|
137 |
print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
|
@@ -354,7 +361,9 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
354 |
"""
|
355 |
if key_or_path in voices_data:
|
356 |
print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
|
357 |
-
style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32)
|
|
|
|
|
358 |
elif os.path.isfile(key_or_path):
|
359 |
print(
|
360 |
f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
|
@@ -362,6 +371,7 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
362 |
style_vec = inference.compute_style(key_or_path)
|
363 |
if style_vec is None:
|
364 |
raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
|
|
|
365 |
voices_data[key_or_path] = style_vec.squeeze(0).tolist()
|
366 |
save_json(voices_data, VOICES_JSON_PATH)
|
367 |
print(
|
@@ -377,9 +387,10 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
|
|
377 |
# Ensure style_vec is 2D: (1, D)
|
378 |
if style_vec.dim() == 1:
|
379 |
style_vec = style_vec.unsqueeze(0)
|
|
|
380 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
381 |
elif style_vec.dim() == 3:
|
382 |
-
style_vec = style_vec.squeeze(1)
|
383 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
384 |
elif style_vec.dim() != 2:
|
385 |
raise ValueError(
|
@@ -495,9 +506,10 @@ def tts_with_style_vector(
|
|
495 |
# Ensure style_vec has shape (1, D)
|
496 |
if style_vec.dim() == 1:
|
497 |
style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
|
|
|
498 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
499 |
elif style_vec.dim() == 3:
|
500 |
-
style_vec = style_vec.squeeze(1)
|
501 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
502 |
elif style_vec.dim() != 2:
|
503 |
print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
|
|
|
18 |
VOICES_JSON_PATH = "voices.json" # Contains your known style vectors
|
19 |
RANDOM_VOICES_JSON_PATH = "random_voices.json" # We'll store newly sampled vectors here
|
20 |
|
21 |
+
##############################################################################
|
22 |
+
# DEVICE CONFIGURATION
|
23 |
+
##############################################################################
|
24 |
+
# Detect if CUDA is available and set the device accordingly
|
25 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
26 |
+
print(f"Using device: {device}")
|
27 |
+
|
28 |
|
29 |
##############################################################################
|
30 |
# JSON LOAD/SAVE
|
|
|
138 |
# Sample from multivariate normal distribution
|
139 |
z = np.random.multivariate_normal(mean, cov)
|
140 |
# Convert to torch tensor
|
141 |
+
style_tensor = torch.tensor(z, dtype=torch.float32).to(device) # Move to device
|
142 |
# Unsqueeze to shape (1, D)
|
143 |
style_tensor = style_tensor.unsqueeze(0)
|
144 |
print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
|
|
|
361 |
"""
|
362 |
if key_or_path in voices_data:
|
363 |
print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
|
364 |
+
style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32).to(
|
365 |
+
device
|
366 |
+
) # Move to device
|
367 |
elif os.path.isfile(key_or_path):
|
368 |
print(
|
369 |
f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
|
|
|
371 |
style_vec = inference.compute_style(key_or_path)
|
372 |
if style_vec is None:
|
373 |
raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
|
374 |
+
style_vec = style_vec.to(device) # Move to device
|
375 |
voices_data[key_or_path] = style_vec.squeeze(0).tolist()
|
376 |
save_json(voices_data, VOICES_JSON_PATH)
|
377 |
print(
|
|
|
387 |
# Ensure style_vec is 2D: (1, D)
|
388 |
if style_vec.dim() == 1:
|
389 |
style_vec = style_vec.unsqueeze(0)
|
390 |
+
style_vec = style_vec.to(device) # Ensure it's on the correct device
|
391 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
392 |
elif style_vec.dim() == 3:
|
393 |
+
style_vec = style_vec.squeeze(1).to(device)
|
394 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
395 |
elif style_vec.dim() != 2:
|
396 |
raise ValueError(
|
|
|
506 |
# Ensure style_vec has shape (1, D)
|
507 |
if style_vec.dim() == 1:
|
508 |
style_vec = style_vec.unsqueeze(0) # e.g. (D,) -> (1, D)
|
509 |
+
style_vec = style_vec.to(device) # Move to device
|
510 |
print(f"Unsqueezed style vector to shape: {style_vec.shape}")
|
511 |
elif style_vec.dim() == 3:
|
512 |
+
style_vec = style_vec.squeeze(1).to(device)
|
513 |
print(f"Squeezed style vector to shape: {style_vec.shape}")
|
514 |
elif style_vec.dim() != 2:
|
515 |
print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
|