Spaces:

Wismut
/

StyleTTS2_Studio

Running

App Files Community

Wismut committed on 28 days ago

Commit

34ab4db

1 Parent(s): c357cba

fixed missing cuda option

Browse files

Files changed (4) hide show

Logo.ai +0 -0
Logo.png +0 -0
app.py +2 -2
text2speech.py +16 -4

Logo.ai ADDED Viewed

The diff for this file is too large to render. See raw diff

Logo.png ADDED Viewed

app.py CHANGED Viewed

@@ -367,7 +367,7 @@ def create_combined_interface():
             with gr.Column():
                 text_input = gr.Textbox(
                     label="Text to Synthesize",
-                    value="Hello world from the Gradio + TTS pipeline!",
                     lines=3,
                 )
                 voice_dropdown = gr.Dropdown(
@@ -411,7 +411,7 @@ def create_combined_interface():
             with gr.Column():
                 text_input_studio = gr.Textbox(
                     label="Text to Synthesize",
-                    value="Customize your voice here!",
                     lines=3,
                 )
                 voice_dropdown_studio = gr.Dropdown(

             with gr.Column():
                 text_input = gr.Textbox(
                     label="Text to Synthesize",
+                    value="Did you know that you can just do stuff?",
                     lines=3,
                 )
                 voice_dropdown = gr.Dropdown(
             with gr.Column():
                 text_input_studio = gr.Textbox(
                     label="Text to Synthesize",
+                    value="Use the sliders to customize a voice!",
                     lines=3,
                 )
                 voice_dropdown_studio = gr.Dropdown(

text2speech.py CHANGED Viewed

@@ -18,6 +18,13 @@ from typing import Optional, Tuple, List
 VOICES_JSON_PATH = "voices.json"  # Contains your known style vectors
 RANDOM_VOICES_JSON_PATH = "random_voices.json"  # We'll store newly sampled vectors here
 ##############################################################################
 # JSON LOAD/SAVE
@@ -131,7 +138,7 @@ def sample_random_style(mean: np.ndarray, cov: np.ndarray) -> torch.Tensor:
     # Sample from multivariate normal distribution
     z = np.random.multivariate_normal(mean, cov)
     # Convert to torch tensor
-    style_tensor = torch.tensor(z, dtype=torch.float32)
     # Unsqueeze to shape (1, D)
     style_tensor = style_tensor.unsqueeze(0)
     print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
@@ -354,7 +361,9 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
     """
     if key_or_path in voices_data:
         print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
-        style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32)
     elif os.path.isfile(key_or_path):
         print(
             f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
@@ -362,6 +371,7 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
         style_vec = inference.compute_style(key_or_path)
         if style_vec is None:
             raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
         voices_data[key_or_path] = style_vec.squeeze(0).tolist()
         save_json(voices_data, VOICES_JSON_PATH)
         print(
@@ -377,9 +387,10 @@ def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Te
     # Ensure style_vec is 2D: (1, D)
     if style_vec.dim() == 1:
         style_vec = style_vec.unsqueeze(0)
         print(f"Unsqueezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() == 3:
-        style_vec = style_vec.squeeze(1)
         print(f"Squeezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() != 2:
         raise ValueError(
@@ -495,9 +506,10 @@ def tts_with_style_vector(
     # Ensure style_vec has shape (1, D)
     if style_vec.dim() == 1:
         style_vec = style_vec.unsqueeze(0)  # e.g. (D,) -> (1, D)
         print(f"Unsqueezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() == 3:
-        style_vec = style_vec.squeeze(1)
         print(f"Squeezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() != 2:
         print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")

 VOICES_JSON_PATH = "voices.json"  # Contains your known style vectors
 RANDOM_VOICES_JSON_PATH = "random_voices.json"  # We'll store newly sampled vectors here
+##############################################################################
+# DEVICE CONFIGURATION
+##############################################################################
+# Detect if CUDA is available and set the device accordingly
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 ##############################################################################
 # JSON LOAD/SAVE
     # Sample from multivariate normal distribution
     z = np.random.multivariate_normal(mean, cov)
     # Convert to torch tensor
+    style_tensor = torch.tensor(z, dtype=torch.float32).to(device)  # Move to device
     # Unsqueeze to shape (1, D)
     style_tensor = style_tensor.unsqueeze(0)
     print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
     """
     if key_or_path in voices_data:
         print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
+        style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32).to(
+            device
+        )  # Move to device
     elif os.path.isfile(key_or_path):
         print(
             f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
         style_vec = inference.compute_style(key_or_path)
         if style_vec is None:
             raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
+        style_vec = style_vec.to(device)  # Move to device
         voices_data[key_or_path] = style_vec.squeeze(0).tolist()
         save_json(voices_data, VOICES_JSON_PATH)
         print(
     # Ensure style_vec is 2D: (1, D)
     if style_vec.dim() == 1:
         style_vec = style_vec.unsqueeze(0)
+        style_vec = style_vec.to(device)  # Ensure it's on the correct device
         print(f"Unsqueezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() == 3:
+        style_vec = style_vec.squeeze(1).to(device)
         print(f"Squeezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() != 2:
         raise ValueError(
     # Ensure style_vec has shape (1, D)
     if style_vec.dim() == 1:
         style_vec = style_vec.unsqueeze(0)  # e.g. (D,) -> (1, D)
+        style_vec = style_vec.to(device)  # Move to device
         print(f"Unsqueezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() == 3:
+        style_vec = style_vec.squeeze(1).to(device)
         print(f"Squeezed style vector to shape: {style_vec.shape}")
     elif style_vec.dim() != 2:
         print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")