Spaces:
Running
Running
modify app
Browse files
app.py
CHANGED
@@ -76,17 +76,20 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
76 |
|
77 |
# Convert current_output to numpy array if it's a tensor
|
78 |
if isinstance(current_output, torch.Tensor):
|
79 |
-
current_output = current_output.cpu().numpy()
|
80 |
|
81 |
# Denormalize the audio to int16
|
82 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
83 |
|
84 |
-
|
85 |
-
if current_output.ndim == 1:
|
86 |
current_output = current_output.reshape(-1, 1)
|
87 |
elif current_output.ndim > 2:
|
88 |
current_output = current_output.squeeze()
|
89 |
-
|
|
|
|
|
|
|
|
|
90 |
yield (args.sample_rate, current_output), ito_param_output, step, ito_log
|
91 |
|
92 |
|
|
|
76 |
|
77 |
# Convert current_output to numpy array if it's a tensor
|
78 |
if isinstance(current_output, torch.Tensor):
|
79 |
+
current_output = current_output.detach().cpu().numpy()
|
80 |
|
81 |
# Denormalize the audio to int16
|
82 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
83 |
|
84 |
+
if output_audio.ndim == 1:
|
|
|
85 |
current_output = current_output.reshape(-1, 1)
|
86 |
elif current_output.ndim > 2:
|
87 |
current_output = current_output.squeeze()
|
88 |
+
|
89 |
+
# Ensure the audio is in the correct shape (samples, channels)
|
90 |
+
if current_output.shape[1] > current_output.shape[0]:
|
91 |
+
current_output = current_output.transpose(1,0)
|
92 |
+
|
93 |
yield (args.sample_rate, current_output), ito_param_output, step, ito_log
|
94 |
|
95 |
|