Spaces:

jhtonyKoo
/

ITO-Master

Running

jhtonyKoo committed on Oct 10, 2024

Commit

7d7bb34

1 Parent(s): 3635837

modify app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,17 +76,20 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
-            current_output = current_output.cpu().numpy()
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
-        # Ensure the audio is in the correct shape (samples, channels)
-        if current_output.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
         yield (args.sample_rate, current_output), ito_param_output, step, ito_log

         # Convert current_output to numpy array if it's a tensor
         if isinstance(current_output, torch.Tensor):
+            current_output = current_output.detach().cpu().numpy()
         # Denormalize the audio to int16
         current_output = denormalize_audio(current_output, dtype=np.int16)
+        if output_audio.ndim == 1:
             current_output = current_output.reshape(-1, 1)
         elif current_output.ndim > 2:
             current_output = current_output.squeeze()
+        # Ensure the audio is in the correct shape (samples, channels)
+        if current_output.shape[1] > current_output.shape[0]:
+            current_output = current_output.transpose(1,0)
         yield (args.sample_rate, current_output), ito_param_output, step, ito_log