Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -266,7 +266,7 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
|
|
266 |
print(f"[INFO] | Mel spectrogram shapes: mel={mel.shape}, mel2={mel2.shape}")
|
267 |
|
268 |
# Length adjustment
|
269 |
-
target_lengths = torch.LongTensor([int(mel.size(2))]).to(mel.device)
|
270 |
target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
|
271 |
print(f"[INFO] | Target lengths: {target_lengths.item()}, {target2_lengths.item()}")
|
272 |
|
@@ -299,17 +299,21 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
|
|
299 |
vc_target = inference_module.cfm.inference(cat_condition, torch.LongTensor([cat_condition.size(1)]).to(mel2.device), mel2, style2, None, steps, inference_cfg_rate=guidance)
|
300 |
vc_target = vc_target[:, :, mel2.size(2):]
|
301 |
print(f"[INFO] | vc_target shape: {vc_target.shape}")
|
302 |
-
|
303 |
# Generate waveform using BigVGAN
|
304 |
vc_wave = bigvgan_fn(vc_target.float())[0]
|
305 |
print(f"[INFO] | vc_wave shape: {vc_wave.shape}")
|
306 |
-
|
307 |
# Handle the generated waveform
|
308 |
output_wave = vc_wave[0].cpu().numpy()
|
309 |
generated_wave_chunks.append(output_wave)
|
310 |
|
311 |
# Ensure processed_frames increments correctly to avoid infinite loop
|
|
|
|
|
312 |
processed_frames += vc_target.size(2)
|
|
|
|
|
313 |
print(f"[INFO] | Processed frames updated to: {processed_frames}")
|
314 |
|
315 |
# Concatenate all generated wave chunks
|
@@ -364,8 +368,8 @@ with gr.Blocks(css=css) as main:
|
|
364 |
with gr.Column():
|
365 |
steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
|
366 |
guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
|
367 |
-
pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
|
368 |
-
speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
|
369 |
|
370 |
with gr.Column():
|
371 |
submit = gr.Button("▶")
|
|
|
266 |
print(f"[INFO] | Mel spectrogram shapes: mel={mel.shape}, mel2={mel2.shape}")
|
267 |
|
268 |
# Length adjustment
|
269 |
+
target_lengths = torch.LongTensor([int(mel.size(2) / speed)]).to(mel.device)
|
270 |
target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
|
271 |
print(f"[INFO] | Target lengths: {target_lengths.item()}, {target2_lengths.item()}")
|
272 |
|
|
|
299 |
vc_target = inference_module.cfm.inference(cat_condition, torch.LongTensor([cat_condition.size(1)]).to(mel2.device), mel2, style2, None, steps, inference_cfg_rate=guidance)
|
300 |
vc_target = vc_target[:, :, mel2.size(2):]
|
301 |
print(f"[INFO] | vc_target shape: {vc_target.shape}")
|
302 |
+
print(vc_target)
|
303 |
# Generate waveform using BigVGAN
|
304 |
vc_wave = bigvgan_fn(vc_target.float())[0]
|
305 |
print(f"[INFO] | vc_wave shape: {vc_wave.shape}")
|
306 |
+
print(vc_wave)
|
307 |
# Handle the generated waveform
|
308 |
output_wave = vc_wave[0].cpu().numpy()
|
309 |
generated_wave_chunks.append(output_wave)
|
310 |
|
311 |
# Ensure processed_frames increments correctly to avoid infinite loop
|
312 |
+
print(processed_frames)
|
313 |
+
print(cond.size(1))
|
314 |
processed_frames += vc_target.size(2)
|
315 |
+
print(processed_frames)
|
316 |
+
print(cond.size(1))
|
317 |
print(f"[INFO] | Processed frames updated to: {processed_frames}")
|
318 |
|
319 |
# Concatenate all generated wave chunks
|
|
|
368 |
with gr.Column():
|
369 |
steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
|
370 |
guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
|
371 |
+
pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
|
372 |
+
speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
|
373 |
|
374 |
with gr.Column():
|
375 |
submit = gr.Button("▶")
|