Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -266,7 +266,7 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
|
|
266 |
print(f"[INFO] | Mel spectrogram shapes: mel={mel.shape}, mel2={mel2.shape}")
|
267 |
|
268 |
# Length adjustment
|
269 |
-
target_lengths = torch.LongTensor([int(mel.size(2))]).to(mel.device)
|
270 |
target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
|
271 |
print(f"[INFO] | Target lengths: {target_lengths.item()}, {target2_lengths.item()}")
|
272 |
|
@@ -299,17 +299,21 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
|
|
299 |
vc_target = inference_module.cfm.inference(cat_condition, torch.LongTensor([cat_condition.size(1)]).to(mel2.device), mel2, style2, None, steps, inference_cfg_rate=guidance)
|
300 |
vc_target = vc_target[:, :, mel2.size(2):]
|
301 |
print(f"[INFO] | vc_target shape: {vc_target.shape}")
|
302 |
-
|
303 |
# Generate waveform using BigVGAN
|
304 |
vc_wave = bigvgan_fn(vc_target.float())[0]
|
305 |
print(f"[INFO] | vc_wave shape: {vc_wave.shape}")
|
306 |
-
|
307 |
# Handle the generated waveform
|
308 |
output_wave = vc_wave[0].cpu().numpy()
|
309 |
generated_wave_chunks.append(output_wave)
|
310 |
|
311 |
# Ensure processed_frames increments correctly to avoid infinite loop
|
|
|
|
|
312 |
processed_frames += vc_target.size(2)
|
|
|
|
|
313 |
print(f"[INFO] | Processed frames updated to: {processed_frames}")
|
314 |
|
315 |
# Concatenate all generated wave chunks
|
@@ -364,8 +368,8 @@ with gr.Blocks(css=css) as main:
|
|
364 |
with gr.Column():
|
365 |
steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
|
366 |
guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
|
367 |
-
pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
|
368 |
-
speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
|
369 |
|
370 |
with gr.Column():
|
371 |
submit = gr.Button("▶")
|
|
|
266 |
print(f"[INFO] | Mel spectrogram shapes: mel={mel.shape}, mel2={mel2.shape}")
|
267 |
|
268 |
# Length adjustment
|
269 |
+
target_lengths = torch.LongTensor([int(mel.size(2) / speed)]).to(mel.device)
|
270 |
target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
|
271 |
print(f"[INFO] | Target lengths: {target_lengths.item()}, {target2_lengths.item()}")
|
272 |
|
|
|
299 |
vc_target = inference_module.cfm.inference(cat_condition, torch.LongTensor([cat_condition.size(1)]).to(mel2.device), mel2, style2, None, steps, inference_cfg_rate=guidance)
|
300 |
vc_target = vc_target[:, :, mel2.size(2):]
|
301 |
print(f"[INFO] | vc_target shape: {vc_target.shape}")
|
302 |
+
print(vc_target)
|
303 |
# Generate waveform using BigVGAN
|
304 |
vc_wave = bigvgan_fn(vc_target.float())[0]
|
305 |
print(f"[INFO] | vc_wave shape: {vc_wave.shape}")
|
306 |
+
print(vc_wave)
|
307 |
# Handle the generated waveform
|
308 |
output_wave = vc_wave[0].cpu().numpy()
|
309 |
generated_wave_chunks.append(output_wave)
|
310 |
|
311 |
# Ensure processed_frames increments correctly to avoid infinite loop
|
312 |
+
print(processed_frames)
|
313 |
+
print(cond.size(1))
|
314 |
processed_frames += vc_target.size(2)
|
315 |
+
print(processed_frames)
|
316 |
+
print(cond.size(1))
|
317 |
print(f"[INFO] | Processed frames updated to: {processed_frames}")
|
318 |
|
319 |
# Concatenate all generated wave chunks
|
|
|
368 |
with gr.Column():
|
369 |
steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
|
370 |
guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
|
371 |
+
pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
|
372 |
+
speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
|
373 |
|
374 |
with gr.Column():
|
375 |
submit = gr.Button("▶")
|