Staticaliza committed
Commit b6050ac · verified · 1 Parent(s): 2e18475

Update app.py

Files changed (1):
  1. app.py +9 -5
app.py CHANGED

@@ -266,7 +266,7 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
     print(f"[INFO] | Mel spectrogram shapes: mel={mel.shape}, mel2={mel2.shape}")
 
     # Length adjustment
-    target_lengths = torch.LongTensor([int(mel.size(2) * speed)]).to(mel.device)
+    target_lengths = torch.LongTensor([int(mel.size(2) / speed)]).to(mel.device)
     target2_lengths = torch.LongTensor([mel2.size(2)]).to(mel2.device)
     print(f"[INFO] | Target lengths: {target_lengths.item()}, {target2_lengths.item()}")
 
@@ -299,17 +299,21 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
     vc_target = inference_module.cfm.inference(cat_condition, torch.LongTensor([cat_condition.size(1)]).to(mel2.device), mel2, style2, None, steps, inference_cfg_rate=guidance)
     vc_target = vc_target[:, :, mel2.size(2):]
     print(f"[INFO] | vc_target shape: {vc_target.shape}")
-
+    print(vc_target)
     # Generate waveform using BigVGAN
     vc_wave = bigvgan_fn(vc_target.float())[0]
     print(f"[INFO] | vc_wave shape: {vc_wave.shape}")
-
+    print(vc_wave)
     # Handle the generated waveform
     output_wave = vc_wave[0].cpu().numpy()
     generated_wave_chunks.append(output_wave)
 
     # Ensure processed_frames increments correctly to avoid infinite loop
+    print(processed_frames)
+    print(cond.size(1))
     processed_frames += vc_target.size(2)
+    print(processed_frames)
+    print(cond.size(1))
     print(f"[INFO] | Processed frames updated to: {processed_frames}")
 
     # Concatenate all generated wave chunks
@@ -364,8 +368,8 @@ with gr.Blocks(css=css) as main:
     with gr.Column():
         steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
         guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
-        pitch = gr.Slider(label="Pitch", value=0.0, minimum=-12.0, maximum=12.0, step=0.1)
-        speed = gr.Slider(label="Speed", value=1.0, minimum=0.5, maximum=2.0, step=0.1)
+        pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
+        speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
 
     with gr.Column():
         submit = gr.Button("▶")
 
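The substantive change in the first hunk is the speed factor. Assuming target_lengths drives a length regulator downstream, mel.size(2) * speed would have requested more frames at higher speed settings (slower, longer output), whereas mel.size(2) / speed requests fewer frames, so larger speed values actually shorten the result. A minimal sketch of the arithmetic, using a made-up 1 x 80 x 200 mel rather than data from the app:

import torch

# Hypothetical source mel spectrogram: (batch, n_mels, frames) = (1, 80, 200).
mel = torch.zeros(1, 80, 200)

for speed in (0.5, 1.0, 2.0):
    old_frames = int(mel.size(2) * speed)  # pre-fix: speed 2.0 -> 400 frames (slower)
    new_frames = int(mel.size(2) / speed)  # post-fix: speed 2.0 -> 100 frames (faster)
    target_lengths = torch.LongTensor([new_frames]).to(mel.device)
    print(f"speed={speed}: old target={old_frames}, new target={target_lengths.item()}")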
 
 
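The second hunk adds bare print(...) calls around the chunk loop, presumably for debugging, but the comment it touches ("avoid infinite loop") names the invariant that matters: the loop terminates only because processed_frames grows by the number of frames just synthesized. A self-contained sketch of that pattern with dummy tensors and hypothetical names (cond, chunk_size) standing in for the app's real CFM and BigVGAN calls:

import torch

# Dummy condition tensor standing in for the app's `cond`: (batch, frames, dim).
cond = torch.zeros(1, 1000, 512)
chunk_size = 256  # hypothetical window size

processed_frames = 0
generated_wave_chunks = []
while processed_frames < cond.size(1):
    chunk = cond[:, processed_frames:processed_frames + chunk_size, :]
    # The real loop would run cfm.inference(...) and bigvgan_fn(...) here;
    # a zero waveform keeps the sketch runnable.
    generated_wave_chunks.append(torch.zeros(chunk.size(1) * 256).numpy())
    # Without this increment the while condition never changes and the loop never exits.
    processed_frames += chunk.size(1)

print(f"Processed {processed_frames}/{cond.size(1)} frames in {len(generated_wave_chunks)} chunks")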
 
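The last hunk widens the Pitch range to -10..10 and the Speed range to 0.1..10.0. How these sliders feed into voice_conversion is not part of this diff, so the wiring below is only a plausible sketch using Gradio's standard Button.click; the audio components, output, and argument order are assumptions for illustration:

import gradio as gr

def voice_conversion(input, reference, steps, guidance, pitch, speed):
    # Placeholder standing in for the app's real conversion pipeline.
    return None

with gr.Blocks() as demo:
    input = gr.Audio(label="Input", type="filepath")
    reference = gr.Audio(label="Reference", type="filepath")
    steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
    guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
    pitch = gr.Slider(label="Pitch", value=0.0, minimum=-10.0, maximum=10.0, step=0.1)
    speed = gr.Slider(label="Speed", value=1.0, minimum=0.1, maximum=10.0, step=0.1)
    output = gr.Audio(label="Output")
    submit = gr.Button("▶")
    submit.click(voice_conversion, inputs=[input, reference, steps, guidance, pitch, speed], outputs=output)

demo.launch()  # starts the UI when run directly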