Staticaliza committed · Commit 2e18475 · verified · 1 Parent(s): 75a9559

Update app.py

Files changed (1): app.py (+15 -5)
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import numpy as np
 import soundfile as sf
-import noisereduce as nr
 import spaces
 import torch
 import torchaudio
@@ -13,7 +12,6 @@ import os
 from huggingface_hub import hf_hub_download
 from transformers import AutoFeatureExtractor, WhisperModel
 from torch.nn.utils import parametrizations
-from scipy.signal import butter, filtfilt
 
 from modules.commons import build_model, load_checkpoint, recursive_munch
 from modules.campplus.DTDNN import CAMPPlus
@@ -183,7 +181,7 @@ footer {
 
 @torch.no_grad()
 @torch.inference_mode()
-def voice_conversion(input, reference, steps, guidance, speed):
+def voice_conversion(input, reference, steps, guidance, pitch, speed):
     print("[INFO] | Voice conversion started.")
 
     inference_module, mel_fn, bigvgan_fn = model, to_mel, bigvgan_model
@@ -317,6 +315,17 @@ def voice_conversion(input, reference, steps, guidance, speed):
     # Concatenate all generated wave chunks
     final_audio = np.concatenate(generated_wave_chunks).astype(np.float32)
 
+    # Pitch Shifting using librosa
+    print("[INFO] | Applying pitch shifting.")
+    try:
+        if pitch != 0:
+            final_audio = librosa.effects.pitch_shift(final_audio, sr=sr_current, n_steps=pitch)
+            print(f"[INFO] | Pitch shifted by {pitch} semitones.")
+        else:
+            print("[INFO] | No pitch shift applied.")
+    except Exception as e:
+        print(f"[ERROR] | Pitch shifting failed: {e}")
+
     # Normalize the audio to ensure it's within [-1.0, 1.0]
     max_val = np.max(np.abs(final_audio))
     if max_val > 1.0:
@@ -353,8 +362,9 @@ with gr.Blocks(css=css) as main:
         reference_input = gr.Audio(label="Reference Audio", type="filepath")
 
     with gr.Column():
-        steps = gr.Slider(label="Steps", value=2, minimum=1, maximum=100, step=1)
+        steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
         guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
+        pitch = gr.Slider(label="Pitch", value=0.0, minimum=-12.0, maximum=12.0, step=0.1)
         speed = gr.Slider(label="Speed", value=1.0, minimum=0.5, maximum=2.0, step=0.1)
 
     with gr.Column():
@@ -364,7 +374,7 @@ with gr.Blocks(css=css) as main:
     with gr.Column():
         output = gr.Audio(label="Output", type="filepath")
 
-    submit.click(voice_conversion, inputs=[input, reference_input, steps, guidance, speed], outputs=output, queue=False)
+    submit.click(voice_conversion, inputs=[input, reference_input, steps, guidance, pitch, speed], outputs=output, queue=False)
     maintain.click(cloud, inputs=[], outputs=[], queue=False)
 
 main.launch(show_api=True)
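
The new post-processing step hinges on librosa.effects.pitch_shift. Below is a minimal sketch of just that step, under two assumptions not visible in this diff: that app.py already imports librosa (no `import librosa` line is added here), and that `sr_current` holds the sample rate used elsewhere in voice_conversion. The helper name shift_and_normalize is hypothetical.

import numpy as np
import librosa  # assumed imported elsewhere in app.py; this diff adds no import line

def shift_and_normalize(final_audio: np.ndarray, sr_current: int, pitch: float) -> np.ndarray:
    # Shift the waveform by `pitch` semitones (the new slider spans -12.0 to 12.0).
    if pitch != 0:
        final_audio = librosa.effects.pitch_shift(final_audio, sr=sr_current, n_steps=pitch)
    # Peak-normalize into [-1.0, 1.0], mirroring the normalization that follows in app.py.
    max_val = np.max(np.abs(final_audio))
    if max_val > 1.0:
        final_audio = final_audio / max_val
    return final_audio

Wrapping the shift in try/except, as the commit does, keeps conversion usable even if pitch shifting fails: the unshifted audio simply falls through to normalization.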
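
On the UI side, the pitch slider only takes effect because it is also threaded through submit.click in the same position as the new `pitch` parameter, since Gradio passes `inputs` to the callback positionally. A stripped-down, hypothetical repro of that wiring (a text output and the "Input Audio" label stand in for the real pipeline, which this diff does not show):

import gradio as gr

def convert(input, reference, steps, guidance, pitch, speed):
    # Stand-in for voice_conversion: just echo the slider values.
    return f"steps={steps}, guidance={guidance}, pitch={pitch}, speed={speed}"

with gr.Blocks() as demo:
    input = gr.Audio(label="Input Audio", type="filepath")
    reference_input = gr.Audio(label="Reference Audio", type="filepath")
    steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
    guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
    pitch = gr.Slider(label="Pitch", value=0.0, minimum=-12.0, maximum=12.0, step=0.1)
    speed = gr.Slider(label="Speed", value=1.0, minimum=0.5, maximum=2.0, step=0.1)
    output = gr.Textbox(label="Output")
    submit = gr.Button("Submit")
    # The order of `inputs` must match convert's parameter order.
    submit.click(convert, inputs=[input, reference_input, steps, guidance, pitch, speed], outputs=output)

demo.launch()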