Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import numpy as np
 import soundfile as sf
-import noisereduce as nr
 import spaces
 import torch
 import torchaudio
@@ -13,7 +12,6 @@ import os
 from huggingface_hub import hf_hub_download
 from transformers import AutoFeatureExtractor, WhisperModel
 from torch.nn.utils import parametrizations
-from scipy.signal import butter, filtfilt
 
 from modules.commons import build_model, load_checkpoint, recursive_munch
 from modules.campplus.DTDNN import CAMPPlus
@@ -183,7 +181,7 @@ footer {
 
 @torch.no_grad()
 @torch.inference_mode()
-def voice_conversion(input, reference, steps, guidance, speed):
+def voice_conversion(input, reference, steps, guidance, pitch, speed):
     print("[INFO] | Voice conversion started.")
 
     inference_module, mel_fn, bigvgan_fn = model, to_mel, bigvgan_model
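Side note on the stacked decorators kept here: torch.inference_mode() already disables gradient tracking, so combining it with torch.no_grad() is redundant (though harmless). A minimal sketch of the stricter decorator used on its own:

import torch

@torch.inference_mode()  # no autograd graph, no tensor version tracking
def double(x: torch.Tensor) -> torch.Tensor:
    return x * 2

print(double(torch.ones(3)))  # tensor([2., 2., 2.])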
@@ -317,6 +315,17 @@ def voice_conversion(input, reference, steps, guidance, speed):
     # Concatenate all generated wave chunks
     final_audio = np.concatenate(generated_wave_chunks).astype(np.float32)
 
+    # Pitch Shifting using librosa
+    print("[INFO] | Applying pitch shifting.")
+    try:
+        if pitch != 0:
+            final_audio = librosa.effects.pitch_shift(final_audio, sr=sr_current, n_steps=pitch)
+            print(f"[INFO] | Pitch shifted by {pitch} semitones.")
+        else:
+            print("[INFO] | No pitch shift applied.")
+    except Exception as e:
+        print(f"[ERROR] | Pitch shifting failed: {e}")
+
     # Normalize the audio to ensure it's within [-1.0, 1.0]
     max_val = np.max(np.abs(final_audio))
     if max_val > 1.0:
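The added block assumes librosa is importable and that sr_current holds the sample rate of final_audio; neither is introduced by this diff, so both presumably exist elsewhere in app.py. A minimal, self-contained sketch of the same technique, using a synthetic tone and an assumed 44.1 kHz sample rate:

import numpy as np
import librosa

sr = 44100  # assumed sample rate for this sketch
t = np.linspace(0, 1.0, sr, endpoint=False)
audio = 0.5 * np.sin(2 * np.pi * 440.0 * t).astype(np.float32)  # 1 s, 440 Hz tone

# Shift up by two semitones; n_steps may be fractional, matching the 0.1-step slider
shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=2.0)

# Peak-normalize back into [-1.0, 1.0], mirroring the normalization step above
peak = np.max(np.abs(shifted))
if peak > 1.0:
    shifted = shifted / peak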
@@ -353,8 +362,9 @@ with gr.Blocks(css=css) as main:
             reference_input = gr.Audio(label="Reference Audio", type="filepath")
 
         with gr.Column():
-            steps = gr.Slider(label="Steps", value=
+            steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
             guidance = gr.Slider(label="Guidance", value=0.7, minimum=0.0, maximum=1.0, step=0.1)
+            pitch = gr.Slider(label="Pitch", value=0.0, minimum=-12.0, maximum=12.0, step=0.1)
             speed = gr.Slider(label="Speed", value=1.0, minimum=0.5, maximum=2.0, step=0.1)
 
         with gr.Column():
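The new Pitch slider spans -12.0 to +12.0, i.e. up to one octave down or up in 0.1-semitone increments, and Gradio delivers the value to the handler as a float. A hypothetical stand-alone sketch of that wiring pattern (names and layout are illustrative, not the app's):

import gradio as gr

def describe(pitch: float) -> str:
    return f"Pitch shift: {pitch:+.1f} semitones"

with gr.Blocks() as demo:
    pitch = gr.Slider(label="Pitch", value=0.0, minimum=-12.0, maximum=12.0, step=0.1)
    result = gr.Textbox(label="Result")
    pitch.change(describe, inputs=[pitch], outputs=result)

demo.launch()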
@@ -364,7 +374,7 @@ with gr.Blocks(css=css) as main:
         with gr.Column():
             output = gr.Audio(label="Output", type="filepath")
 
-    submit.click(voice_conversion, inputs=[input, reference_input, steps, guidance, speed], outputs=output, queue=False)
+    submit.click(voice_conversion, inputs=[input, reference_input, steps, guidance, pitch, speed], outputs=output, queue=False)
     maintain.click(cloud, inputs=[], outputs=[], queue=False)
 
 main.launch(show_api=True)
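Because main.launch(show_api=True) keeps the API docs exposed, the updated six-argument handler can also be called programmatically. A sketch using gradio_client, where the Space ID, file names, and api_name are placeholders (the real endpoint name depends on how Gradio registered voice_conversion):

from gradio_client import Client, handle_file

client = Client("user/space-name")  # hypothetical Space ID
result = client.predict(
    handle_file("input.wav"),      # input audio
    handle_file("reference.wav"),  # reference audio
    4,                             # steps
    0.7,                           # guidance
    2.0,                           # pitch, in semitones
    1.0,                           # speed
    api_name="/voice_conversion",  # assumed endpoint name
)
print(result)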