Spaces:

Staticaliza
/

Voice-CPU

Sleeping

App Files Community

Staticaliza committed on Dec 14, 2024

Commit

8615f3d

verified ·

1 Parent(s): 31f10c8

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -5

app.py CHANGED Viewed

@@ -39,6 +39,9 @@ torch.set_grad_enabled(False)
 device = torch.device("cpu")
 print(f"[DEVICE] | Using device: {device}")
 # ----------------------------
 # Load Models and Configuration
 # ----------------------------
@@ -75,7 +78,7 @@ model.cfm.estimator.setup_caches(max_batch_size=1, max_seq_length=8192)
 # Ensure 'CAMPPlus' is correctly imported and defined
 try:
-    campplus_model = CAMPPlus(feat_dim=80, embedding_size=192)
     print("[INFO] | CAMPPlus model instantiated.")
 except NameError:
     print("[ERROR] | CAMPPlus is not defined. Please check the import path and ensure CAMPPlus is correctly defined.")
@@ -90,7 +93,7 @@ campplus_model.to(device)
 print("[INFO] | CAMPPlus model loaded, set to eval mode, and moved to CPU.")
 # Load BigVGAN model
-bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_24khz_100band', use_cuda_kernel=False)
 bigvgan_model.remove_weight_norm()
 bigvgan_model = bigvgan_model.eval().to(device)
 print("[INFO] | BigVGAN model loaded, weight norm removed, set to eval mode, and moved to CPU.")
@@ -118,7 +121,7 @@ mel_fn_args = {
     "n_fft": 1024,
     "win_size": 1024,
     "hop_size": 256,
-    "num_mels": 80,
     "sampling_rate": sr,
     "fmin": 0,
     "fmax": None,
@@ -153,7 +156,7 @@ mel_fn_args_f0 = {
     "n_fft": config_f0['preprocess_params']['spect_params']['n_fft'],
     "win_size": config_f0['preprocess_params']['spect_params']['win_length'],
     "hop_size": config_f0['preprocess_params']['spect_params']['hop_length'],
-    "num_mels": 80,  # Ensure this matches the primary model
     "sampling_rate": sr_f0,
     "fmin": 0,
     "fmax": None,
@@ -273,7 +276,7 @@ def voice_conversion(input, reference, steps, guidance, pitch, speed):
     # Extract style features
     print("[INFO] | Extracting style features from reference audio.")
-    feat2 = torchaudio.compliance.kaldi.fbank(ref_waves_16k, num_mel_bins=80, dither=0, sample_frequency=sampling_rate)
     feat2 = feat2 - feat2.mean(dim=0, keepdim=True)
     style2 = campplus_model(feat2.unsqueeze(0))
     print(f"[INFO] | Style2 shape: {style2.shape}")

 device = torch.device("cpu")
 print(f"[DEVICE] | Using device: {device}")
+channel_numbers = 100 # 80 by default
+main_model = "nvidia/bigvgan_24khz_100band" # nvidia/bigvgan_v2_22khz_80band_256x
 # ----------------------------
 # Load Models and Configuration
 # ----------------------------
 # Ensure 'CAMPPlus' is correctly imported and defined
 try:
+    campplus_model = CAMPPlus(feat_dim=channel_numbers, embedding_size=192)
     print("[INFO] | CAMPPlus model instantiated.")
 except NameError:
     print("[ERROR] | CAMPPlus is not defined. Please check the import path and ensure CAMPPlus is correctly defined.")
 print("[INFO] | CAMPPlus model loaded, set to eval mode, and moved to CPU.")
 # Load BigVGAN model
+bigvgan_model = bigvgan.BigVGAN.from_pretrained(main_model, use_cuda_kernel=False)
 bigvgan_model.remove_weight_norm()
 bigvgan_model = bigvgan_model.eval().to(device)
 print("[INFO] | BigVGAN model loaded, weight norm removed, set to eval mode, and moved to CPU.")
     "n_fft": 1024,
     "win_size": 1024,
     "hop_size": 256,
+    "num_mels": channel_numbers,
     "sampling_rate": sr,
     "fmin": 0,
     "fmax": None,
     "n_fft": config_f0['preprocess_params']['spect_params']['n_fft'],
     "win_size": config_f0['preprocess_params']['spect_params']['win_length'],
     "hop_size": config_f0['preprocess_params']['spect_params']['hop_length'],
+    "num_mels": channel_numbers,
     "sampling_rate": sr_f0,
     "fmin": 0,
     "fmax": None,
     # Extract style features
     print("[INFO] | Extracting style features from reference audio.")
+    feat2 = torchaudio.compliance.kaldi.fbank(ref_waves_16k, num_mel_bins=channel_numbers, dither=0, sample_frequency=sampling_rate)
     feat2 = feat2 - feat2.mean(dim=0, keepdim=True)
     style2 = campplus_model(feat2.unsqueeze(0))
     print(f"[INFO] | Style2 shape: {style2.shape}")