Spaces:

Staticaliza
/

Voice

Running on Zero

Staticaliza committed on Dec 14, 2024

Commit

643937e

verified ·

1 Parent(s): f9cb653

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -90,7 +90,7 @@ campplus_model.to(device)
 print("[INFO] | CAMPPlus model loaded, set to eval mode, and moved to CPU.")
 # Load BigVGAN model
-bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_base_22khz_80band', use_cuda_kernel=False)
 bigvgan_model.remove_weight_norm()
 bigvgan_model = bigvgan_model.eval().to(device)
 print("[INFO] | BigVGAN model loaded, weight norm removed, set to eval mode, and moved to CPU.")
@@ -107,7 +107,7 @@ codec_encoder = {k: v.eval().to(device) for k, v in codec_encoder.items()}
 print("[INFO] | FAcodec model loaded, set to eval mode, and moved to CPU.")
 # Load Whisper model with float32 and compatible size
-whisper_name = model_params.speech_tokenizer.whisper_name if hasattr(model_params.speech_tokenizer, 'whisper_name') else "openai/whisper-small"
 whisper_model = WhisperModel.from_pretrained(whisper_name, torch_dtype=torch.float32).to(device)
 del whisper_model.decoder  # Remove decoder as it's not used
 whisper_feature_extractor = AutoFeatureExtractor.from_pretrained(whisper_name)

 print("[INFO] | CAMPPlus model loaded, set to eval mode, and moved to CPU.")
 # Load BigVGAN model
+bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_22khz_80band_256x', use_cuda_kernel=False)
 bigvgan_model.remove_weight_norm()
 bigvgan_model = bigvgan_model.eval().to(device)
 print("[INFO] | BigVGAN model loaded, weight norm removed, set to eval mode, and moved to CPU.")
 print("[INFO] | FAcodec model loaded, set to eval mode, and moved to CPU.")
 # Load Whisper model with float32 and compatible size
+whisper_name = model_params.speech_tokenizer.whisper_name if hasattr(model_params.speech_tokenizer, 'whisper_name') else "biodatlab/distill-whisper-th-small"
 whisper_model = WhisperModel.from_pretrained(whisper_name, torch_dtype=torch.float32).to(device)
 del whisper_model.decoder  # Remove decoder as it's not used
 whisper_feature_extractor = AutoFeatureExtractor.from_pretrained(whisper_name)