---
language:
- ms
---

# Malay VITS Multispeaker clean V2

**This model is intended to be used with [malaya-speech](https://github.com/mesolitica/malaya-speech) only. It is possible to use it without the library, but make sure the character vocabulary is correct**.

## How to use

```python
from huggingface_hub import snapshot_download
from malaya_speech.torch_model.vits.model_infer import SynthesizerTrn
from malaya_speech.torch_model.vits.commons import intersperse
from malaya_speech.utils.text import TTS_SYMBOLS
from malaya_speech.tts import load_text_ids
import torch
import os
import json

try:
    from malaya_boilerplate.hparams import HParams
except BaseException:
    from malaya_boilerplate.train.config import HParams

folder = snapshot_download(repo_id="mesolitica/VITS-multispeaker-clean-v2")

with open(os.path.join(folder, 'config.json')) as fopen:
    hps = HParams(**json.load(fopen))

model = SynthesizerTrn(
    len(TTS_SYMBOLS),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model,
).eval()
model.load_state_dict(torch.load(os.path.join(folder, 'model.pth'), map_location='cpu'))

speaker_id = {
    'Ariff': 0,
    'Ayu': 1,
    'Bunga': 2,
    'Danial': 3,
    'Elina': 4,
    'Kamarul': 5,
    'Osman': 6,
    'Yasmin': 7
}

normalizer = load_text_ids(pad_to = None, understand_punct = True, is_lower = False)

t, ids = normalizer.normalize('saya nak makan nasi ayam yang sedap, lagi lazat, dan hidup sangatlah susah kan.', add_fullstop = False)
if hps.data.add_blank:
    ids = intersperse(ids, 0)
ids = torch.LongTensor(ids)
ids_lengths = torch.LongTensor([ids.size(0)])
ids = ids.unsqueeze(0)
sid = 0
sid = torch.tensor([sid])

with torch.no_grad():
    audio = model.infer(
        ids,
        ids_lengths,
        noise_scale=0.0,
        noise_scale_w=0.0,
        length_scale=1.0,
        sid=sid,
    )
    y_ = audio[0].numpy()
```