Kamtera committed on
Commit a4b0beb · verified · 1 Parent(s): b16949a

Upload train_vits-1735402331.json with huggingface_hub

Files changed (1)
  1. train_vits-1735402331.json +106 -0
train_vits-1735402331.json ADDED
@@ -0,0 +1,106 @@
import os

from trainer import Trainer, TrainerArgs

from TTS.tts.configs.shared_configs import BaseDatasetConfig, CharactersConfig
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import Vits, VitsAudioConfig
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.utils.downloaders import download_thorsten_de

output_path = os.path.dirname(os.path.abspath(__file__))

# Single-speaker Persian dataset in Mozilla metadata format.
dataset_config = BaseDatasetConfig(
    formatter="mozilla", meta_file_train="metadata.csv", path="/kaggle/input/persian-female-rmz-mojezeh-tts"
)

# Audio settings: 22.05 kHz input, no resampling or silence trimming.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=False,
    resample=False,
    mel_fmin=0,
    mel_fmax=None,
)

# Persian character set plus the IPA phoneme inventory used by the phonemizer.
character_config = CharactersConfig(
    characters='ءابتثجحخدذرزسشصضطظعغفقلمنهويِپچژکگیآأؤإئًَُّ',
    punctuations='!(),-.:;? ̠،؛؟‌<>',
    phonemes='ˈˌːˑpbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟaegiouwyɪʊ̩æɑɔəɚɛɝɨ̃ʉʌʍ0123456789"#$%*+/=ABCDEFGHIJKLMNOPRSTUVWXYZ[]^_{}',
    pad="<PAD>",
    eos="<EOS>",
    bos="<BOS>",
    blank="<BLNK>",
    characters_class="TTS.tts.utils.text.characters.IPAPhonemes",
)

# VITS training configuration: Persian phonemizer, basic cleaners, 1000 epochs.
config = VitsConfig(
    audio=audio_config,
    run_name="vits_fa_ramezani",
    batch_size=16,
    eval_batch_size=8,
    batch_group_size=5,
    num_loader_workers=0,
    num_eval_loader_workers=2,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=1000,
    save_step=1000,
    text_cleaner="basic_cleaners",
    use_phonemes=True,
    phoneme_language="fa",
    characters=character_config,
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    compute_input_seq_cache=True,
    print_step=250,
    print_eval=False,
    mixed_precision=False,
    test_sentences=[
        ["سلطان محمود در زمستانی سخت به طلخک گفت "],
        ["کارل و لرل کارها رو رله کردن "],
        ["مردی نزد بقالی آمد و گفت پیاز هم ده تا دهان بدان خو شبوی سازم."],
        ["سه سیر سرشیر سه شیشه شیر! "],
        ["از مال خود پاره ای گوشت بستان و زیره بایی معطّر بساز"],
        ["لورل روی ریل راه میرفت "],
        ["یکی اسبی به عاریت خواست"],
    ],
    output_path=output_path,
    datasets=[dataset_config],
)

# INITIALIZE THE AUDIO PROCESSOR
# The audio processor handles feature extraction and audio I/O.
# It mainly serves the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# The tokenizer converts text into sequences of token IDs.
# The config is updated with the default characters if they are not defined in the config.
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Each sample is a list of `[text, audio_file_path, speaker_name]`.
# You can define a custom sample loader returning the list of samples,
# or define a custom formatter and pass it to `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.
train_samples, eval_samples = load_tts_samples(
    dataset_config,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
)

# Init the model.
model = Vits(config, ap, tokenizer, speaker_manager=None)

# Init the trainer and 🚀
trainer = Trainer(
    TrainerArgs(),
    config,
    output_path,
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)
trainer.fit()
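
Note: the committed script relies on the built-in `mozilla` formatter for this dataset. As the in-file comment mentions, `load_tts_samples` also accepts a custom formatter callable. The sketch below is illustrative only and is not part of the uploaded file; it assumes a pipe-separated `metadata.csv` (`wav_name|text`) with audio stored under `wavs/`, and the dict-based sample schema used by recent Coqui TTS releases. The function name `persian_formatter` and the speaker label are hypothetical.

# Illustrative sketch only -- not part of train_vits-1735402331.json.
# Assumes "wav_name|text" rows in metadata.csv and audio under <root_path>/wavs/.
import os


def persian_formatter(root_path, meta_file, **kwargs):  # hypothetical name
    """Return sample dicts in the shape expected by load_tts_samples."""
    samples = []
    with open(os.path.join(root_path, meta_file), "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            wav_name, text = line.strip().split("|", maxsplit=1)
            samples.append(
                {
                    "text": text,
                    "audio_file": os.path.join(root_path, "wavs", wav_name + ".wav"),
                    "speaker_name": "ramezani",  # single-speaker corpus; label is arbitrary
                    "root_path": root_path,
                }
            )
    return samples


# Usage, replacing the built-in formatter:
# train_samples, eval_samples = load_tts_samples(
#     dataset_config, eval_split=True, formatter=persian_formatter
# )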