{
  "project_name": "vits_kbd",
  "push_to_hub": true,
  "hub_model_id": "mms_finetune_kbd_murat",
  "overwrite_output_dir": false,
  "output_dir": "./tmp/vits_kbd_finetuned_che_model",

  "dataset_name": "anzorq/kbd_speech",
  "audio_column_name": "audio",
  "text_column_name": "transcription",
  "train_split_name": "train",
  "eval_split_name": "train",
  "speaker_id_column_name": "speaker_id",
  "override_speaker_embeddings": true,
  "filter_on_speaker_id": 9769125,

  "full_generation_sample_text": "укъэмыващэрэт тӏэ уэ щӏалэ цӏыкӏур! - жиӏащ лӏыжьым",

  "max_duration_in_seconds": 20,
  "min_duration_in_seconds": 1.0,
  "max_tokens_length": 500,

  "do_lower_case": true,

  "model_name_or_path": "anzorq/mms-tts-kbd-discriminator",

  "preprocessing_num_workers": 4,

  "do_train": true,
  "max_steps": 5100,
  "gradient_accumulation_steps": 1,
  "gradient_checkpointing": false,
  "per_device_train_batch_size": 16,
  "learning_rate": 1e-4,
  "adam_beta1": 0.8,
  "adam_beta2": 0.99,
  "warmup_ratio": 0.01,
  "group_by_length": false,

  "do_eval": true,
  "eval_steps": 100,
  "per_device_eval_batch_size": 16,
  "max_eval_samples": 100,
  "do_step_schedule_per_epoch": true,

  "weight_disc": 3,
  "weight_fmaps": 1,
  "weight_gen": 1,
  "weight_kl": 1.5,
  "weight_duration": 1,
  "weight_mel": 35,

  "fp16": true,
  "seed": 456
}