File size: 1,504 Bytes
02e90e4
01e655b
d2b7e94
01e655b
1df74c6
d2b7e94
d5b3cd8
01e655b
 
 
 
 
 
 
02e90e4
01e655b
 
 
 
 
 
 
1df74c6
01e655b
1df74c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01e655b
1df74c6
 
 
 
 
 
01e655b
1df74c6
d5b3cd8
1df74c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from typing import Union

from modules.SentenceSplitter import SentenceSplitter
from modules.speaker import Speaker
from modules.ssml_parser.SSMLParser import SSMLSegment
from modules.SynthesizeSegments import SynthesizeSegments, combine_audio_segments
from modules.utils import audio


def synthesize_audio(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: Union[int, Speaker] = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    batch_size: int = 1,
    spliter_threshold: int = 100,
    end_of_sentence="",
):
    """Split ``text`` into sentences, synthesize them, and return the merged audio.

    The text is first cut up by ``SentenceSplitter(spliter_threshold)``; each
    resulting piece is wrapped in an ``SSMLSegment`` that carries the same
    inference settings. The segments are rendered by ``SynthesizeSegments``,
    joined with ``combine_audio_segments`` and finally converted with
    ``audio.pydub_to_np``.

    Args:
        text: Input text to synthesize.
        temperature: Forwarded to every segment as ``params["temperature"]``.
        top_P: Forwarded to every segment as ``params["top_P"]``.
        top_K: Forwarded to every segment as ``params["top_K"]``.
        spk: Speaker selector (an ``int`` or a ``Speaker``), forwarded as
            ``params["spk"]``.
        infer_seed: Forwarded to every segment as ``params["infer_seed"]``.
        use_decoder: Forwarded to every segment as ``params["use_decoder"]``.
        prompt1: Forwarded to every segment as ``params["prompt1"]``.
        prompt2: Forwarded to every segment as ``params["prompt2"]``.
        prefix: Forwarded to every segment as ``params["prefix"]``.
        batch_size: Passed to ``SynthesizeSegments`` as ``batch_size``.
        spliter_threshold: Passed both to ``SentenceSplitter`` and to
            ``SynthesizeSegments`` (as ``spliter_thr``).
        end_of_sentence: Passed to ``SynthesizeSegments`` as ``eos``.

    Returns:
        Whatever ``audio.pydub_to_np`` produces for the combined audio
        (presumably a NumPy form of the pydub audio -- confirm in
        ``modules.utils.audio``).
    """
    pieces = SentenceSplitter(spliter_threshold).parse(text)

    # Template of per-segment inference settings; copied for each segment so
    # that no two segments share the same dict object.
    shared_params = {
        "temperature": temperature,
        "top_P": top_P,
        "top_K": top_K,
        "spk": spk,
        "infer_seed": infer_seed,
        "use_decoder": use_decoder,
        "prompt1": prompt1,
        "prompt2": prompt2,
        "prefix": prefix,
    }

    segments = []
    for piece in pieces:
        segments.append(SSMLSegment(text=piece, params=dict(shared_params)))

    engine = SynthesizeSegments(
        batch_size=batch_size,
        eos=end_of_sentence,
        spliter_thr=spliter_threshold,
    )
    rendered = engine.synthesize_segments(segments)

    return audio.pydub_to_np(combine_audio_segments(rendered))