File size: 1,060 Bytes
a8c8d73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os.path

import audiotsm
import audiotsm.io.wav
import audiotsm.io.array

from project_settings import project_path


def get_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with three attributes:
        ``filename`` (str, path of the WAV file to read),
        ``output_file`` (str, path of the WAV file to write) and
        ``speed`` (float, speed value handed to the time-scale algorithm).
    """
    # Default input sample, resolved relative to the project root.
    default_input = project_path / "data/voice_clone_audio/e2_tts/audio_0_3_clone_from_audio_0_2.wav"

    cli = argparse.ArgumentParser()
    cli.add_argument("--filename", type=str, default=default_input.as_posix())
    cli.add_argument("--output_file", type=str, default="temp.wav")
    cli.add_argument("--speed", type=float, default=1.1)
    return cli.parse_args()


def main():
    """Time-scale a WAV file with the WSOLA algorithm and save the result.

    The input path, output path and speed come from the command line
    (see ``get_args``). The output file is created with the same channel
    count and sample rate as the input file.
    """
    args = get_args()

    # Context managers guarantee that both files are closed even when
    # opening the writer or running the algorithm raises; the writer is
    # still closed before the reader, as in the manual version.
    with audiotsm.io.wav.WavReader(args.filename) as reader:
        with audiotsm.io.wav.WavWriter(
            args.output_file, reader.channels, reader.samplerate
        ) as writer:
            # Apply the WSOLA time-scale modification algorithm.
            wsola = audiotsm.wsola(reader.channels, speed=args.speed)
            wsola.run(reader, writer)
    return


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()