File size: 2,681 Bytes
17263d1
 
 
 
 
 
 
 
 
 
ba051ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17263d1
 
 
 
ba051ef
17263d1
 
 
 
ba051ef
17263d1
 
 
 
ba051ef
17263d1
 
 
 
ba051ef
17263d1
 
 
 
ba051ef
17263d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import shutil

from gradio_client import Client, handle_file
import numpy as np
from scipy.io import wavfile


# Language selection used by the default file paths built in get_args():
#   language1 -- name of the per-language folder under ...\money_num\
#                (the language's name written in Chinese)
#   language2 -- English language name used in the wav file names; it is also
#                lower-cased for the "xtts_v2_<language>_2*" file names
# Only one pair is active at a time; to process another language, comment out
# the active pair and uncomment the desired one.
# language1 = "英语"
# language2 = "English"
# language1 = "西班牙语"
# language2 = "Spanish"
language1 = "日语"
language2 = "Japanese"
# language1 = "葡萄牙语"
# language2 = "Portuguese"
# language1 = "韩语"
# language2 = "Korean"
# language1 = "阿拉伯语"
# language2 = "Arabic"
# language1 = "中国台湾"
# language2 = "Chinese"


def get_args():
    """Parse the command-line options of the clip-splicing script.

    All five options are string file paths. Their defaults point into the
    folder of the language currently selected through the module-level
    ``language1`` / ``language2`` constants.

    Returns:
        argparse.Namespace: carries ``filename1``, ``filename2``,
        ``filename3``, ``output_adapt_file`` and ``output_concat_file``.
    """
    # (flag, default path) pairs, registered in this order.
    path_options = (
        (
            "--filename1",
            rf"E:\牛信文档\语音克隆\多语种语音克隆\money_num\{language1}\{language2}_1.wav",
        ),
        (
            "--filename2",
            rf"E:\牛信文档\语音克隆\多语种语音克隆\money_num\{language1}\xtts_v2_{language2.lower()}_2.wav",
        ),
        (
            "--filename3",
            rf"E:\牛信文档\语音克隆\多语种语音克隆\money_num\{language1}\{language2}_3.wav",
        ),
        (
            "--output_adapt_file",
            rf"E:\牛信文档\语音克隆\多语种语音克隆\money_num\{language1}\xtts_v2_{language2.lower()}_2_volume_adapt.wav",
        ),
        (
            "--output_concat_file",
            rf"E:\牛信文档\语音克隆\多语种语音克隆\money_num\{language1}\xtts_v2_{language2.lower()}_2_concat.wav",
        ),
    )

    parser = argparse.ArgumentParser()
    for flag, default_path in path_options:
        parser.add_argument(flag, default=default_path, type=str)
    return parser.parse_args()


def main():
    """Process the middle clip through the Gradio service and splice three clips.

    Steps:
      1. ``filename2`` is sent to the ``/when_click_change_volume`` endpoint
         with ``filename3`` as the reference (presumably this matches the
         clip's loudness to the reference -- confirm against the service).
      2. The returned file is sent to ``/when_click_audio_convert`` requesting
         the target sample rate and a sample width of 2.
      3. ``filename1``, the processed clip and ``filename3`` are concatenated
         in that order and written to ``output_concat_file``; the processed
         clip itself is moved to ``output_adapt_file``.

    Raises:
        ValueError: if any of the three clips is not at the target sample
            rate. Splicing clips with different rates would produce audio
            that plays at the wrong speed, so this is checked before any
            output file is written.
    """
    args = get_args()

    # Single source of truth for the rate requested from the converter and the
    # rate the concatenated file is written with, so the two cannot drift apart.
    target_sample_rate = 8000

    # Alternative endpoint: http://10.75.27.247:7861/
    client = Client("http://127.0.0.1:7861/")

    # Only the first element of the returned tuple (a file path) is used.
    adapted_file, _ = client.predict(
        audio_t=handle_file(args.filename2),
        radio=1,
        decibel=0,
        reference=handle_file(args.filename3),
        engine="by_pydub_by_reference",
        api_name="/when_click_change_volume"
    )

    converted_file, _, _, _ = client.predict(
        audio_t=handle_file(adapted_file),
        to_sample_rate=target_sample_rate,
        sample_width=2,
        channels="0",
        engine="librosa",
        api_name="/when_click_audio_convert"
    )

    # Read the clips in playback order, verifying each one's sample rate
    # instead of silently discarding it.
    signals = []
    for path in (args.filename1, converted_file, args.filename3):
        sample_rate, samples = wavfile.read(path)
        if sample_rate != target_sample_rate:
            raise ValueError(
                f"sample rate of {path!r} is {sample_rate} Hz, "
                f"expected {target_sample_rate} Hz"
            )
        signals.append(samples)

    # np.concatenate exists in every NumPy release; np.concat is only
    # available from NumPy 2.0 onwards.
    signal = np.concatenate(signals, axis=0)

    shutil.move(
        converted_file,
        args.output_adapt_file
    )
    wavfile.write(
        args.output_concat_file,
        target_sample_rate,
        signal,
    )


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()