# Page-header residue (not part of the script): HoneyTian's picture / first commit / a8c8d73
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os.path
import audiotsm
import audiotsm.io.wav
import audiotsm.io.array
from project_settings import project_path
def get_args():
    """Parse the command-line options of the speed-change script.

    Options:
        --filename:    path of the input WAV file; defaults to a sample clip
                       under ``project_path``.
        --output_file: path of the WAV file to write; defaults to ``temp.wav``.
        --speed:       float speed factor handed to the time-scale algorithm;
                       defaults to ``1.1``.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    sample_clip = project_path / "data/voice_clone_audio/e2_tts/audio_0_3_clone_from_audio_0_2.wav"

    # (flag, default value, type converter) for every supported option.
    option_table = (
        ("--filename", sample_clip.as_posix(), str),
        ("--output_file", "temp.wav", str),
        ("--speed", 1.1, float),
    )

    cli = argparse.ArgumentParser()
    for flag, fallback, converter in option_table:
        cli.add_argument(flag, default=fallback, type=converter)

    return cli.parse_args()
def main():
    """Change the playback speed of a WAV file and write the result.

    Reads the input path, output path and speed factor from the command line
    (via ``get_args``), streams the input through audiotsm's WSOLA
    time-scale procedure and writes the processed audio to the output file
    using the same channel count and sample rate as the input.
    """
    args = get_args()

    # Context managers guarantee that both files are closed (writer first,
    # then reader) even if opening the writer or running the algorithm raises;
    # the previous explicit close() calls were skipped on any exception.
    with audiotsm.io.wav.WavReader(args.filename) as reader:
        with audiotsm.io.wav.WavWriter(
            args.output_file, reader.channels, reader.samplerate
        ) as writer:
            # Time-scale the audio with the WSOLA algorithm.
            wsola = audiotsm.wsola(reader.channels, speed=args.speed)
            wsola.run(reader, writer)
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()