Spaces:

siyangyuan
/

mb

Runtime error

App Files Files Community

mb / app.py

siyangyuan

Upload 45 files

14cff58 over 2 years ago

raw

history blame contribute delete

2.88 kB

	import os
	import httpx
	import torch
	import gradio as gr
	from tempfile import NamedTemporaryFile
	from pathlib import Path

	from mockingbirdforuse import MockingBird


	# Shared synthesizer instance used by every request.
	mockingbird = MockingBird()
	# All model weights and reference recordings live in ./data next to this file.
	mockingbird_path = Path(os.path.dirname(__file__)) / "data"
	base_url = "https://al.smoe.top/d/Home/source/mockingbird/"


	def _fetch_if_missing(relative_name: str, destination: Path) -> None:
	    """Download ``base_url``/``relative_name`` to ``destination`` unless it exists.

	    Args:
	        relative_name: Path of the asset relative to ``base_url``.
	        destination: Local file path the asset should be stored at.
	    """
	    if destination.exists():
	        return
	    # Create the target directory first; the original only did this for the
	    # per-voice folders, so a missing data directory would make the very
	    # first download fail.
	    destination.parent.mkdir(parents=True, exist_ok=True)
	    # base_url already ends with "/"; strip it so the URL has a single
	    # separator instead of "mockingbird//<name>".
	    url = f"{base_url.rstrip('/')}/{relative_name}"
	    torch.hub.download_url_to_file(url, str(destination))


	# Shared weights: speaker encoder plus both vocoders.
	for sy in ["encoder.pt", "g_hifigan.pt", "wavernn.pt"]:
	    _fetch_if_missing(sy, mockingbird_path / sy)

	# Per-voice assets: a reference recording and the synthesizer checkpoint.
	for model in ["azusa", "nanmei", "ltyai", "tianyi"]:
	    model_path = mockingbird_path / model
	    model_path.mkdir(parents=True, exist_ok=True)
	    for file_name in ["record.wav", f"{model}.pt"]:
	        _fetch_if_missing(f"{model}/{file_name}", model_path / file_name)

	mockingbird.load_model(
	    mockingbird_path / "encoder.pt",
	    mockingbird_path / "g_hifigan.pt",
	    mockingbird_path / "wavernn.pt",
	)


	def inference(
	    text: str,
	    model_name: str,
	    vocoder_type: str = "HifiGan",
	    style_idx: int = 0,
	    min_stop_token: int = 9,
	    steps: int = 2000,
	):
	    """Synthesize ``text`` with the chosen voice and return the path of a WAV file.

	    Args:
	        text: Text to speak; converted with ``str()`` before synthesis.
	        model_name: Name of the voice folder under ``mockingbird_path``. The
	            folder must contain ``<model_name>.pt`` and ``record.wav``.
	        vocoder_type: Vocoder selector forwarded to ``mockingbird.synthesize``.
	        style_idx: Forwarded to ``mockingbird.synthesize`` as an int.
	        min_stop_token: Forwarded to ``mockingbird.synthesize`` as an int.
	        steps: Forwarded to ``mockingbird.synthesize`` as an int.

	    Returns:
	        Filesystem path (str) of a temporary ``.wav`` file holding the audio.
	        The file is not deleted automatically.
	    """
	    model_path = mockingbird_path / model_name
	    mockingbird.set_synthesizer(model_path / f"{model_name}.pt")
	    # The UI sliders feeding these arguments are not all integer-stepped, so
	    # coerce to int to honour the annotated types.
	    record = mockingbird.synthesize(
	        text=str(text),
	        input_wav=model_path / "record.wav",
	        vocoder_type=vocoder_type,
	        style_idx=int(style_idx),
	        min_stop_token=int(min_stop_token),
	        steps=int(steps),
	    )
	    # Create the temp file only after synthesis succeeded (no orphan file on
	    # failure) and close the handle via the context manager; the original left
	    # the NamedTemporaryFile handle open on every call.
	    # NOTE(review): record is presumably an in-memory bytes buffer
	    # (it exposes getvalue()) - confirm against MockingBird.synthesize.
	    with NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
	        wav_file.write(record.getvalue())
	    return wav_file.name


	# Static texts shown on the Gradio page.
	title = "MockingBird"
	description = "🚀AI拟声: 5秒内克隆您的声音并生成任意语音内容 Clone a voice in 5 seconds to generate arbitrary speech in real-time"
	# The original markup had a closing </p> without an opening <p>.
	article = "<p><a href='https://github.com/babysor/MockingBird'>Github Repo</a></p>"

	# Build the web UI around inference() and start serving it. The input order
	# must match the positional parameter order of inference().
	gr.Interface(
	    fn=inference,
	    inputs=[
	        gr.Textbox(label="Input"),
	        gr.Radio(
	            ["azusa", "nanmei", "ltyai", "tianyi"],
	            label="model type",
	            value="azusa",
	        ),
	        gr.Radio(
	            ["HifiGan", "WaveRNN"],
	            label="Vocoder type",
	            value="HifiGan",
	        ),
	        gr.Slider(minimum=-1, maximum=9, step=1, label="style idx", value=0),
	        # step=1 keeps these integer-valued, matching the int parameters of
	        # inference() and the "style idx" slider above.
	        gr.Slider(minimum=3, maximum=9, step=1, label="min stop token", value=9),
	        gr.Slider(minimum=200, maximum=2000, step=1, label="steps", value=2000),
	    ],
	    outputs=gr.Audio(type="filepath", label="Output"),
	    title=title,
	    description=description,
	    article=article,
	    examples=[
	        ["阿梓不是你的电子播放器", "azusa", "HifiGan", 0, 9, 2000],
	        ["不是", "nanmei", "HifiGan", 0, 9, 2000],
	    ],
	).launch()