Spaces:

siyangyuan
/

mb

Runtime error

File size: 2,881 Bytes

14cff58

import os
import httpx
import torch
import gradio as gr
from tempfile import NamedTemporaryFile
from pathlib import Path

from mockingbirdforuse import MockingBird


mockingbird = MockingBird()
# All model weights and reference recordings live in ./data next to this file.
mockingbird_path = Path(__file__).parent / "data"
base_url = "https://al.smoe.top/d/Home/source/mockingbird/"


def _fetch_if_missing(remote_name: str, destination: Path) -> None:
    """Download ``remote_name`` (relative to ``base_url``) unless it already exists.

    Args:
        remote_name: Path of the file below ``base_url``, e.g. ``"azusa/azusa.pt"``.
        destination: Local path the file is stored at.
    """
    if destination.exists():
        return
    # The target directory has to exist before the download starts; create it
    # here so a fresh checkout without a ``data`` folder still works.
    destination.parent.mkdir(parents=True, exist_ok=True)
    # ``base_url`` already ends with "/"; strip it so the URL has no "//".
    url = f"{base_url.rstrip('/')}/{remote_name}"
    torch.hub.download_url_to_file(url, str(destination))


# Shared models: speaker encoder plus the two vocoders.
for sy in ["encoder.pt", "g_hifigan.pt", "wavernn.pt"]:
    _fetch_if_missing(sy, mockingbird_path / sy)

# Per-voice assets: a reference recording and the synthesizer checkpoint.
for model in ["azusa", "nanmei", "ltyai", "tianyi"]:
    model_path = mockingbird_path / model
    for file_name in ["record.wav", f"{model}.pt"]:
        _fetch_if_missing(f"{model}/{file_name}", model_path / file_name)

mockingbird.load_model(
    mockingbird_path / "encoder.pt",
    mockingbird_path / "g_hifigan.pt",
    mockingbird_path / "wavernn.pt",
)


def inference(
    text: str,
    model_name: str,
    vocoder_type: str = "HifiGan",
    style_idx: int = 0,
    min_stop_token: int = 9,
    steps: int = 2000,
) -> str:
    """Synthesize ``text`` in the voice of ``model_name`` and return a WAV path.

    Args:
        text: Text to speak.
        model_name: Name of a voice directory below ``mockingbird_path``; it
            must contain ``<model_name>.pt`` and ``record.wav``.
        vocoder_type: Vocoder passed through to ``mockingbird.synthesize``.
        style_idx: Style index passed through to the synthesizer.
        min_stop_token: Minimum stop token passed through to the synthesizer.
        steps: Step count passed through to the synthesizer.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is not deleted automatically (``delete=False``), so the caller
        can read it after this function returns.
    """
    model_path = mockingbird_path / model_name
    mockingbird.set_synthesizer(model_path / f"{model_name}.pt")
    # Synthesize first so a failure does not leave an empty temp file behind.
    record = mockingbird.synthesize(
        text=str(text),
        input_wav=model_path / "record.wav",
        vocoder_type=vocoder_type,
        # UI sliders may deliver floats; the parameters are declared as ints.
        style_idx=int(style_idx),
        min_stop_token=int(min_stop_token),
        steps=int(steps),
    )
    # Write through the temp-file handle itself and close it via the context
    # manager instead of leaking the descriptor and reopening the file by name.
    # NOTE(review): ``record`` is assumed to be a BytesIO-like object, since
    # only ``getvalue()`` is used on it here.
    with NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(record.getvalue())
    return wav_file.name


title = "MockingBird"
description = "🚀AI拟声: 5秒内克隆您的声音并生成任意语音内容 Clone a voice in 5 seconds to generate arbitrary speech in real-time"
# The closing </p> previously had no opening tag; wrap the link in a full <p>.
article = "<p><a href='https://github.com/babysor/MockingBird'>Github Repo</a></p>"

# Input widgets, in the same order as the parameters of ``inference``.
input_components = [
    gr.Textbox(label="Input"),
    gr.Radio(
        ["azusa", "nanmei", "ltyai", "tianyi"],
        label="model type",
        value="azusa",
    ),
    gr.Radio(
        ["HifiGan", "WaveRNN"],
        label="Vocoder type",
        value="HifiGan",
    ),
    gr.Slider(minimum=-1, maximum=9, step=1, label="style idx", value=0),
    gr.Slider(minimum=3, maximum=9, label="min stop token", value=9),
    gr.Slider(minimum=200, maximum=2000, label="steps", value=2000),
]

# Each example row supplies one value per input widget above.
example_rows = [
    ["阿梓不是你的电子播放器", "azusa", "HifiGan", 0, 9, 2000],
    ["不是", "nanmei", "HifiGan", 0, 9, 2000],
]

# ``inference`` returns a file path, so the output widget reads from a path.
demo = gr.Interface(
    inference,
    input_components,
    gr.Audio(type="filepath", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=example_rows,
)
demo.launch()