import gradio as gr
import tempfile
from TTS.api import TTS
from huggingface_hub import hf_hub_download
import torch
CUDA = torch.cuda.is_available()
REPO_ID = "ayymen/Coqui-TTS-Vits-shi"
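# Coqui TTS voice conversion models available for optional voice cloning.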
VOICE_CONVERSION_MODELS = {
    'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
    'openvoice_v1': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v1',
    'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
}
my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
my_examples = [
    ["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?"],
    ["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?"],
    ["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ."],
    ["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!"],
]
my_inputs = [
    gr.Textbox(lines=5, label="Input Text", placeholder="The only available characters are: ⴰⴱⴳⴷⴹⴻⴼⴽⵀⵃⵄⵅⵇⵉⵊⵍⵎⵏⵓⵔⵕⵖⵙⵚⵛⵜⵟⵡⵢⵣⵥⵯ !,.:?"),
    gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
    gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys()), value="freevc24"),
    gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True),
]
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
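# Download the fine-tuned VITS checkpoint and its config from the Hugging Face Hub,
# then load the model on GPU when available.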
best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
# pre-download voice conversion models
for model in VOICE_CONVERSION_MODELS.values():
    api.load_vc_model_by_name(model, gpu=CUDA)
def tts(text: str, speaker_wav: str = None, voice_cv_model: str = 'freevc24', split_sentences: bool = True):
    """Synthesize `text` to a wav file, optionally cloning the voice in `speaker_wav`."""
    # Replace out-of-vocabulary characters with supported punctuation.
    text = text.replace("\n", ". ")
    text = text.replace("(", ",")
    text = text.replace(")", ",")
    text = text.replace('"', ",")
    text = text.replace(";", ",")
    text = text.replace("-", " ")
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        if speaker_wav:
            # Clone the reference speaker using the selected voice conversion model.
            api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[voice_cv_model], gpu=CUDA)
            api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences)
        else:
            api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences)
    return fp.name
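# A minimal local smoke test (a sketch, not part of the Space UI): calling tts()
# directly synthesizes one of the example sentences without voice cloning and
# prints the path of the temporary wav file. Uncomment to try it before launching.
# example_wav = tts("ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?", speaker_wav=None, split_sentences=True)
# print(example_wav)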
iface = gr.Interface(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    examples=my_examples,
    cache_examples=True,
)

iface.launch()