Spaces:
Paused
Paused
File size: 9,765 Bytes
9653cf4 e415129 9653cf4 122fe58 ed0009e 9653cf4 7e13479 9653cf4 7e13479 9653cf4 7e13479 9653cf4 6a0a2ac 3a50740 6a0a2ac 9653cf4 6a0a2ac 3a50740 6a0a2ac 3a50740 ed0009e 6cac856 ed0009e 6cac856 7e13479 9653cf4 c6aa473 7e13479 9653cf4 f1f53e4 9653cf4 6cac856 06cc3bf 3572db4 06cc3bf 6cac856 06cc3bf 7e13479 a6c41ad 9653cf4 72c126a 42e35f7 3572db4 9653cf4 6cac856 7e13479 9653cf4 7e13479 37e915a 06cc3bf 7e13479 53ce68f f4b92d1 06cc3bf 7e13479 a26f907 7e13479 42e35f7 ed0009e 9653cf4 5c8f4fe 06cc3bf 5c8f4fe 06cc3bf 5c8f4fe 9653cf4 5c8f4fe 53ce68f 06cc3bf 5c8f4fe 42e35f7 7e13479 9653cf4 06cc3bf ed0009e 6cac856 9653cf4 36e8f2b 9653cf4 6cac856 49fa485 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
import os
import tempfile
from openai import OpenAI
from tts_voice import tts_order_voice
import edge_tts
import numpy as np
import anyio
import torch
import torchaudio
import gradio as gr
from scipy.io import wavfile
from scipy.io.wavfile import write
#新加内容
import asyncio
import threading
import requests
from aiohttp import ClientSession
# # 异步函数进行预加载
# async def fetch_link_content(url):
# async with ClientSession() as session:
# async with session.get(url) as response:
# return await response.text()
# # 后台任务确保不阻塞主线程
# def fetch_link_in_background(url):
# loop = asyncio.new_event_loop()
# asyncio.set_event_loop(loop)
# content = loop.run_until_complete(fetch_link_content(url))
# # 将 content 缓存起来或者在全局状态中保存以供后续使用
# print("预加载的内容:", content)
# link_url = "https://huggingface.co/api/spaces/by-subdomain/zxsipola123456-tts"
# background_thread = threading.Thread(target=fetch_link_in_background, args=(link_url,))
# background_thread.start()
# Create the KNN-VC voice-conversion model once, at import time, through
# torch.hub (entry point 'knn_vc' of the 'bshall/knn-vc' repository) on the CPU.
# NOTE(review): per the torch.hub docs, trust_repo=True skips the interactive
# repository-trust prompt, so the fetched hubconf code runs without
# confirmation - confirm this is acceptable for the deployment.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')
# Initialise language_dict as an alias of the imported tts_order_voice table.
# It is indexed with the selected language label to obtain the voice handed to
# edge_tts.Communicate, and its keys populate the language dropdown.
language_dict = tts_order_voice
# Asynchronous text-to-speech helper backed by edge-tts
async def text_to_speech_edge(text, language_code):
    """Synthesize ``text`` with edge-tts and store the result as an MP3 file.

    Args:
        text: The text to speak.
        language_code: A key of ``language_dict``; the mapped value is passed
            to ``edge_tts.Communicate`` as the voice.

    Returns:
        A ``(status_message, mp3_path)`` tuple, where ``mp3_path`` is a
        temporary file that is intentionally kept so the caller can serve it.

    Raises:
        KeyError: If ``language_code`` is not a key of ``language_dict``.
        Exception: Whatever ``communicate.save`` raises is re-raised after the
            partially written temporary file has been removed.
    """
    voice = language_dict[language_code]
    communicate = edge_tts.Communicate(text, voice)

    # Only reserve a unique file name here; the handle is closed before
    # edge-tts writes to the path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nobody else will clean this up, so do not leave
        # an empty/partial file behind when synthesis fails or is cancelled.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return f"语音合成完成:{text}", tmp_path
def voice_change(audio_in, audio_ref):
    """Convert the speech in ``audio_in`` using ``audio_ref`` as the matching set.

    Both inputs are read with ``scipy.io.wavfile`` and re-written to scratch
    WAV files before being handed to the module-level ``knn_vc`` model.

    Args:
        audio_in: Path of the WAV file to convert (the query audio).
        audio_ref: Path of the WAV file used to build the matching set.

    Returns:
        Path of a newly created WAV file containing the converted audio,
        saved with a sample rate of 16000 Hz.

    Raises:
        gr.Error: If either input path is missing.
    """
    # A button click with an empty audio component passes None; fail with a
    # readable message instead of an opaque error from wavfile.read.
    if not audio_in or not audio_ref:
        raise gr.Error('请先生成或上传待变声音频和参照音频')

    rate_in, samples_in = wavfile.read(audio_in)
    rate_ref, samples_ref = wavfile.read(audio_ref)

    scratch_paths = []
    try:
        # Re-encode both inputs through scipy, as the original code did,
        # before giving the paths to knn_vc.
        for rate, samples in ((rate_in, samples_in), (rate_ref, samples_ref)):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                scratch_paths.append(tmp.name)
            # Write after the handle is closed so the path can be reopened.
            write(scratch_paths[-1], rate, samples)

        query_path, ref_path = scratch_paths
        query_seq = knn_vc.get_features(query_path)
        matching_set = knn_vc.get_matching_set([ref_path])
        out_wav = knn_vc.match(query_seq, matching_set, topk=4)
    finally:
        # The scratch copies are only needed while knn_vc reads them.
        for path in scratch_paths:
            try:
                os.remove(path)
            except OSError:
                pass

    # Use a unique output file per call: a fixed 'output.wav' would be
    # overwritten by every other tab and every concurrent request.
    # The file is kept (delete=False) because the caller serves it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
        output_path = tmp_out.name
    torchaudio.save(output_path, out_wav[None], 16000)
    return output_path
# #验证中转api key是否有效
# def validate_api_key(api_proxy_key):
# try:
# client = OpenAI(api_key=api_proxy_key, base_url='https://lmzh.top/v1')
# # 测试调用一个简单的API来验证Key
# response = client.models.list()
# return True
# except Exception:
# return False
# # 更新Edge TTS标签页状态的函数
# def update_edge_tts_tab(api_proxy_key):
# is_valid = validate_api_key(api_proxy_key)
# return gr.update(interactive=is_valid)
# Text-to-speech through the OpenAI client pointed at the relay base URL
def tts(text, model, voice, api_key):
    """Generate speech for ``text`` via the relay endpoint and save it as MP3.

    Args:
        text: Text to synthesize; more than 300 characters is rejected.
        model: Model name forwarded to ``client.audio.speech.create``.
        voice: Voice name forwarded to ``client.audio.speech.create``.
        api_key: Relay API key; an empty string is rejected.

    Returns:
        Path of a temporary ``.mp3`` file holding the response content.

    Raises:
        gr.Error: On over-long text, an empty key, or any failure while
            creating the client or requesting the speech.
    """
    too_long = len(text) > 300
    if too_long:
        raise gr.Error('您输入的文本字符多于300个,请缩短您的文本')
    if api_key == '':
        raise gr.Error('请填写您的 中转API Key')

    try:
        relay_client = OpenAI(api_key=api_key, base_url='https://lmzh.top/v1')
        speech = relay_client.audio.speech.create(model=model, voice=voice, input=text)
    except Exception as error:
        raise gr.Error(f"生成语音时出错:{error}")

    # delete=False keeps the file on disk after the handle closes so the
    # returned path stays valid for the caller.
    mp3_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    with mp3_file:
        mp3_file.write(speech.content)
    return mp3_file.name
def tts1(text, model, voice, api_key):
    """Generate speech for ``text`` with the default OpenAI endpoint.

    Args:
        text: Text to synthesize; more than 300 characters is rejected.
        model: Model name forwarded to ``client.audio.speech.create``
            (the UI offers "tts-1" and "tts-1-hd").
        voice: Voice name forwarded to ``client.audio.speech.create``
            (the UI offers 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer').
        api_key: OpenAI API key; an empty string is rejected.

    Returns:
        Path of a temporary ``.mp3`` file holding the response content.

    Raises:
        gr.Error: On over-long text, an empty key, or any failure while
            creating the client or requesting the speech.
    """
    if len(text) > 300:
        raise gr.Error('您输入的文本字符多于300个,请缩短您的文本')
    if api_key == '':
        raise gr.Error('Please enter your OpenAI API Key')

    try:
        client = OpenAI(api_key=api_key)
        response = client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
        )
    except Exception as error:
        # Log the underlying cause BEFORE raising; the original printed after
        # the raise, so the statement was unreachable and the detail was lost.
        print(str(error))
        raise gr.Error("An error occurred while generating speech. Please check your API key and try again.") from error

    # Keep the file on disk (delete=False) so the returned path stays valid.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_file_path = temp_file.name
    return temp_file_path
# Gradio front-end layout.
# NOTE(review): the indentation of this section is not visible in this copy of
# the file, so the comments below describe what each statement does rather
# than the exact nesting of the Row/Column/Tab containers.
app = gr.Blocks(title="TTS文本生成语音 + AI秒变声")
with app:
# Page heading and a line of links rendered as Markdown.
gr.Markdown("# <center>TTS文本生成语音 + AI秒变声</center>")
gr.Markdown("### <center>key获取地址[here](https://buy.sipola.cn),ai文案生成(可使用中转key和官方key)请访问 [here](https://ai.sipola.cn)</center>")
# Tab 1: speech generated by `tts` (relay base URL), then `voice_change`.
with gr.Tab("中转key-TTS文本生语音"):
with gr.Row(variant='panel'):
# Password-style textbox for the key plus model / voice selectors.
api_proxy_key = gr.Textbox(type='password', label='API Key', placeholder='请在此填写您在https://buy.sipola.cn获取的中转API Key')
model = gr.Dropdown(choices=['tts-1','tts-1-hd'], label='请选择模型(tts-1推理更快,tts-1-hd音质更好)', value='tts-1')
voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='请选择一个说话人', value='alloy')
with gr.Row():
with gr.Column():
# Text to synthesize and the button that triggers synthesis.
inp_text = gr.Textbox(label="请填写您想生成的文本中英文皆可", placeholder="请输入ai生成的文案,不要超过300字,最好200字左右", lines=5)
btn_text = gr.Button("一键生成音频", variant="primary")
with gr.Column():
# inp1 receives the generated speech (non-interactive); inp2 is the
# user-supplied reference audio for voice conversion.
inp1 = gr.Audio(type="filepath", label="TTS真实拟声", interactive=False)
inp2 = gr.Audio(type="filepath", label="请上传同文案参照音频,可自己读取同文案录音")
btn1 = gr.Button("一键AI变声合成", variant="primary")
with gr.Column():
# Player for the converted audio.
out1 = gr.Audio(type="filepath", label="AI变声后的专属音频")
# Wire the handlers now: the names model, voice, inp_text, btn_text, inp1,
# inp2, btn1 and out1 are rebound to new components in the next tab.
btn_text.click(tts, [inp_text, model, voice, api_proxy_key], inp1)
btn1.click(voice_change, [inp1, inp2], out1)
# Tab 2: same controls, but speech is generated by `tts1` (default OpenAI endpoint).
with gr.Tab("官方key-TTS文本生语音"):
with gr.Row(variant='panel'):
api_key = gr.Textbox(type='password', label='API Key', placeholder='请在此填写您在https://buy.sipola.cn 获取的官方API Key')
model = gr.Dropdown(choices=['tts-1','tts-1-hd'], label='请选择模型(tts-1推理更快,tts-1-hd音质更好)', value='tts-1')
voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='请选择一个说话人', value='alloy')
with gr.Row():
with gr.Column():
inp_text = gr.Textbox(label="请填写您想生成的文本中英文皆可", placeholder="请输入ai生成的文案,不要超过300字,最好200字左右", lines=5)
btn_text = gr.Button("一键生成音频", variant="primary")
with gr.Column():
inp1 = gr.Audio(type="filepath", label="TTS真实拟声", interactive=False)
inp2 = gr.Audio(type="filepath", label="请上传同文案参照音频,可自己读取同文案录音")
btn1 = gr.Button("一键AI变声合成", variant="primary")
with gr.Column():
out1 = gr.Audio(type="filepath", label="AI变声后的专属音频")
btn_text.click(tts1, [inp_text, model, voice, api_key], inp1)
btn1.click(voice_change, [inp1, inp2], out1)
# Tab 3: speech generated by `text_to_speech_edge` (edge-tts, no API key field).
with gr.Tab("TTS-AI变声") as edge_tts_tab:
with gr.Row():
with gr.Column():
input_text = gr.Textbox(label="请填写您想生成的文本中英文皆可",placeholder="请输入ai生成的文案,不要超过300字,最好200字左右",lines=5)
btn_edge = gr.Button("一键生成音频", variant="primary")
with gr.Column():
# Default selection is the key at index 15 of language_dict; this raises
# IndexError at start-up if the table has fewer than 16 entries.
default_language = list(language_dict.keys())[15]
language = gr.Dropdown(choices=list(language_dict.keys()), value=default_language, label="请选择文本对应的语言")
output_audio = gr.Audio(type="filepath", label="TTS真实拟声", interactive=False, show_download_button=False)
# Hidden textbox that receives the status message returned by the handler.
output_text = gr.Textbox(label="输出文本", visible=False)
with gr.Row():
with gr.Column():
inp_vc = gr.Audio(type="filepath", label="请上传和文案相同参照音频,可自己读取文案录音或者用TTS文本生语音同文案生成的音频")
btn_vc = gr.Button("一键AI变声合成", variant="primary")
with gr.Column():
out_vc = gr.Audio(type="filepath", label="AI变声后的专属音频")
# text_to_speech_edge is a coroutine function; anyio.run drives it to
# completion inside this synchronous callback. Its (message, path) result
# is mapped onto [output_text, output_audio].
btn_edge.click(lambda text, lang: anyio.run(text_to_speech_edge, text, lang), [input_text, language], [output_text, output_audio])
btn_vc.click(voice_change, [output_audio, inp_vc], out_vc)
# Disabled: watch the API-key textbox for changes and update the state of the
# Edge TTS tab accordingly.
# api_proxy_key.change(
# update_edge_tts_tab,
# inputs=[api_proxy_key],
# outputs=[edge_tts_tab]
# )
# Static footer markup.
gr.HTML('''
<div class="footer">
<center><p>Power by sipola </p></center>
</div>
''')
# Start the Gradio server.
# NOTE(review): show_error=True presumably surfaces handler errors in the
# browser UI - confirm against the Gradio launch() documentation.
app.launch(show_error=True)
|