Spaces:
Runtime error
Runtime error
File size: 3,921 Bytes
f654d12 12bfd03 f654d12 12bfd03 73c1b13 12bfd03 4905c07 12bfd03 73c1b13 12bfd03 73c1b13 12bfd03 73c1b13 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 4905c07 12bfd03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import spaces
import random
import gradio as gr
from css.utils import *
# Custom voice generation
def custom():
    """Build the custom-voice UI: record a prompt, enter text, synthesize audio.

    Creates the Gradio components (microphone recorder, prompt transcript,
    language-mode radio, synthesis text, seed controls, generate button and
    output player) and wires the two button callbacks.

    The helpers and globals used here (``target_sr``, ``default_data``,
    ``prompt_sr``, ``cosyvoice``, ``cosyvoice_instruct``, ``use_instruct``,
    ``set_all_random_seed``, ``load_wav``, ``postprocess``,
    ``example_prompt_text``, ``example_tts_text``) are not defined in this
    function; presumably they come from ``from css.utils import *``.

    NOTE(review): this function opens no ``gr.Blocks`` itself, so it is
    presumably called from inside one by the app entry point - confirm.
    """

    def random_seed():
        """Return a fresh random seed in the range [1, 100000000]."""
        return random.randint(1, 100000000)

    @spaces.GPU
    def generate_audio(_recorded_audio, _prompt_input_textbox, _language_radio,
                       _synthetic_input_textbox, _seed):
        """Synthesize the given text using the recorded audio as voice prompt.

        Args:
            _recorded_audio: File path of the microphone recording (the
                recorder is created with ``type='filepath'``); may be None
                when nothing has been recorded.
            _prompt_input_textbox: Text the user read aloud in the recording.
                When blank, cross-lingual inference is used instead of
                zero-shot inference.
            _language_radio: ``'same'`` or ``'cross'`` (the radio's values).
            _synthetic_input_textbox: Text to synthesize.
            _seed: Value of the seed number field.

        Returns:
            A ``(target_sr, samples)`` tuple for the output audio player.
            When the synthesis text or the recording is missing, a warning is
            shown and ``(target_sr, default_data)`` is returned instead.
        """
        import time

        # perf_counter is monotonic, so the intervals below cannot be skewed
        # by wall-clock adjustments.
        t_start = time.perf_counter()
        print(_recorded_audio, _prompt_input_textbox, _language_radio, _synthetic_input_textbox, _seed)

        # Treat None and whitespace-only text the same as an empty string.
        if not (_synthetic_input_textbox or '').strip():
            gr.Warning('The synthesis text is empty, did you forget to input the synthesis text?')
            return (target_sr, default_data)

        # Without a recording there is no file to load; fail soft the same
        # way as for missing text instead of crashing inside load_wav.
        if not _recorded_audio:
            gr.Warning('The recorded audio is empty, did you forget to record the prompt audio?')
            return (target_sr, default_data)

        # The number field can deliver a float, or None if it was cleared;
        # fall back to 0, which is the field's default value in the UI.
        set_all_random_seed(int(_seed) if _seed is not None else 0)

        model = cosyvoice_instruct if use_instruct(_synthetic_input_textbox) else cosyvoice
        prompt_speech_16k = postprocess(load_wav(_recorded_audio, prompt_sr))
        t_loaded = time.perf_counter()

        # Zero-shot inference needs the transcript of the prompt recording;
        # without one (or when the user asks for it) use cross-lingual mode.
        has_prompt_text = bool((_prompt_input_textbox or '').strip())
        if _language_radio == 'cross' or not has_prompt_text:
            output = model.inference_cross_lingual(_synthetic_input_textbox, prompt_speech_16k)
        else:
            output = model.inference_zero_shot(_synthetic_input_textbox, _prompt_input_textbox, prompt_speech_16k)
        t_inferred = time.perf_counter()

        audio_data = postprocess(output['tts_speech']).numpy().flatten()
        t_done = time.perf_counter()

        print(f'load and preprocess time: {t_loaded-t_start}s')
        print(f'inference time: {t_inferred-t_loaded}s')
        print(f'postprocess time: {t_done-t_inferred}s')
        return (target_sr, audio_data)

    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from the order of the `with` statements; verify it
    # against the intended page layout.

    # Prompt section: recorder on the left, transcript to read on the right.
    with gr.Column():
        with gr.Row():
            with gr.Column(scale=1, min_width=400):
                with gr.Group():
                    recorded_audio = gr.Audio(sources=['microphone'],
                                              label="Record Audio File",
                                              type='filepath')
                    gr.Text("Please click to record and read the text on the right (Chinese or English) to complete the input",
                            max_lines=1,
                            container=False,
                            interactive=False)
            with gr.Column(scale=10):
                prompt_input_textbox = gr.Textbox(label="Input Text for Recording")
                gr.Examples(
                    label="Example Recording Texts",
                    examples=example_prompt_text,
                    inputs=[prompt_input_textbox])

    # Synthesis section: language mode plus the text to speak.
    with gr.Column():
        language_radio = gr.Radio(choices=[('Same Language', 'same'), ('Cross Language', 'cross')],
                                  value='same',
                                  label="Input Synthesis Text")
        synthetic_input_textbox = gr.Textbox(show_label=False)
        gr.Examples(
            label="Example Texts",
            examples=example_tts_text,
            inputs=[synthetic_input_textbox])

    # Seed controls: shuffle button next to the editable seed value.
    with gr.Accordion(label="Random Seed"):
        with gr.Row():
            with gr.Column(scale=1, min_width=180):
                seed_button = gr.Button(value="\U0001F3B2 Shuffle Randomly",
                                        elem_classes="full-height")
            with gr.Column(scale=10):
                seed = gr.Number(show_label=False,
                                 value=0,
                                 container=False,
                                 elem_classes="full-height")

    with gr.Column():
        generate_button = gr.Button("Generate Audio", variant="primary", size="lg")

    with gr.Column():
        output_audio = gr.Audio(label="Synthesized Audio")

    # Event wiring.
    seed_button.click(fn=random_seed, outputs=[seed])
    generate_button.click(
        fn=generate_audio,
        inputs=[recorded_audio, prompt_input_textbox, language_radio, synthetic_input_textbox, seed],
        outputs=[output_audio])
|