AlphaDragon coraKong committed on
Commit
5539f35
0 Parent(s):

Duplicate from coraKong/voice-cloning-demo

Browse files

Co-authored-by: Cora Kong <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +92 -0
  4. requirements.txt +1 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Voice Cloning Demo
3
+ emoji: 💩
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.21.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: coraKong/voice-cloning-demo
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from TTS.api import TTS
3
+
4
# Load every model once at import time so requests do not pay the start-up
# cost. All models run on CPU (gpu=False) and have their progress bars
# disabled so the application log is not flooded with progress output.
# `tts` is the multilingual model used for voice cloning; the other three are
# per-language models that are called without a reference voice.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)
zh_tts = TTS(
    model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST",
    progress_bar=False,
    gpu=False,
)
de_tts = TTS(
    model_name="tts_models/de/thorsten/vits",
    progress_bar=False,
    gpu=False,
)
es_tts = TTS(
    model_name="tts_models/es/mai/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)
9
+
10
def text_to_speech(text: str, speaker_wav, speaker_wav_file, language: str):
    """Synthesize ``text`` into a WAV file and return the path of that file.

    Args:
        text: The text to speak.
        speaker_wav: Path of the microphone recording to clone, or a falsy
            value when nothing was recorded.
        speaker_wav_file: Path of the uploaded audio file to clone; only used
            when ``speaker_wav`` is falsy.
        language: Language code. ``"zh-CN"``, ``"de"`` and ``"es"`` are
            served by dedicated models without voice cloning; any other code
            is passed to the multilingual model.

    Returns:
        The path of the generated WAV file.
    """
    import tempfile

    # Prefer the microphone recording; fall back to the uploaded file.
    if speaker_wav_file and not speaker_wav:
        speaker_wav = speaker_wav_file

    # Write each result to its own file. A single shared "output.wav" would be
    # overwritten by any request that runs while another result is still being
    # produced or served. The file is intentionally not deleted here because
    # the caller still has to read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        file_path = handle.name

    # Languages handled by dedicated models that are called without a
    # reference voice.
    non_cloning_models = {"zh-CN": zh_tts, "de": de_tts, "es": es_tts}
    model = non_cloning_models.get(language)

    if model is not None:
        model.tts_to_file(text, file_path=file_path)
    elif speaker_wav is not None:
        tts.tts_to_file(text, speaker_wav=speaker_wav, language=language, file_path=file_path)
    else:
        # No reference voice supplied: use the model's first built-in speaker.
        tts.tts_to_file(text, speaker=tts.speakers[0], language=language, file_path=file_path)
    return file_path
35
+
36
+
37
+
38
+ # inputs = [gr.Textbox(label="Input the text", value="", max_lines=3),
39
+ # gr.Audio(label="Voice to clone", source="microphone", type="filepath"),
40
+ # gr.Audio(label="Voice to clone", type="filepath"),
41
+ # gr.Radio(label="Language", choices=["en", "zh-CN", "fr-fr", "pt-br", "de", "es"], value="en"),
42
+ # gr.Text(intro_text, font_size=14)]
43
+ # outputs = gr.Audio(label="Output")
44
+
45
+ # demo = gr.Interface(fn=text_to_speech, inputs=inputs, outputs=outputs)
46
+
47
+ # demo.launch()
48
+
49
+
50
+ title = "Voice-Cloning-Demo"
51
+
52
def toggle(choice):
    """Build the visibility updates for the two audio inputs.

    Returns a pair of updates. The first is visible only when ``choice`` is
    ``"mic"``, the second only otherwise. Both updates also reset the
    component's value to ``None``.
    """
    use_mic = choice == "mic"
    mic_update = gr.update(visible=use_mic, value=None)
    file_update = gr.update(visible=not use_mic, value=None)
    return mic_update, file_update
57
+
58
def handle_language_change(choice):
    """Build three identical visibility updates for the selected language.

    The updates hide their components when ``choice`` is ``"zh-CN"``,
    ``"de"`` or ``"es"`` and show them for every other value.

    NOTE(review): when the components are shown again, all three updates use
    ``visible=True``, so this does not take the currently selected audio
    source into account -- confirm whether that is intended.
    """
    cloning_available = choice not in ("zh-CN", "de", "es")
    return tuple(gr.update(visible=cloning_available) for _ in range(3))
63
+
64
warming_text = """Please note that Chinese, German, and Spanish are currently not supported for voice cloning."""

# Build the UI: inputs and buttons in the left column, generated audio on the
# right. `title` (defined earlier in this file) becomes the browser tab title.
with gr.Blocks(title=title) as demo:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input the text", value="", max_lines=3)
            lan_input = gr.Radio(label="Language", choices=["en", "fr-fr", "pt-br", "zh-CN", "de", "es"], value="en")
            gr.Markdown(warming_text)
            radio = gr.Radio(["mic", "file"], value="mic",
                             label="How would you like to upload your audio?")
            # Only one of the two audio inputs is visible at a time; the
            # microphone is the initial choice, matching the radio default.
            audio_input_mic = gr.Audio(label="Voice to clone", source="microphone", type="filepath", visible=True)
            audio_input_file = gr.Audio(label="Voice to clone", type="filepath", visible=False)

            with gr.Row():
                with gr.Column():
                    btn_clear = gr.Button("Clear")
                with gr.Column():
                    btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(label="Output")

    btn.click(text_to_speech, inputs=[text_input, audio_input_mic,
                                      audio_input_file, lan_input], outputs=audio_output)
    # The Clear button previously had no handler; reset the text box, both
    # audio inputs and the output.
    btn_clear.click(lambda: ("", None, None, None), inputs=None,
                    outputs=[text_input, audio_input_mic, audio_input_file, audio_output])
    radio.change(toggle, radio, [audio_input_mic, audio_input_file])
    lan_input.change(handle_language_change, lan_input, [radio, audio_input_mic, audio_input_file])

# Enable request queuing through .queue() rather than the `enable_queue`
# argument of launch().
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ TTS