Kit-Lemonfoot committed
Commit 8bf7162
1 Parent(s): 230c0f6

Of course it didn't work. Now I get to go on another patching odyssey! Yay!

Files changed (1)
  1. GPT_SoVITS/inference_webui.py +271 -271
GPT_SoVITS/inference_webui.py CHANGED
@@ -1,271 +1,271 @@
- # Based on GPT-SoVITS-fast-inference by ChasonJiang
-
- import random
- import os
- import torch
- import spaces
-
- if torch.cuda.is_available():
-     device = "cuda"
- else:
-     device = "cpu"
-
- import re, logging
- logging.getLogger("markdown_it").setLevel(logging.ERROR)
- logging.getLogger("urllib3").setLevel(logging.ERROR)
- logging.getLogger("httpcore").setLevel(logging.ERROR)
- logging.getLogger("httpx").setLevel(logging.ERROR)
- logging.getLogger("asyncio").setLevel(logging.ERROR)
- logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
- logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
- import pdb
- import json
-
- infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
- infer_ttswebui = int(infer_ttswebui)
- is_share = os.environ.get("is_share", "False")
- is_share = eval(is_share)
- if "_CUDA_VISIBLE_DEVICES" in os.environ:
-     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
- is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
- gpt_path=None
- sovits_path=None
- #gpt_path = os.environ.get("gpt_path", None)
- #sovits_path = os.environ.get("sovits_path", None)
- cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
- bert_path = os.environ.get("bert_path", None)
-
- import gradio as gr
- from TTS_infer_pack.TTS import TTS, TTS_Config
- from TTS_infer_pack.text_segmentation_method import get_method
-
-
- dict_language = {
-     "ZH": "all_zh",        # recognize all text as Chinese
-     "EN": "en",            # recognize all text as English (unchanged)
-     "JP": "all_ja",        # recognize all text as Japanese
-     "ZH/EN": "zh",         # recognize mixed Chinese/English (unchanged)
-     "JP/EN": "ja",         # recognize mixed Japanese/English (unchanged)
-     "Automatic": "auto",   # multilingual: split and detect each segment's language automatically
- }
-
- cut_method = {
-     "None":"cut0",
-     "4 Sentences": "cut1",
-     "50 Characters": "cut2",
-     "ZH/JP Punctuation": "cut3",
-     "EN Punctuation": "cut4",
-     "All Punctuation": "cut5",
- }
-
- tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
- tts_config.device = device
- tts_config.is_half = is_half
- if gpt_path is not None:
-     tts_config.t2s_weights_path = gpt_path
- if sovits_path is not None:
-     tts_config.vits_weights_path = sovits_path
- if cnhubert_base_path is not None:
-     tts_config.cnhuhbert_base_path = cnhubert_base_path
- if bert_path is not None:
-     tts_config.bert_base_path = bert_path
-
- print(tts_config)
- tts_pipeline = TTS(tts_config)
- gpt_path = tts_config.t2s_weights_path
- sovits_path = tts_config.vits_weights_path
-
- clm= ""
-
- @spaces.GPU()
- def inference(name, gptmp, svmp, sty, text, text_lang,
-               ref_audio_path, prompt_text,
-               prompt_lang, top_k,
-               top_p, temperature,
-               text_split_method, batch_size,
-               speed_factor,
-               split_bucket,fragment_interval,
-               seed, keep_random, parallel_infer,
-               repetition_penalty
-               ):
-
-     global clm
-     #Live switching
-     if(not ref_audio_path):
-         ref_audio_path=f"referenceaudio/{name}/"+referencedata[name][0][sty]
-         prompt_text=referencedata[name][1][sty]
-     if clm!=name:
-         print(f"Switching to model {name}")
-         clm=name
-         tts_pipeline.init_t2s_weights(gptmp)
-         tts_pipeline.init_vits_weights(svmp)
-
-     seed = -1 if keep_random else seed
-     actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 32)
-     print(f"TMP: {temperature} | SPDFCT: {speed_factor} | STY: {sty} | LANG: {text_lang}")
-     inputs={
-         "text": text,
-         "text_lang": dict_language[text_lang],
-         "ref_audio_path": ref_audio_path,
-         "prompt_text": prompt_text,
-         "prompt_lang": dict_language[prompt_lang],
-         "top_k": top_k,
-         "top_p": top_p,
-         "temperature": temperature,
-         "text_split_method": cut_method[text_split_method],
-         "batch_size":int(batch_size),
-         "speed_factor":float(speed_factor),
-         "split_bucket":split_bucket,
-         "return_fragment":False,
-         "fragment_interval":fragment_interval,
-         "seed":actual_seed,
-         "parallel_infer": parallel_infer,
-         "repetition_penalty": repetition_penalty,
-     }
-     for item in tts_pipeline.run(inputs):
-         yield item, actual_seed
-
- def custom_sort_key(s):
-     # Use a regular expression to split the string into numeric and non-numeric parts
-     parts = re.split('(\d+)', s)
-     # Convert the numeric parts to integers; leave the non-numeric parts unchanged
-     parts = [int(part) if part.isdigit() else part for part in parts]
-     return parts
-
-
- def change_choices():
-     SoVITS_names, GPT_names = get_weights_names()
-     return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
-
-
- pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth"
- pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
- SoVITS_weight_root = "GPT_SoVITS/SoVITS_weights/"
- GPT_weight_root = "GPT_SoVITS/GPT_weights/"
-
- def get_weights_names():
-     SoVITS_names = [pretrained_sovits_name]
-     for name in os.listdir(SoVITS_weight_root):
-         if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (SoVITS_weight_root, name))
-     GPT_names = [pretrained_gpt_name]
-     for name in os.listdir(GPT_weight_root):
-         if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name))
-     return SoVITS_names, GPT_names
-
- def load_models():
-     print("Loading models...")
-     voices=[]
-     ustyles={}
-     with open("voicelist.json", "r", encoding="utf-8") as f:
-         voc_info = json.load(f)
-     for name, info in voc_info.items():
-         if not info['enable']:
-             continue
-         title= info['title']
-         gptmodelpath= "%s/%s" % (GPT_weight_root, info['gpt_model_path'])
-         sovitsmodelpath= "%s/%s" % (SoVITS_weight_root, info['sovits_model_path'])
-         author= info['modelauthor']
-         image = info['cover']
-         styles = info['styles']
-         #check that all styles properly exist
-         for s in styles.values():
-             if(not os.path.exists(f"referenceaudio/{name}/{s}")):
-                 print(f"WARNING : Some defined preset styles do not exist for model {name}, skipping")
-                 styles=None
-                 break
-         styletrans = info['styletrans']
-         st=[styles, styletrans]
-         voices.append((name, title, gptmodelpath, sovitsmodelpath, author, image))
-         ustyles[name]=st
-         print(f"Indexed model {title}")
-     return voices, ustyles
-
- modeldata, referencedata = load_models()
-
- #Gradio preload
- text = gr.TextArea(label="Input Text", value="Hello there! This is test audio of a new text to speech tool.")
- text_language = gr.Dropdown(label="Language", choices=["EN", "JP", "ZH", "ZH/EN", "JP/EN", "Automatic"], value="EN")
- how_to_cut = gr.Dropdown(label="Slicing Method",
-     choices=["None", "4 Sentences", "50 Characters", "ZH/JP Punctuation", "EN Punctuation", "All Punctuation" ],
-     value="4 Sentences",
-     interactive=True,
- )
- top_k = gr.Slider(minimum=1,maximum=100,step=1,label="Top_k",value=5,interactive=True)
- top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label="Top_p",value=1,interactive=True)
- temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label="Temperature",value=0.7,interactive=True)
- batch_size = gr.Slider(minimum=1,maximum=200,step=1,label="Batch Size",value=20,interactive=True)
- fragment_interval = gr.Slider(minimum=0.01,maximum=1,step=0.01,label="Fragment Interval",value=0.3,interactive=True)
- speed_factor = gr.Slider(minimum=0.50,maximum=2,step=0.05,label="Speed Factor",value=1.0,interactive=True)
- repetition_penalty = gr.Slider(minimum=0,maximum=2,step=0.05,label="Repetition Penalty",value=1.35,interactive=True)
- parallel_infer = gr.Checkbox(label="Parallel Infer", value=True, interactive=True, show_label=True)
- split_bucket = gr.Checkbox(label="Split Bucket", value=True, interactive=True, show_label=True)
- seed = gr.Number(label="Random Seed",value=-1, interactive=True, show_label=True)
- keep_random = gr.Checkbox(label="Use Randomized Seed", value=True, interactive=True, show_label=True)
-
- #Main gradio
- with gr.Blocks(title="Lemonfoot GPT-SoVITS") as app:
-     gr.Markdown(
-         "# Lemonfoot GPT-SoVITS 🚀🍋\n"
-         "### Space by Kit Lemonfoot / Noel Shirogane's High Flying Birds\n"
-         "Based on code originally by RVC_Boss and ChasonJiang\n\n"
-         "Do no evil.\n\n"
-     )
-     for (name, title, gptmodelpath, sovitsmodelpath, author, image) in modeldata:
-         with gr.TabItem(name):
-             with gr.Row():
-                 with gr.Column():
-                     n = gr.Textbox(value=name, visible=False, interactive=False)
-                     gptmp = gr.Textbox(value=gptmodelpath, visible=False, interactive=False)
-                     svmp = gr.Textbox(value=sovitsmodelpath, visible=False, interactive=False)
-                     gr.Markdown(f"**{title}**\n\n Dataset author: {author}")
-                     gr.Image(f"images/{image}", label=None, show_label=False, width=300, show_download_button=False, container=False, show_share_button=False)
-                 with gr.Column():
-                     #if there aren't any styles, don't bother rendering the style window
-                     if(not referencedata[name][0]==None):
-                         rd = list(referencedata[name][0].keys())
-                         with gr.TabItem("Style using a preset"):
-                             sty = gr.Dropdown(
-                                 label="Current style",
-                                 choices=rd,
-                                 value=rd[0],
-                                 interactive=True
-                             )
-                     else:
-                         sty=gr.Textbox(value="none", visible=False, interactive=False)
-                     with gr.TabItem("Style using a different audio"):
-                         with gr.Column():
-                             ref_audio_path = gr.Audio(label="Reference Audio", type="filepath")
-                             prompt_text = gr.Textbox(label="Reference Audio Text", interactive=True, placeholder="Leave blank to use no-text reference mode.")
-                             prompt_language = gr.Dropdown(label="Reference Audio Language", choices=["EN", "JP", "ZH", "ZH/EN", "JP/EN", "Automatic"], value="EN")
-                 with gr.Column():
-                     inference_button = gr.Button("Synthesize", variant="primary")
-                     output = gr.Audio(label="Output")
-
-             inference_button.click(
-                 inference,
-                 inputs=[n, gptmp, svmp, sty, text, text_language, ref_audio_path, prompt_text, prompt_language, top_k, top_p, temperature, how_to_cut, batch_size, speed_factor, split_bucket, fragment_interval, seed, keep_random, parallel_infer, repetition_penalty],
-                 outputs=[output, seed]
-             )
-
-     #bottom info
-     with gr.Row():
-         with gr.Column():
-             text.render()
-             text_language.render()
-             how_to_cut.render()
-         with gr.Column():
-             temperature.render()
-             speed_factor.render()
-             with gr.Accordion("Advanced Inference Parameters", open=False):
-                 top_k.render()
-                 top_p.render()
-                 batch_size.render()
-                 fragment_interval.render()
-                 repetition_penalty.render()
-                 parallel_infer.render()
-                 split_bucket.render()
-                 seed.render()
-                 keep_random.render()
-
-
- app.queue().launch()
 
+ # Based on GPT-SoVITS-fast-inference by ChasonJiang
+
+ import random
+ import os
+ import torch
+
+ if torch.cuda.is_available():
+     device = "cuda"
+ else:
+     device = "cpu"
+
+ import re, logging
+ logging.getLogger("markdown_it").setLevel(logging.ERROR)
+ logging.getLogger("urllib3").setLevel(logging.ERROR)
+ logging.getLogger("httpcore").setLevel(logging.ERROR)
+ logging.getLogger("httpx").setLevel(logging.ERROR)
+ logging.getLogger("asyncio").setLevel(logging.ERROR)
+ logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
+ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
+ import pdb
+ import json
+
+ infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
+ infer_ttswebui = int(infer_ttswebui)
+ is_share = os.environ.get("is_share", "False")
+ is_share = eval(is_share)
+ if "_CUDA_VISIBLE_DEVICES" in os.environ:
+     os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
+ is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
+ gpt_path=None
+ sovits_path=None
+ #gpt_path = os.environ.get("gpt_path", None)
+ #sovits_path = os.environ.get("sovits_path", None)
+ cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
+ bert_path = os.environ.get("bert_path", None)
+
+ import gradio as gr
+ from TTS_infer_pack.TTS import TTS, TTS_Config
+ from TTS_infer_pack.text_segmentation_method import get_method
+
+ import nltk
+ nltk.download('averaged_perceptron_tagger_eng')
+
+ dict_language = {
+     "ZH": "all_zh",        # recognize all text as Chinese
+     "EN": "en",            # recognize all text as English (unchanged)
+     "JP": "all_ja",        # recognize all text as Japanese
+     "ZH/EN": "zh",         # recognize mixed Chinese/English (unchanged)
+     "JP/EN": "ja",         # recognize mixed Japanese/English (unchanged)
+     "Automatic": "auto",   # multilingual: split and detect each segment's language automatically
+ }
+
+ cut_method = {
+     "None":"cut0",
+     "4 Sentences": "cut1",
+     "50 Characters": "cut2",
+     "ZH/JP Punctuation": "cut3",
+     "EN Punctuation": "cut4",
+     "All Punctuation": "cut5",
+ }
+
+ tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
+ tts_config.device = device
+ tts_config.is_half = is_half
+ if gpt_path is not None:
+     tts_config.t2s_weights_path = gpt_path
+ if sovits_path is not None:
+     tts_config.vits_weights_path = sovits_path
+ if cnhubert_base_path is not None:
+     tts_config.cnhuhbert_base_path = cnhubert_base_path
+ if bert_path is not None:
+     tts_config.bert_base_path = bert_path
+
+ print(tts_config)
+ tts_pipeline = TTS(tts_config)
+ gpt_path = tts_config.t2s_weights_path
+ sovits_path = tts_config.vits_weights_path
+
+ clm= ""
+
+ def inference(name, gptmp, svmp, sty, text, text_lang,
+               ref_audio_path, prompt_text,
+               prompt_lang, top_k,
+               top_p, temperature,
+               text_split_method, batch_size,
+               speed_factor,
+               split_bucket,fragment_interval,
+               seed, keep_random, parallel_infer,
+               repetition_penalty
+               ):
+
+     global clm
+     #Live switching
+     if(not ref_audio_path):
+         ref_audio_path=f"referenceaudio/{name}/"+referencedata[name][0][sty]
+         prompt_text=referencedata[name][1][sty]
+     if clm!=name:
+         print(f"Switching to model {name}")
+         clm=name
+         tts_pipeline.init_t2s_weights(gptmp)
+         tts_pipeline.init_vits_weights(svmp)
+
+     seed = -1 if keep_random else seed
+     actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 32)
+     print(f"TMP: {temperature} | SPDFCT: {speed_factor} | STY: {sty} | LANG: {text_lang}")
+     inputs={
+         "text": text,
+         "text_lang": dict_language[text_lang],
+         "ref_audio_path": ref_audio_path,
+         "prompt_text": prompt_text,
+         "prompt_lang": dict_language[prompt_lang],
+         "top_k": top_k,
+         "top_p": top_p,
+         "temperature": temperature,
+         "text_split_method": cut_method[text_split_method],
+         "batch_size":int(batch_size),
+         "speed_factor":float(speed_factor),
+         "split_bucket":split_bucket,
+         "return_fragment":False,
+         "fragment_interval":fragment_interval,
+         "seed":actual_seed,
+         "parallel_infer": parallel_infer,
+         "repetition_penalty": repetition_penalty,
+     }
+     for item in tts_pipeline.run(inputs):
+         yield item, actual_seed
+
+ def custom_sort_key(s):
+     # Use a regular expression to split the string into numeric and non-numeric parts
+     parts = re.split('(\d+)', s)
+     # Convert the numeric parts to integers; leave the non-numeric parts unchanged
+     parts = [int(part) if part.isdigit() else part for part in parts]
+     return parts
+
+
+ def change_choices():
+     SoVITS_names, GPT_names = get_weights_names()
+     return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
+
+
+ pretrained_sovits_name = "GPT_SoVITS/pretrained_models/s2G488k.pth"
+ pretrained_gpt_name = "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
+ SoVITS_weight_root = "GPT_SoVITS/SoVITS_weights/"
+ GPT_weight_root = "GPT_SoVITS/GPT_weights/"
+
+ def get_weights_names():
+     SoVITS_names = [pretrained_sovits_name]
+     for name in os.listdir(SoVITS_weight_root):
+         if name.endswith(".pth"): SoVITS_names.append("%s/%s" % (SoVITS_weight_root, name))
+     GPT_names = [pretrained_gpt_name]
+     for name in os.listdir(GPT_weight_root):
+         if name.endswith(".ckpt"): GPT_names.append("%s/%s" % (GPT_weight_root, name))
+     return SoVITS_names, GPT_names
+
+ def load_models():
+     print("Loading models...")
+     voices=[]
+     ustyles={}
+     with open("voicelist.json", "r", encoding="utf-8") as f:
+         voc_info = json.load(f)
+     for name, info in voc_info.items():
+         if not info['enable']:
+             continue
+         title= info['title']
+         gptmodelpath= "%s/%s" % (GPT_weight_root, info['gpt_model_path'])
+         sovitsmodelpath= "%s/%s" % (SoVITS_weight_root, info['sovits_model_path'])
+         author= info['modelauthor']
+         image = info['cover']
+         styles = info['styles']
+         #check that all styles properly exist
+         for s in styles.values():
+             if(not os.path.exists(f"referenceaudio/{name}/{s}")):
+                 print(f"WARNING : Some defined preset styles do not exist for model {name}, skipping")
+                 styles=None
+                 break
+         styletrans = info['styletrans']
+         st=[styles, styletrans]
+         voices.append((name, title, gptmodelpath, sovitsmodelpath, author, image))
+         ustyles[name]=st
+         print(f"Indexed model {title}")
+     return voices, ustyles
+
+ modeldata, referencedata = load_models()
+
+ #Gradio preload
+ text = gr.TextArea(label="Input Text", value="Hello there! This is test audio of a new text to speech tool.")
+ text_language = gr.Dropdown(label="Language", choices=["EN", "JP", "ZH", "ZH/EN", "JP/EN", "Automatic"], value="EN")
+ how_to_cut = gr.Dropdown(label="Slicing Method",
+     choices=["None", "4 Sentences", "50 Characters", "ZH/JP Punctuation", "EN Punctuation", "All Punctuation" ],
+     value="4 Sentences",
+     interactive=True,
+ )
+ top_k = gr.Slider(minimum=1,maximum=100,step=1,label="Top_k",value=5,interactive=True)
+ top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label="Top_p",value=1,interactive=True)
+ temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label="Temperature",value=0.7,interactive=True)
+ batch_size = gr.Slider(minimum=1,maximum=200,step=1,label="Batch Size",value=20,interactive=True)
+ fragment_interval = gr.Slider(minimum=0.01,maximum=1,step=0.01,label="Fragment Interval",value=0.3,interactive=True)
+ speed_factor = gr.Slider(minimum=0.50,maximum=2,step=0.05,label="Speed Factor",value=1.0,interactive=True)
+ repetition_penalty = gr.Slider(minimum=0,maximum=2,step=0.05,label="Repetition Penalty",value=1.35,interactive=True)
+ parallel_infer = gr.Checkbox(label="Parallel Infer", value=True, interactive=True, show_label=True)
+ split_bucket = gr.Checkbox(label="Split Bucket", value=True, interactive=True, show_label=True)
+ seed = gr.Number(label="Random Seed",value=-1, interactive=True, show_label=True)
+ keep_random = gr.Checkbox(label="Use Randomized Seed", value=True, interactive=True, show_label=True)
+
+ #Main gradio
+ with gr.Blocks(title="Lemonfoot GPT-SoVITS") as app:
+     gr.Markdown(
+         "# Lemonfoot GPT-SoVITS 🚀🍋\n"
+         "### Space by Kit Lemonfoot / Noel Shirogane's High Flying Birds\n"
+         "Based on code originally by RVC_Boss and ChasonJiang\n\n"
+         "Do no evil.\n\n"
+     )
+     for (name, title, gptmodelpath, sovitsmodelpath, author, image) in modeldata:
+         with gr.TabItem(name):
+             with gr.Row():
+                 with gr.Column():
+                     n = gr.Textbox(value=name, visible=False, interactive=False)
+                     gptmp = gr.Textbox(value=gptmodelpath, visible=False, interactive=False)
+                     svmp = gr.Textbox(value=sovitsmodelpath, visible=False, interactive=False)
+                     gr.Markdown(f"**{title}**\n\n Dataset author: {author}")
+                     gr.Image(f"images/{image}", label=None, show_label=False, width=300, show_download_button=False, container=False, show_share_button=False)
+                 with gr.Column():
+                     #if there aren't any styles, don't bother rendering the style window
+                     if(not referencedata[name][0]==None):
+                         rd = list(referencedata[name][0].keys())
+                         with gr.TabItem("Style using a preset"):
+                             sty = gr.Dropdown(
+                                 label="Current style",
+                                 choices=rd,
+                                 value=rd[0],
+                                 interactive=True
+                             )
+                     else:
+                         sty=gr.Textbox(value="none", visible=False, interactive=False)
+                     with gr.TabItem("Style using a different audio"):
+                         with gr.Column():
+                             ref_audio_path = gr.Audio(label="Reference Audio", type="filepath")
+                             prompt_text = gr.Textbox(label="Reference Audio Text", interactive=True, placeholder="Leave blank to use no-text reference mode.")
+                             prompt_language = gr.Dropdown(label="Reference Audio Language", choices=["EN", "JP", "ZH", "ZH/EN", "JP/EN", "Automatic"], value="EN")
+                 with gr.Column():
+                     inference_button = gr.Button("Synthesize", variant="primary")
+                     output = gr.Audio(label="Output")
+
+             inference_button.click(
+                 inference,
+                 inputs=[n, gptmp, svmp, sty, text, text_language, ref_audio_path, prompt_text, prompt_language, top_k, top_p, temperature, how_to_cut, batch_size, speed_factor, split_bucket, fragment_interval, seed, keep_random, parallel_infer, repetition_penalty],
+                 outputs=[output, seed]
+             )
+
+     #bottom info
+     with gr.Row():
+         with gr.Column():
+             text.render()
+             text_language.render()
+             how_to_cut.render()
+         with gr.Column():
+             temperature.render()
+             speed_factor.render()
+             with gr.Accordion("Advanced Inference Parameters", open=False):
+                 top_k.render()
+                 top_p.render()
+                 batch_size.render()
+                 fragment_interval.render()
+                 repetition_penalty.render()
+                 parallel_infer.render()
+                 split_bucket.render()
+                 seed.render()
+                 keep_random.render()
+
+
+ app.queue().launch()