Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
import torch
|
@@ -335,41 +336,46 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
|
|
335 |
).export(format="mp3", bitrate=bitrate).read()
|
336 |
yield mp3_bytes, None
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import spaces
|
3 |
import gradio as gr
|
4 |
import torch
|
|
|
336 |
).export(format="mp3", bitrate=bitrate).read()
|
337 |
yield mp3_bytes, None
|
338 |
|
339 |
+
# Directory the app reads the bundled reference voices from. Further down the
# file, "<slug>.mp3" and "<slug>.png" paths are built under this directory.
# NOTE(review): this is an absolute path at the filesystem root — confirm the
# deployment really places the assets there rather than next to app.py.
default_dir = "/default"

# Bundled reference voices as (display name, file slug) pairs. The display
# name is user-facing text; the slug is the file stem used to build paths.
reference_files = list(
    zip(
        ["ずんだもん", "四国めたん", "春日部つむぎ", "雨晴はう", "波音リツ"],
        ["zundamon", "sikokumetan", "kasukabetsumugi", "ameharehau", "namineritsu"],
    )
)
|
347 |
+
|
348 |
+
def proxy_voice_conversion(source, selected_reference, diffusion_steps, length_adjust, inference_cfg_rate, f0_condition, auto_f0_adjust, pitch_shift):
    """Convert ``source`` using one of the bundled reference voices.

    Builds the path ``<default_dir>/<selected_reference>.mp3`` and forwards it,
    together with every other argument unchanged, to ``voice_conversion``.

    Args:
        source: Source audio, passed through to ``voice_conversion`` as-is.
        selected_reference: File stem of the reference clip inside
            ``default_dir`` (for example ``"zundamon"``).
            NOTE(review): confirm the UI component wired to this parameter
            really delivers a plain string slug.
        diffusion_steps, length_adjust, inference_cfg_rate, f0_condition,
        auto_f0_adjust, pitch_shift: Passed through to ``voice_conversion``
            unchanged.

    Yields:
        Whatever ``voice_conversion`` yields, item by item.
    """
    reference_path = os.path.join(default_dir, f"{selected_reference}.mp3")
    # voice_conversion is a generator. Delegating with `yield from` makes this
    # wrapper a generator function too, so a framework that inspects the
    # callback to decide whether to stream sees the right thing; a plain
    # `return` would hand it an unconsumed generator object instead.
    yield from voice_conversion(
        source,
        reference_path,
        diffusion_steps,
        length_adjust,
        inference_cfg_rate,
        f0_condition,
        auto_f0_adjust,
        pitch_shift,
    )
|
351 |
+
|
352 |
+
# Gallery entries as (thumbnail path, caption) pairs, one per reference voice.
# gr.Gallery accepts bare images or (image, caption) pairs; a third element
# per item cannot be unpacked, so the file slug is not carried here (it is
# still recoverable from the thumbnail's file name).
gallery_items = [
    (os.path.join(default_dir, f"{filename}.png"), name)
    for name, filename in reference_files
]
|
353 |
+
|
354 |
+
# User-facing blurb passed to gr.Interface as `description` (Japanese text).
# It introduces the model as zero-shot voice conversion, links the GitHub
# repository for local use, and states the 25-second reference clipping and
# the 30-second combined-length chunking behaviour.
description = "".join(
    [
        "Zero-shot音声変換モデル(学習不要)。ローカルでの利用方法は[GitHubリポジトリ](https://github.com/Plachtaa/seed-vc)をご覧ください。",
        "参考音声が25秒を超える場合、自動的に25秒にクリップされます。",
        "また、元音声と参考音声の合計時間が30秒を超える場合、元音声は分割処理されます。",
    ]
)
|
357 |
+
|
358 |
+
# Input components for the Interface, in the positional order of
# proxy_voice_conversion's parameters: source, selected_reference,
# diffusion_steps, length_adjust, inference_cfg_rate, f0_condition,
# auto_f0_adjust, pitch_shift.
inputs = [
    # Source audio, delivered to the callback as a file path.
    gr.Audio(label="元音声", type="filepath"),
    # Reference voice picker.
    # NOTE(review): as an Interface input, gr.Gallery appears to pass the
    # gallery contents rather than the clicked item — confirm the callback
    # actually receives a file slug from this component.
    gr.Gallery(
        label="参照音声を選択",
        value=gallery_items,
        columns=5,
        interactive=True,
    ),
    # Diffusion step count.
    gr.Slider(
        label="拡散ステップ数",
        info="デフォルトは10、50~100が最適な品質",
        minimum=1,
        maximum=200,
        step=1,
        value=10,
    ),
    # Output length multiplier.
    gr.Slider(
        label="長さ調整",
        info="1.0未満で速度を上げ、1.0以上で速度を遅くします",
        minimum=0.5,
        maximum=2.0,
        step=0.1,
        value=1.0,
    ),
    # Inference CFG rate.
    gr.Slider(
        label="推論CFG率",
        info="わずかな影響があります",
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        value=0.7,
    ),
    # Toggle for the F0-conditioned model.
    gr.Checkbox(
        label="F0条件付きモデルを使用",
        info="歌声変換には必須です",
        value=False,
    ),
    # Toggle for automatic F0 adjustment.
    gr.Checkbox(
        label="F0自動調整",
        info="F0をおおよそ調整して目標音声に合わせます。F0条件付きモデル使用時にのみ有効です",
        value=True,
    ),
    # Pitch shift in semitones.
    gr.Slider(
        label='音程変換',
        info="半音単位の音程変換。F0条件付きモデル使用時にのみ有効です",
        minimum=-24,
        maximum=24,
        step=1,
        value=0,
    ),
]
|
368 |
+
|
369 |
+
# Output components, matching the two-element tuples the conversion callback
# yields: a streamed MP3 player first, then a non-streaming WAV player.
outputs = [
    gr.Audio(
        label="ストリーム出力音声",
        format='mp3',
        streaming=True,
    ),
    gr.Audio(
        label="完全出力音声",
        format='wav',
        streaming=False,
    ),
]
|
373 |
+
|
374 |
+
# Assemble the UI around proxy_voice_conversion and start serving it.
demo = gr.Interface(
    title="Seed Voice Conversion with Reference Gallery",
    description=description,
    fn=proxy_voice_conversion,
    inputs=inputs,
    outputs=outputs,
    cache_examples=False,
)
demo.launch()
|