Seed-VC-characters

Running

App Files Files Community

soiz1 committed on Mar 10

Commit

bb47651

verified ·

1 Parent(s): 9a321f6

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -122

app.py CHANGED Viewed

@@ -337,125 +337,50 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
             yield mp3_bytes, None
-import gradio as gr
-from g4f.client import Client
-import markdown2  # より豊富なマークダウン対応
-import base64
-from io import BytesIO
-import json
-client = Client()
-def format_output(text):
-    """
-    Formatting function for handling ChatGPT-style Markdown.
-    Passes ``text`` to ``markdown2.markdown`` with the extras listed below and returns the result.
-    """
-    return markdown2.markdown(text, extras=[
-        "fenced-code-blocks",
-        "tables",
-        "task_list",
-        "strike",
-        "spoiler",
-        "markdown-in-html"
-    ])
-def image_to_data_url(image):
-    """
-    Convert an image into a Base64-encoded data URL.
-    The image is saved as PNG into an in-memory buffer and returned as a ``data:image/png;base64,...`` string.
-    """
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
-    return f"data:image/png;base64,{img_str}"
-def respond(message, history, system_message, max_tokens, temperature, top_p, model_choice, web_search, image=None):
-    # Sends one chat turn through `client` and returns the reply after passing it through format_output.
-    # Put the system message at the front of the message list.
-    messages = [{"role": "system", "content": system_message}]
-    # Append the conversation history so far (empty entries are skipped).
-    # NOTE(review): chat() stores the format_output result in history, so earlier assistant turns
-    # are sent back in that formatted form rather than as the raw model text - confirm this is intended.
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    # Append the current user message.
-    if image:
-        message += f"\n![image]({image})"  # append the image as a Markdown image link
-    messages.append({"role": "user", "content": message})
-    # API call
-    response = client.chat.completions.create(
-        model=model_choice,
-        messages=messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        web_search=web_search
-    )
-    formatted_response = format_output(response.choices[0].message.content)
-    return formatted_response
-def chat(message, history, system_message, max_tokens, temperature, top_p, model_choice, web_search, image):
-    # UI callback: returns (new input-box text, chat history for display, chat history for state).
-    # Nothing to send when the message is blank and no image was supplied; history is returned unchanged.
-    if message.strip() == "" and not image:
-        return "", history, history
-    if image:
-        image_data_url = image_to_data_url(image)
-    else:
-        image_data_url = None
-    print("メッセージ送信直後の履歴:")
-    print(json.dumps(history, ensure_ascii=False, indent=2))  # print the history as JSON before the message is sent
-    response = respond(message, history, system_message, max_tokens, temperature, top_p, model_choice, web_search, image_data_url)
-    history = history + [(message, response)]
-    print("AIの回答直後の履歴:")
-    print(json.dumps(history, ensure_ascii=False, indent=2))  # print the history as JSON after the AI has answered
-    # Clear the input box and return the updated chat history and state.
-    return "", history, history
-with gr.Blocks() as demo:
-    with gr.Row():
-        # Left column: the input box and the various options.
-        with gr.Column():
-            # Options (system message and assorted parameters).
-            system_message = gr.Textbox(
-                value="あなたは日本語しか話せません。あなたは最新の医療支援AIです。薬の紹介、薬の提案、薬の作成など、さまざまなタスクに答えます。また、新しい薬を開発する際は、既存のものに頼らずに画期的なアイデアを出します。",
-                label="システムメッセージ"
-            )
-            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="トークン制限")
-            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=2, step=0.1, label="Temperature (数値が大きいほど様々な回答をします。)")
-            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling) (数値が低いと回答候補が上位のみになります。)")
-            model_choice = gr.Radio(choices=["gpt-4o-mini", "o3-mini"], value="gpt-4o-mini", label="モデル選択")
-            web_search = gr.Checkbox(value=True, label="WEB検索")
-            # Chat input box and send button.
-            chatbot_input = gr.Textbox(show_label=False, placeholder="ここにメッセージを入力してください...", lines=2)
-            image_input = gr.Image(type="pil", label="画像をアップロード", visible=False)  # image upload; created with visible=False
-            submit_btn = gr.Button("送信")
-        # Right column: displays the chat history.
-        with gr.Column():
-            chat_history_display = gr.Chatbot(label="チャット履歴")
-    # State component that holds the conversation state (history).
-    state = gr.State([])
-    # Behaviour when the send button is pressed.
-    submit_btn.click(
-        chat,
-        inputs=[chatbot_input, state, system_message, max_tokens, temperature, top_p, model_choice, web_search, image_input],
-        outputs=[chatbot_input, chat_history_display, state]
-    )
-    # Also support submitting with the Enter key (same callback, inputs and outputs as the button).
-    chatbot_input.submit(
-        chat,
-        inputs=[chatbot_input, state, system_message, max_tokens, temperature, top_p, model_choice, web_search, image_input],
-        outputs=[chatbot_input, chat_history_display, state]
-    )
-demo.launch()

             yield mp3_bytes, None
+# Image and MP3 file information: each entry pairs a gallery image path with
+# the MP3 path that set_reference_audio returns for that image.
+gallery_items = [
+    {"image": "default/sikokumetan.webp", "mp3": "default/sikokumetan.mp3"}
+]
+# Lookup meant to run when a gallery image is clicked (per the original author's comment).
+def set_reference_audio(image_path):
+    """Return the MP3 path paired with ``image_path`` in ``gallery_items``.
+
+    The first entry whose "image" value equals ``image_path`` wins; ``None``
+    is returned when no entry matches.
+    """
+    matching_mp3s = (entry["mp3"] for entry in gallery_items if entry["image"] == image_path)
+    return next(matching_mp3s, None)
+if __name__ == "__main__":
+    # Script entry point: builds a gr.Interface around voice_conversion and launches it.
+    description = ("Zero-shot音声変換モデル（学習不要）。ローカルでの利用方法は[GitHubリポジトリ](https://github.com/Plachtaa/seed-vc)をご覧ください。"
+                   "参考音声が25秒を超える場合、自動的に25秒にクリップされます。"
+                   "また、元音声と参考音声の合計時間が30秒を超える場合、元音声は分割処理されます。")
+    # Input components, in the order they are handed to gr.Interface.
+    inputs = [
+        gr.Audio(type="filepath", label="元音声"),
+        gr.Audio(type="filepath", label="参考音声"),
+        gr.Slider(minimum=1, maximum=200, value=10, step=1, label="拡散ステップ数", info="デフォルトは10、50～100が最適な品質"),
+        gr.Slider(minimum=0.5, maximum=2.0, step=0.1, value=1.0, label="長さ調整", info="1.0未満で速度を上げ、1.0以上で速度を遅くします"),
+        gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.7, label="推論CFG率", info="わずかな影響があります"),
+        gr.Checkbox(label="F0条件付きモデルを使用", value=False, info="歌声変換には必須です"),
+        gr.Checkbox(label="F0自動調整", value=True, info="F0をおおよそ調整して目標音声に合わせます。F0条件付きモデル使用時にのみ有効です"),
+        gr.Slider(label='音程変換', minimum=-24, maximum=24, step=1, value=0, info="半音単位の音程変換。F0条件付きモデル使用時にのみ有効です"),
+        # NOTE(review): the gallery is the ninth entry in `inputs`, so its value is presumably passed to
+        # voice_conversion as an extra argument - confirm the function's signature accepts it.
+        gr.Gallery(value=[item["image"] for item in gallery_items], label="画像ギャラリー", interactive=True)
+    ]
+    def update_reference_audio(selected_image):
+        # Return the MP3 file that corresponds to the image selected in the gallery.
+        mp3_file = set_reference_audio(selected_image)
+        return mp3_file
+    # Original intent: reflect the image selected in the gallery in the reference-audio input.
+    # NOTE(review): nothing in this block connects update_reference_audio to the gallery or to the
+    # reference-audio component, so the helper is currently never called from here.
+    # NOTE(review): `examples` is not defined in this block; presumably it is defined earlier in app.py - verify.
+    # NOTE(review): live=True presumably re-runs voice_conversion whenever an input changes - confirm this is intended.
+    gr.Interface(
+        fn=voice_conversion,
+        description=description,
+        inputs=inputs,
+        outputs=[gr.Audio(label="ストリーム出力音声", streaming=True, format='mp3'),
+                 gr.Audio(label="完全出力音声", streaming=False, format='wav')],
+        title="Seed Voice Conversion",
+        examples=examples,
+        cache_examples=False,
+        live=True
+    ).launch()