Spaces:
Running
Running
import gradio as gr | |
from openai import OpenAI | |
import os | |
from datetime import datetime | |
from zoneinfo import ZoneInfo | |
# Translates the "Auto Play" dropdown label into the boolean kept in the
# session state.
auto_play_bl = dict(ON=True, OFF=False)

# Voice names offered in the "Voice" dropdown of the settings tab.
voice_list = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]
def set_state(state, openai_key, voice, auto_play, speed):
    """Copy the values of the settings tab into the session state.

    Args:
        state: Per-session dict; updated in place.
        openai_key: API key as typed by the user. Surrounding whitespace
            (a common copy/paste artifact) is removed so that a blank or
            padded key is not stored as if it were a usable key.
        voice: Voice name selected in the dropdown.
        auto_play: Dropdown label, translated through ``auto_play_bl``.
        speed: Value of the speed slider.

    Returns:
        The same ``state`` dict, so it can be used as the event output.

    Raises:
        KeyError: If ``auto_play`` is not a key of ``auto_play_bl``.
    """
    state["openai_key"] = (openai_key or "").strip()
    state["voice"] = voice
    state["auto_play"] = auto_play_bl[auto_play]
    state["speed"] = speed
    return state
def create_voice(state, text, file):
    """Turn the typed text, or the content of an uploaded file, into speech.

    Args:
        state: Per-session dict. Reads "openai_key", "client", "user_id",
            "voice" and "speed"; writes "client" and "user_id" back so they
            are reused by later calls of the same session.
        text: Text typed into the UI; used only when no file is given.
        file: Path of the uploaded text file, or None.

    Returns:
        A ``(audio_path, message)`` tuple. On success ``audio_path`` is the
        path of the generated mp3 and ``message`` is "". On failure
        ``audio_path`` is None and ``message`` is the text to show the user.
    """
    # An API key is mandatory; a whitespace-only key counts as missing.
    api_key = (state["openai_key"] or "").strip()
    if api_key == "":
        return None, "OpenAIキーを入力してください。(設定タブ)"

    # Something to read aloud is mandatory.
    no_input_msg = "画面から文章を入力するか、テキストファイルをアップして下さい。"
    if (text or "").strip() == "" and file is None:
        return None, no_input_msg

    # Resolve the input before touching the API or the file system, so that
    # invalid requests leave no client and no output directory behind.
    if file:
        try:
            # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which
            # would otherwise be passed on as part of the text.
            with open(file, "r", encoding="utf-8-sig") as f:
                input_text = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(e)
            return None, "入力ファイルを読み込めませんでした。UTF-8のテキストファイルをアップして下さい。"
    else:
        input_text = text

    if (input_text or "").strip() == "":
        # e.g. an empty uploaded file
        return None, no_input_msg

    # Pass the key to the client directly. Routing it through os.environ is
    # process-wide: concurrent sessions could pick up each other's key and a
    # pre-existing OPENAI_API_KEY would be wiped.
    # The client is rebuilt when the key was changed in the settings tab.
    client = state["client"]
    if client is None or getattr(client, "api_key", None) != api_key:
        client = OpenAI(api_key=api_key)
        state["client"] = client

    # The first request of a session uses the current time (JST) as its id.
    user_id = state["user_id"]
    if user_id == "":
        dt = datetime.now(ZoneInfo("Asia/Tokyo"))
        user_id = dt.strftime("%Y%m%d%H%M%S")
        state["user_id"] = user_id

    # One output folder per session id; (re)created on every call so a
    # folder removed in the meantime does not break later requests.
    os.makedirs(user_id, exist_ok=True)

    # The output file is named after the current time (JST).
    dt = datetime.now(ZoneInfo("Asia/Tokyo"))
    file_name = dt.strftime("%Y%m%d%H%M%S") + ".mp3"
    file_path = user_id + "/" + file_name

    # Synthesize; request_tts returns "" on success or an error message.
    err_msg = request_tts(client, state["voice"], state["speed"], file_path, input_text)
    if err_msg != "":
        return None, err_msg

    return file_path, ""
def request_tts(client, voice , speed, file_path, text):
    """Convert text to speech and write the audio to ``file_path``.

    Args:
        client: Object exposing ``audio.speech.create(...)``; the returned
            response must provide ``stream_to_file(path)``.
        voice: Voice name forwarded to the API.
        speed: Requested speed; clamped to the 0.25-4.0 range the speech
            endpoint documents, so a slider value such as 0 does not turn
            into a rejected request.
        file_path: Destination path of the audio file.
        text: Text to read aloud.

    Returns:
        "" on success, otherwise a user-facing error message.
    """
    try:
        speed = min(max(speed, 0.25), 4.0)
        response = client.audio.speech.create(
            model="tts-1",  # "tts-1-hd",
            voice=voice,
            input=text,
            speed=speed,
        )
        # Write the audio to the output file.
        response.stream_to_file(file_path)
    except Exception as e:
        print(e)
        return "音声作成中にエラーが発生しました。"
    # Returning here instead of from a `finally` clause lets
    # KeyboardInterrupt / SystemExit propagate instead of being discarded.
    return ""
def _sample_path_fn(name):
    """Return a no-argument callback yielding the sample mp3 path of a voice."""
    return lambda: f"voice_sample/{name}.mp3"


# UI definition: a main tab (text/file -> speech), a settings tab and a tab
# that plays pre-recorded samples of each voice.
with gr.Blocks() as demo:
    title = "<h2>Text to Speechデモアプリ</h2>"
    message = "<h3>最初に[設定]タブからOpenAIキーを入力してください。"
    message += "</h3>"
    gr.Markdown(title + message)

    # Per-session values shared between the event handlers.
    state = gr.State({
        "openai_key": "",
        "client": None,
        "user_id": "",
        "auto_play": True,
        "speed": 0.8,
        "voice": "nova",
    })

    with gr.Tab("音声にする") as main_tab:
        text = gr.Textbox(label="音声にするテキスト", lines=3, interactive=True)
        file = gr.File(label="入力ファイル", type="filepath", file_types=[".txt"], interactive=True)
        with gr.Row():
            btn = gr.Button("音声にする")
            clear = gr.ClearButton([text, file], value="クリア")
        sys_msg = gr.Text(label="システムメッセージ", interactive=False)
        # Named out_audio so it is not shadowed by the "Voice" dropdown below.
        out_audio = gr.Audio(label="出力音声", type="filepath", interactive=False, autoplay=True)
        btn.click(create_voice, [state, text, file], [out_audio, sys_msg])

    with gr.Tab("設定") as set_tab:
        # Masked input: the API key is a secret and should not be readable
        # on screen.
        openai_key = gr.Textbox(label="OpenAI API Key", type="password", interactive=True)
        voice = gr.Dropdown(choices=voice_list, value="nova", label="Voice", interactive=True)
        auto_play = gr.Dropdown(choices=["ON", "OFF"], value="ON", label="Auto Play", interactive=True)
        # Lower bound 0.25: the speech endpoint documents 0.25-4.0 as the
        # valid speed range, so 0 could never produce audio.
        speed = gr.Slider(0.25, 1, value=0.8, label="Speed", info="1に近づけるほど読むスピードが速くなります。", interactive=True)

        # Settings are copied into the session state whenever the main tab
        # is selected again.
        main_tab.select(set_state, [state, openai_key, voice, auto_play, speed], state)
        # Apply the "Auto Play" choice to the output player; without this
        # the setting is stored but the player always auto-plays.
        auto_play.change(
            lambda choice: gr.update(autoplay=auto_play_bl[choice]),
            auto_play,
            out_audio,
        )

    with gr.Tab("声サンプル") as voice_chk:
        gr.Markdown("<h3>Text to speechの声のサンプルです。(速度は0.8です)</h3>")
        # Two rows of three buttons, one button per sample voice.
        sample_buttons = {}
        for row_names in (("alloy", "echo", "fable"), ("onyx", "nova", "shimmer")):
            with gr.Row():
                for sample_name in row_names:
                    sample_buttons[sample_name] = gr.Button(value=sample_name)
        sample_voice = gr.Audio(type="filepath", interactive=False, autoplay=True)
        # Each button loads its pre-recorded sample into the shared player.
        for sample_name, sample_btn in sample_buttons.items():
            sample_btn.click(_sample_path_fn(sample_name), None, sample_voice)

demo.queue()
demo.launch(debug=False)