"""Gradio demo app: turn text (typed or uploaded as a .txt file) into speech
with the OpenAI text-to-speech API.

NOTE(review): this file had been collapsed onto one line and the HTML tags
inside the Markdown strings were stripped by whatever extracted it.  The code
below is the reconstructed, properly formatted version; the HTML tags in the
title/caption strings are best-effort reconstructions — confirm against the
original app's rendering.
"""

import os
from datetime import datetime
from zoneinfo import ZoneInfo

import gradio as gr
from openai import OpenAI

# Maps the "Auto Play" dropdown labels to the boolean stored in session state.
auto_play_bl = {'ON': True, 'OFF': False}
# Voice names accepted by the OpenAI speech endpoint.
voice_list = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]


def set_state(state, openai_key, voice, auto_play, speed):
    """Copy the settings-tab widget values into the per-session state dict.

    Args:
        state: the gr.State dict for this session (mutated in place).
        openai_key: API key string from the settings textbox.
        voice: selected voice name (one of ``voice_list``).
        auto_play: "ON"/"OFF" label from the dropdown, translated to bool.
        speed: playback speed from the slider.

    Returns:
        The same ``state`` dict, so Gradio writes it back to the session.
    """
    state["openai_key"] = openai_key
    state["voice"] = voice
    state["auto_play"] = auto_play_bl[auto_play]
    state["speed"] = speed
    return state


def create_voice(state, text, file):
    """Validate the inputs, then synthesize speech into a per-user mp3 file.

    Args:
        state: session state dict (client / user_id / voice / speed / key).
        text: text typed into the textbox (may be empty).
        file: path of an uploaded .txt file, or None.  When present it takes
            precedence over ``text``.

    Returns:
        (file_path, err_msg): path of the generated mp3 (None on error) and
        an error message ("" on success).
    """
    err_msg = ""
    # Pull the per-session settings.
    client = state["client"]
    user_id = state["user_id"]
    voice = state["voice"]
    speed = state["speed"]

    # The OpenAI key must have been entered on the settings tab.
    if state["openai_key"] == "":
        err_msg = "OpenAIキーを入力してください。(設定タブ)"
        return None, err_msg

    # At least one input source is required.
    if text.strip() == "" and file is None:
        err_msg = "画面から文章を入力するか、テキストファイルをアップして下さい。"
        return None, err_msg

    if client is None:
        # Pass the key directly instead of round-tripping through
        # os.environ: the old approach mutated process-global state and
        # would have left the key in the environment if OpenAI() raised.
        client = OpenAI(api_key=state["openai_key"])
        state["client"] = client

    if user_id == "":
        # First request in this session: use the current time as the user id
        # and create a folder of the same name for this user's output files.
        dt = datetime.now(ZoneInfo("Asia/Tokyo"))
        user_id = dt.strftime("%Y%m%d%H%M%S")
        os.makedirs(user_id, exist_ok=True)
        state["user_id"] = user_id

    if file:
        # Uploaded file wins over the textbox.
        with open(file, 'r', encoding="utf-8") as f:
            input_text = f.read()
    else:
        input_text = text

    # Output file name is the current timestamp.
    dt = datetime.now(ZoneInfo("Asia/Tokyo"))
    file_name = dt.strftime("%Y%m%d%H%M%S") + ".mp3"
    file_path = os.path.join(user_id, file_name)

    # Synthesize; request_tts returns "" on success, an error message on failure.
    result = request_tts(client, voice, speed, file_path, input_text)
    if result != "":
        err_msg = result
        file_path = None

    return file_path, err_msg


def request_tts(client, voice, speed, file_path, text):
    """Call the OpenAI speech endpoint and write the audio to ``file_path``.

    Args:
        client: an OpenAI client instance.
        voice: voice name (one of ``voice_list``).
        speed: speech speed.  NOTE(review): the API accepts 0.25-4.0, but the
            UI slider below allows 0 — confirm whether that was intended.
        file_path: destination mp3 path.
        text: text to synthesize.

    Returns:
        "" on success, otherwise a user-facing error message.
    """
    err_msg = ""
    try:
        response = client.audio.speech.create(
            model="tts-1",  # "tts-1-hd" for higher quality
            voice=voice,
            input=text,
            speed=speed
        )
        # Write the audio to the output file.
        response.stream_to_file(file_path)
    except Exception as e:
        # Surface a generic message to the UI; log the detail to stdout.
        err_msg = "音声作成中にエラーが発生しました。"
        print(e)
    # Returning outside finally so an unexpected error in the try body is not
    # silently swallowed by a `return` in a finally clause.
    return err_msg


with gr.Blocks() as demo:
    # NOTE(review): HTML tags in these strings were lost in extraction and
    # have been reconstructed — verify the heading levels.
    title = "<h2>Text to Speechデモアプリ</h2>"
    message = "<h3>最初に[設定]タブからOpenAIキーを入力してください。"
    message += "</h3>"
    gr.Markdown(title + message)

    # Per-session settings and cached OpenAI client.
    state = gr.State({
        "openai_key": "",
        "client": None,
        "user_id": "",
        "auto_play": True,
        "speed": 0.8,
        "voice": "nova"
    })

    with gr.Tab("音声にする") as main_tab:
        text = gr.Textbox(label="音声にするテキスト", lines=3, interactive=True)
        file = gr.File(label="入力ファイル", type="filepath", file_types=[".txt"], interactive=True)
        with gr.Row():
            btn = gr.Button("音声にする")
            clear = gr.ClearButton([text, file], value="クリア")
        sys_msg = gr.Text(label="システムメッセージ", interactive=False)
        # Renamed from `voice` to avoid being shadowed by the settings-tab
        # dropdown of the same name further down.
        output_audio = gr.Audio(label="出力音声", type="filepath", interactive=False, autoplay=True)
        btn.click(create_voice, [state, text, file], [output_audio, sys_msg])

    with gr.Tab("設定") as set_tab:
        openai_key = gr.Textbox(label="OpenAI API Key", interactive=True)
        voice = gr.Dropdown(choices=voice_list, value="nova", label="Voice", interactive=True)
        auto_play = gr.Dropdown(choices=["ON", "OFF"], value="ON", label="Auto Play", interactive=True)
        speed = gr.Slider(0, 1, value=0.8, label="Speed",
                          info="1に近づけるほど読むスピードが速くなります。", interactive=True)

    # Settings are committed to state only when the user re-selects the main
    # tab (there is no per-widget change handler).
    main_tab.select(set_state, [state, openai_key, voice, auto_play, speed], state)

    with gr.Tab("声サンプル") as voice_chk:
        # NOTE(review): reconstructed HTML tags, see note above.
        gr.Markdown("<h3>Text to speechの声のサンプルです。(速度は0.8です)</h3>")
        with gr.Row():
            btn_alloy = gr.Button(value="alloy")
            btn_echo = gr.Button(value="echo")
            btn_fable = gr.Button(value="fable")
        with gr.Row():
            btn_onyx = gr.Button(value="onyx")
            btn_nova = gr.Button(value="nova")
            btn_shimmer = gr.Button(value="shimmer")
        sample_voice = gr.Audio(type="filepath", interactive=False, autoplay=True)
        # Each button plays its pre-rendered sample from voice_sample/.
        btn_alloy.click(lambda: "voice_sample/alloy.mp3", None, sample_voice)
        btn_echo.click(lambda: "voice_sample/echo.mp3", None, sample_voice)
        btn_fable.click(lambda: "voice_sample/fable.mp3", None, sample_voice)
        btn_onyx.click(lambda: "voice_sample/onyx.mp3", None, sample_voice)
        btn_nova.click(lambda: "voice_sample/nova.mp3", None, sample_voice)
        btn_shimmer.click(lambda: "voice_sample/shimmer.mp3", None, sample_voice)

demo.queue()
demo.launch(debug=False)