"""Gradio app that feeds an image and an audio file to ``inference.fix_face``.

The generated video is written under ``./output/<YYYYMMDD>/`` and its path is
handed back to the ``gr.Video`` output component.
"""

import datetime
import os

import gradio as gr
import inference
import spaces

# Example rows shown under the form: [image path, audio path].
example1 = ["sample_data/ref1.jpg", "sample_data/ano.mp3"]
example2 = ["sample_data/ref2.jpg", "sample_data/ano.mp3"]

# Fixed UTC+9 offset used for naming output files (presumably JST, given the
# Japanese UI text -- confirm). Using an aware "now" keeps the names correct
# even when the host clock is not set to UTC.
_OUTPUT_TZ = datetime.timezone(datetime.timedelta(hours=9))


@spaces.GPU(duration=120)
def fix_face_video(input_image, input_audio):
    """Generate a video from an image and an audio file and return its path.

    Args:
        input_image: Value of the ``gr.Image`` input; a file path, since the
            component is declared with ``type="filepath"``.
        input_audio: Value of the ``gr.File`` input, forwarded unchanged to
            ``inference.fix_face``.

    Returns:
        The path ``./output/<YYYYMMDD>/fix_face_<HHMMSS>.mp4`` that was passed
        to ``inference.fix_face`` as the output location.
    """
    now = datetime.datetime.now(_OUTPUT_TZ)
    out_dir = "./output/" + now.strftime("%Y%m%d")
    # The dated folder changes every day, so make sure it exists before the
    # inference code is asked to write into it.
    os.makedirs(out_dir, exist_ok=True)
    out_video = f"{out_dir}/fix_face_{now.strftime('%H%M%S')}.mp4"
    inference.fix_face(input_image, input_audio, out_video)
    return out_video


# --- UI components -----------------------------------------------------------
image = gr.Image(label="画像(image)", type="filepath")
audio = gr.File(label="音声(audio)", file_types=[".mp3", ".MP3"])
out_video = gr.Video(label="Fix Face Video")
btn = gr.Button("送信", variant="primary")

title = "V_Express"
# Line breaks are written as "\n" escapes so every literal stays on one
# source line.
description = "\n\n画像と音声だけで生成できます。(Using only images and audio.)"
description += "\nThis uses the following V-Express \"https://github.com/tencent-ailab/V-Express\""
description += "\n\n※GPU ZERO制限のため短い音声しか生成できません。(Can only generate short sounds.)\n\n"

demo = gr.Interface(
    fn=fix_face_video,
    inputs=[image, audio],
    examples=[example1, example2],
    outputs=[out_video],
    title=title,
    submit_btn=btn,
    clear_btn=None,
    description=description,
    allow_flagging="never",
    cache_examples=False,
)

demo.queue()
demo.launch(share=True, debug=True)