import os
import time
import sys

import cuid
import gradio as gr
import spaces
import numpy as np

from huggingface_hub import snapshot_download
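
# Make the repo root and the vendored MMCM, diffusers, and controlnet_aux
# sources importable without installation.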
ProjectDir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, ProjectDir)
sys.path.insert(0, os.path.join(ProjectDir, "MMCM"))
sys.path.insert(0, os.path.join(ProjectDir, "diffusers/src"))
sys.path.insert(0, os.path.join(ProjectDir, "controlnet_aux/src"))

CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
ignore_video2video = True
max_image_edge = 960
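

# Fetch the MuseV checkpoints from the Hugging Face Hub on first launch;
# skipped if the checkpoints directory already exists.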
def download_model():
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, starting download...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
            local_dir_use_symlinks=True,
        )
        toc = time.time()
        print(f"Download took {toc - tic:.1f} seconds.")
    else:
        print("Model already downloaded.")


print("Starting model download...")
download_model()
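
# Imported only after download_model() so the checkpoints are already on
# disk when gradio_text2video initializes.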
from gradio_text2video import online_t2v_inference
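

# ZeroGPU entry point: each call is allotted a GPU for up to 180 seconds.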
@spaces.GPU(duration=180)
def hf_online_t2v_inference(
    prompt,
    image_np,
    seed,
    fps,
    w,
    h,
    video_len,
    img_edge_ratio,
):
    if not isinstance(image_np, np.ndarray):
        raise gr.Error("A reference image is required.")
    return online_t2v_inference(
        prompt, image_np, seed, fps, w, h, video_len, img_edge_ratio
    )
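

# Gradio UI: a Text2Video tab plus a placeholder tab for Video2Video.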
with gr.Blocks() as demo:
    gr.Markdown("# MuseV Demo")

    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                seed = gr.Number(
                    label="Seed (-1 uses a different random seed on each run)",
                    value=-1,
                )
                video_length = gr.Number(
                    label="Video Length (must be smaller than 144)",
                    value=12,
                )
                fps = gr.Number(label="Generate Video FPS", value=6)
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn = gr.Button("Generate")
            video_output = gr.Video()
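
        # Wire the Generate button to the GPU-backed inference function.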
        btn.click(
            fn=hf_online_t2v_inference,
            inputs=[
                prompt,
                image,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=video_output,
        )
    with gr.Tab("Video to Video"):
        gr.Markdown(
            "Due to GPU limits, this MuseV demo currently supports only "
            "Text2Video. If you want to try Video2Video, please run it locally."
        )


demo.queue().launch(server_name="0.0.0.0", server_port=7860)