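"""Gradio demo app for MuseV text-to-video generation (TMElyralab/MuseV)."""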
import os
import time
import sys
import cuid
import gradio as gr
import spaces
import numpy as np
from huggingface_hub import snapshot_download
# Add necessary paths
ProjectDir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, ProjectDir)
sys.path.insert(0, os.path.join(ProjectDir, "MMCM"))
sys.path.insert(0, os.path.join(ProjectDir, "diffusers/src"))
sys.path.insert(0, os.path.join(ProjectDir, "controlnet_aux/src"))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
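# Demo-level settings: this Space only serves text-to-video (see the
# "Video to Video" tab below); max_image_edge presumably caps the longer
# side of the uploaded reference image, in pixels.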
ignore_video2video = True
max_image_edge = 960

def download_model():
    """Fetch the MuseV checkpoints from the Hugging Face Hub if they are missing."""
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, starting download...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
            local_dir_use_symlinks=True,
        )
        toc = time.time()
        print(f"Download took {toc - tic:.1f} seconds")
    else:
        print("Model already downloaded.")
# Download model first
print("Starting model download...")
download_model()
# Import after model download to ensure all dependencies are ready
from gradio_text2video import online_t2v_inference
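
# On Hugging Face ZeroGPU Spaces, spaces.GPU reserves a GPU for the decorated
# call, here for up to 180 seconds per invocation.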
@spaces.GPU(duration=180)
def hf_online_t2v_inference(
    prompt,
    image_np,
    seed,
    fps,
    w,
    h,
    video_len,
    img_edge_ratio,
):
    # gr.Image yields a numpy array when an image is provided; anything else
    # (typically None) means no reference image was uploaded.
    if not isinstance(image_np, np.ndarray):
        raise gr.Error("Need an input reference image")
    return online_t2v_inference(
        prompt, image_np, seed, fps, w, h, video_len, img_edge_ratio
    )
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# MuseV Demo")
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                seed = gr.Number(
                    label="Seed (-1 means a different random seed is used on each run)",
                    value=-1,
                )
                video_length = gr.Number(
                    label="Video Length (must be smaller than 144)",
                    value=12,
                )
                fps = gr.Number(label="Generate Video FPS", value=6)
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn = gr.Button("Generate")
        video_output = gr.Video()
        btn.click(
            fn=hf_online_t2v_inference,
            inputs=[
                prompt,
                image,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=video_output,
        )
    with gr.Tab("Video to Video"):
        gr.Markdown(
            "Due to GPU limits, this MuseV demo only supports Text2Video. "
            "If you want to try Video2Video, please run it locally."
        )
# Launch the app
demo.queue().launch(server_name="0.0.0.0", server_port=7860)