# MuseV/scripts/gradio/app_gradio_space.py
import os
import time
import sys
import cuid
import gradio as gr
import spaces
import numpy as np
from huggingface_hub import snapshot_download
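# `spaces` provides the @spaces.GPU decorator for Hugging Face ZeroGPU Spaces.
# `cuid` (a unique-id generator) is not referenced in this file; it appears to
# be imported for parity with the full MuseV Gradio app.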
# Add necessary paths
ProjectDir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, ProjectDir)
sys.path.insert(0, os.path.join(ProjectDir, "MMCM"))
sys.path.insert(0, os.path.join(ProjectDir, "diffusers/src"))
sys.path.insert(0, os.path.join(ProjectDir, "controlnet_aux/src"))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
ignore_video2video = True
max_image_edge = 960
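# Neither flag is referenced in this file: ignore_video2video and
# max_image_edge appear to mirror the full MuseV Gradio app, where they gate
# the Video2Video path and cap the longer edge of the reference image.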
def download_model():
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
            # Deprecated in newer huggingface_hub releases (ignored there);
            # kept for compatibility with older versions.
            local_dir_use_symlinks=True,
        )
        toc = time.time()
        print(f"download cost {toc - tic:.1f} seconds")
    else:
        print("Model already downloaded.")
# Download model first
print("Starting model download...")
download_model()
# Import after model download to ensure all dependencies are ready
from gradio_text2video import online_t2v_inference
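# On ZeroGPU Spaces, @spaces.GPU allocates a GPU only for the duration of each
# decorated call; `duration` is the per-call time budget in seconds.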
@spaces.GPU(duration=180)
def hf_online_t2v_inference(
    prompt,
    image_np,
    seed,
    fps,
    w,
    h,
    video_len,
    img_edge_ratio,
):
    if not isinstance(image_np, np.ndarray):  # gr.Image returns None when empty
        raise gr.Error("Please provide a reference image.")
    return online_t2v_inference(
        prompt, image_np, seed, fps, w, h, video_len, img_edge_ratio
    )
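# online_t2v_inference (defined in gradio_text2video) is expected to return the
# generated video in a form gr.Video can render, e.g. a file path.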
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# MuseV Demo")
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                seed = gr.Number(
                    label="Seed (-1 draws a different random seed on each run)",
                    value=-1,
                )
                video_length = gr.Number(
                    label="Video Length (must be smaller than 144)",
                    value=12,
                )
                fps = gr.Number(label="Generate Video FPS", value=6)
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
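                # img_edge_ratio presumably rescales the reference image's
                # edges before inference (1.0 keeps the original size),
                # subject to the max_image_edge cap above.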
                btn = gr.Button("Generate")
                video_output = gr.Video()
        btn.click(
            fn=hf_online_t2v_inference,
            inputs=[
                prompt,
                image,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=video_output,
        )
    with gr.Tab("Video to Video"):
        gr.Markdown(
            "Due to GPU limits, this demo currently supports only Text2Video. "
            "To try Video2Video, please run MuseV locally."
        )
# Launch the app
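# 0.0.0.0 exposes the server on all interfaces inside the container; 7860 is
# the port Hugging Face Spaces expects a Gradio app to listen on.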
demo.queue().launch(server_name="0.0.0.0", server_port=7860)