import os
import time
import sys

import cuid
import gradio as gr
import spaces
import numpy as np

from huggingface_hub import snapshot_download

# Add the project root and vendored dependencies (MMCM, diffusers, controlnet_aux) to the import path
ProjectDir = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, ProjectDir)
sys.path.insert(0, os.path.join(ProjectDir, "MMCM"))
sys.path.insert(0, os.path.join(ProjectDir, "diffusers/src"))
sys.path.insert(0, os.path.join(ProjectDir, "controlnet_aux/src"))

CheckpointsDir = os.path.join(ProjectDir, "checkpoints")
ignore_video2video = True
max_image_edge = 960

def download_model():
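    # Mirror the full TMElyralab/MuseV model repo from the Hugging Face Hub into ./checkpoints.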
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, start downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
            local_dir_use_symlinks=True,
        )
        toc = time.time()
        print(f"download took {toc - tic:.1f} seconds")
    else:
        print("Model already downloaded.")

# Download model first
print("Starting model download...")
download_model()

# Import after the model download so the checkpoints are already in place when the module loads
from gradio_text2video import online_t2v_inference

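# On Hugging Face ZeroGPU Spaces, the spaces.GPU decorator allocates a GPU for each
# call to the decorated function, here for at most 180 seconds per call.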
@spaces.GPU(duration=180)
def hf_online_t2v_inference(
    prompt,
    image_np,
    seed,
    fps,
    w,
    h,
    video_len,
    img_edge_ratio,
):
    if not isinstance(image_np, np.ndarray):  # image_np is None when no reference image was uploaded
        raise gr.Error("A reference image is required")
    return online_t2v_inference(
        prompt, image_np, seed, fps, w, h, video_len, img_edge_ratio
    )

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# MuseV Demo")
    
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                seed = gr.Number(
                    label="Seed (-1 uses a different random seed on each run)",
                    value=-1,
                )
                video_length = gr.Number(
                    label="Video Length (must be smaller than 144)",
                    value=12,
                )
                fps = gr.Number(label="Generate Video FPS", value=6)
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn = gr.Button("Generate")
            video_output = gr.Video()
            
        btn.click(
            fn=hf_online_t2v_inference,
            inputs=[
                prompt,
                image,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=video_output,
        )

    with gr.Tab("Video to Video"):
        gr.Markdown(
            "Due to GPU limits, the MuseV demo currently only supports Text2Video. "
            "If you want to try Video2Video, please run it locally."
        )

# Launch the app
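# queue() enables Gradio's request queue (recommended for long-running GPU jobs); port 7860
# is Gradio's default and the port Hugging Face Spaces expects.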
demo.queue().launch(server_name="0.0.0.0", server_port=7860)