import subprocess

# Pin numpy before the other imports (presumably to stay compatible with the
# prebuilt torch/diffsynth wheels on this Space).
subprocess.run(
    'pip install numpy==1.26.4',
    shell=True
)
import os
import gradio as gr
import torch
import spaces
import random
from PIL import Image
import numpy as np
from glob import glob
from pathlib import Path
from typing import Optional
# Core functions from https://github.com/modelscope/DiffSynth-Studio
from diffsynth import save_video, ModelManager, SVDVideoPipeline
from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames
from diffsynth.extensions.RIFE import RIFESmoother
import requests
def download_model(url, file_path):
    # Create the target directory if it does not exist yet, then fetch the file.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    model_file = requests.get(url, allow_redirects=True)
    with open(file_path, "wb") as f:
        f.write(model_file.content)
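# Note: requests.get() buffers the whole checkpoint in memory. For multi-GB files a
# streamed download would be gentler; a minimal sketch (hypothetical variant of the
# same helper, not part of the original code):
#     with requests.get(url, stream=True, allow_redirects=True) as r:
#         with open(file_path, "wb") as f:
#             for chunk in r.iter_content(chunk_size=1 << 20):
#                 f.write(chunk)

# Checkpoints used below: a flat-2D anime SD 1.5 base, the AnimateDiff v2 motion module,
# lineart/tile ControlNets, sketch annotators, and a negative textual-inversion embedding.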
download_model("https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16", "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors")
download_model("https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt", "models/AnimateDiff/mm_sd_v15_v2.ckpt")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth", "models/ControlNet/control_v11p_sd15_lineart.pth")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth", "models/ControlNet/control_v11f1e_sd15_tile.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth", "models/Annotators/sk_model.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth", "models/Annotators/sk_model2.pth")
download_model("https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16", "models/textual_inversion/verybadimagenegative_v1.3.pt")
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Constants
MAX_SEED = np.iinfo(np.int32).max
CSS = """
footer {
    visibility: hidden;
}
"""
JS = """function () {
gradioURL = window.location.href
if (!gradioURL.endsWith('?__theme=dark')) {
window.location.replace(gradioURL + '?__theme=dark');
}
}"""
# Ensure model and scheduler are initialized in GPU-enabled function
if torch.cuda.is_available():
    # ExVideo: Stable Video Diffusion img2vid-xt extended with the ExVideo-SVD-128f module.
    model_manager = ModelManager(
        torch_dtype=torch.float16,
        device="cuda",
        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
        downloading_priority=["HuggingFace"])
    pipe = SVDVideoPipeline.from_model_manager(model_manager)

    # Diffutoon: anime-style SD 1.5 base + AnimateDiff motion module, guided by lineart
    # and tile ControlNets, with a RIFE-based smoother for post-processing.
    model_manager2 = ModelManager(torch_dtype=torch.float16, device="cuda")
    model_manager2.load_textual_inversions("models/textual_inversion")
    model_manager2.load_models([
        "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
        "models/AnimateDiff/mm_sd_v15_v2.ckpt",
        "models/ControlNet/control_v11p_sd15_lineart.pth",
        "models/ControlNet/control_v11f1e_sd15_tile.pth",
        "models/RIFE/flownet.pkl"
    ])
    pipe2 = SDVideoPipeline.from_model_manager(
        model_manager2,
        [
            ControlNetConfigUnit(
                processor_id="lineart",
                model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
                scale=0.5
            ),
            ControlNetConfigUnit(
                processor_id="tile",
                model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
                scale=0.5
            )
        ]
    )
    smoother = RIFESmoother.from_model_manager(model_manager2)
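# Toggle which uploader is visible (image for ExVideo, video for Diffutoon) and
# show the prompt box only for Diffutoon.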
def change_media(image_in, video_in, selected):
    if selected == "ExVideo":
        return gr.update(visible=True), gr.update(visible=False), image_in, gr.update(visible=False)
    elif selected == "Diffutoon":
        return gr.update(visible=False), gr.update(visible=True), video_in, gr.update(visible=True)
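# Single generation entry point for both apps; @spaces.GPU grants the call up to
# 120 seconds of ZeroGPU time.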
@spaces.GPU(duration=120)
def generate(
        media,
        selected,
        seed: Optional[int] = -1,
        num_inference_steps: int = 5,
        animatediff_batch_size: int = 32,
        animatediff_stride: int = 16,
        motion_bucket_id: int = 127,
        fps_id: int = 25,
        num_frames: int = 50,
        prompt: str = "best quality",
        output_folder: str = "outputs",
        progress=gr.Progress(track_tqdm=True)):
    print(media)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Number output files sequentially within the output folder.
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    if selected == "ExVideo":
        # Image-to-video with SVD + ExVideo at a fixed 512x512 resolution.
        image = Image.open(media)
        video = pipe(
            input_image=image.resize((512, 512)),
            num_frames=num_frames,
            fps=fps_id,
            height=512,
            width=512,
            motion_bucket_id=motion_bucket_id,
            num_inference_steps=num_inference_steps,
            min_cfg_scale=2,
            max_cfg_scale=2,
            contrast_enhance_scale=1.2
        )
        model_manager.to("cpu")
    elif selected == "Diffutoon":
        # Toon-shade the uploaded clip; only frames 1-29 are used (presumably to stay
        # within the ZeroGPU time budget).
        up_video = VideoData(
            video_file=media,
            height=512, width=512)
        input_video = [up_video[i] for i in range(1, 30)]
        video = pipe2(
            prompt=prompt,
            negative_prompt="verybadimagenegative_v1.3",
            cfg_scale=3,
            clip_skip=2,
            controlnet_frames=input_video, num_frames=len(input_video),
            num_inference_steps=num_inference_steps,
            height=512,
            width=512,
            animatediff_batch_size=animatediff_batch_size,
            animatediff_stride=animatediff_stride,
            vram_limit_level=0,
        )
        video = smoother(video)

    save_video(video, video_path, fps=fps_id)
    return video_path, seed
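# Bundled example media: video clips for Diffutoon, still images for ExVideo.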
examples = [
    ['./walking.mp4', "A woman walking on the street", "Diffutoon"],
    ['./smilegirl.mp4', "A girl standing on the grass", "Diffutoon"],
    ['./working.mp4', "A woman is doing the dishes", "Diffutoon"],
    ["./train.jpg", "", "ExVideo"],
    ["./girl.webp", "", "ExVideo"],
    ["./robo.jpg", "", "ExVideo"],
]
# Gradio Interface
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("<h1><center>ExVideo📽️Diffutoon</center></h1>")
    gr.HTML("<p><center>ExVideo and Diffutoon video generation<br><b>Update</b>: first version<br><b>Note</b>: ZeroGPU time is limited, so set the parameters appropriately.</center></p>")
    with gr.Row():
        video_in = gr.Video(label='Upload Video', height=600, scale=2)
        image_in = gr.Image(label='Upload Image', height=600, scale=2, image_mode="RGB", type="filepath", visible=False)
        media = video_in
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Column(scale=1):
            selected = gr.Radio(
                label="Select App",
                choices=["ExVideo", "Diffutoon"],
                value="Diffutoon"
            )
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            num_inference_steps = gr.Slider(
                label="Inference steps",
                info="Number of denoising steps",
                step=1,
                value=5,
                minimum=1,
                maximum=50
            )
with gr.Accordion("Diffutoon Options", open=False):
animatediff_batch_size = gr.Slider(
label="Animatediff batch size",
minimum=1,
maximum=50,
step=1,
value=32,
)
animatediff_stride = gr.Slider(
label="Animatediff stride",
minimum=1,
maximum=50,
step=1,
value=16,
)
with gr.Accordion("ExVideo Options", open=False):
motion_bucket_id = gr.Slider(
label="Motion bucket id",
info="Controls how much motion to add/remove from the image",
value=127,
step=1,
minimum=1,
maximum=255
)
fps_id = gr.Slider(
label="Frames per second",
info="The length of your video in seconds will be 25/fps",
value=6,
step=1,
minimum=5,
maximum=30
)
num_frames = gr.Slider(
label="Frames num",
info="Frames num",
step=1,
value=50,
minimum=1,
maximum=128
)
            prompt = gr.Textbox(label="Prompt")
            with gr.Row():
                submit_btn = gr.Button(value="Generate")
                #stop_btn = gr.Button(value="Stop", variant="stop")
                clear_btn = gr.ClearButton([media, seed, video])
    gr.Examples(
        examples=examples,
        inputs=[media, prompt, selected],
        outputs=[video, seed],
        fn=generate,
        cache_examples="lazy",
        examples_per_page=4,
    )
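    # Wire up events: swap the visible input when the app selection changes, and run
    # generation when the user clicks Generate.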
    selected.change(change_media, inputs=[image_in, video_in, selected], outputs=[image_in, video_in, media, prompt])
    submit_event = submit_btn.click(fn=generate, inputs=[media, selected, seed, num_inference_steps, animatediff_batch_size, animatediff_stride, motion_bucket_id, fps_id, num_frames, prompt], outputs=[video, seed], api_name="video")
    #stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
demo.queue().launch()