import subprocess

# Pin numpy before the other imports (presumably to stay compatible with the
# prebuilt torch/diffsynth wheels on this Space).
subprocess.run(
    'pip install numpy==1.26.4',
    shell=True
)
import os
import gradio as gr
import torch
import spaces
import random
from PIL import Image
import numpy as np
from glob import glob
from pathlib import Path
from typing import Optional
# Core functions from https://github.com/modelscope/DiffSynth-Studio
from diffsynth import save_video, ModelManager, SVDVideoPipeline
from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames
from diffsynth.extensions.RIFE import RIFESmoother
import requests
def download_model(url, file_path):
    # Create the target directory if it does not exist yet, then fetch the file.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    model_file = requests.get(url, allow_redirects=True)
    with open(file_path, "wb") as f:
        f.write(model_file.content)
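# Note: requests.get() buffers the whole checkpoint in memory. For multi-GB files a
# streamed download would be gentler; a minimal sketch (hypothetical variant of the
# same helper, not part of the original code):
#     with requests.get(url, stream=True, allow_redirects=True) as r:
#         with open(file_path, "wb") as f:
#             for chunk in r.iter_content(chunk_size=1 << 20):
#                 f.write(chunk)

# Checkpoints used below: a flat-2D anime SD 1.5 base, the AnimateDiff v2 motion module,
# lineart/tile ControlNets, sketch annotators, and a negative textual-inversion embedding.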
download_model("https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16", "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors")
download_model("https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt", "models/AnimateDiff/mm_sd_v15_v2.ckpt")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth", "models/ControlNet/control_v11p_sd15_lineart.pth")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth", "models/ControlNet/control_v11f1e_sd15_tile.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth", "models/Annotators/sk_model.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth", "models/Annotators/sk_model2.pth")
download_model("https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16", "models/textual_inversion/verybadimagenegative_v1.3.pt")
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Constants
MAX_SEED = np.iinfo(np.int32).max
CSS = """
footer {
    visibility: hidden;
}
"""
JS = """function () {
gradioURL = window.location.href
if (!gradioURL.endsWith('?__theme=dark')) {
window.location.replace(gradioURL + '?__theme=dark');
}
}"""
# Ensure model and scheduler are initialized in GPU-enabled function
if torch.cuda.is_available():
    # ExVideo: Stable Video Diffusion img2vid-xt extended with the ExVideo-SVD-128f module.
    model_manager = ModelManager(
        torch_dtype=torch.float16,
        device="cuda",
        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
        downloading_priority=["HuggingFace"])
    pipe = SVDVideoPipeline.from_model_manager(model_manager)

    # Diffutoon: anime-style SD 1.5 base + AnimateDiff motion module, guided by lineart
    # and tile ControlNets, with a RIFE-based smoother for post-processing.
    model_manager2 = ModelManager(torch_dtype=torch.float16, device="cuda")
    model_manager2.load_textual_inversions("models/textual_inversion")
    model_manager2.load_models([
        "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
        "models/AnimateDiff/mm_sd_v15_v2.ckpt",
        "models/ControlNet/control_v11p_sd15_lineart.pth",
        "models/ControlNet/control_v11f1e_sd15_tile.pth",
        "models/RIFE/flownet.pkl"
    ])
    pipe2 = SDVideoPipeline.from_model_manager(
        model_manager2,
        [
            ControlNetConfigUnit(
                processor_id="lineart",
                model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
                scale=0.5
            ),
            ControlNetConfigUnit(
                processor_id="tile",
                model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
                scale=0.5
            )
        ]
    )
    smoother = RIFESmoother.from_model_manager(model_manager2)
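# Toggle which uploader is visible (image for ExVideo, video for Diffutoon) and
# show the prompt box only for Diffutoon.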
def change_media(image_in, video_in, selected):
    if selected == "ExVideo":
        return gr.update(visible=True), gr.update(visible=False), image_in, gr.update(visible=False)
    elif selected == "Diffutoon":
        return gr.update(visible=False), gr.update(visible=True), video_in, gr.update(visible=True)
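# Single generation entry point for both apps; @spaces.GPU grants the call up to
# 120 seconds of ZeroGPU time.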
@spaces.GPU(duration=120)
def generate(
        media,
        selected,
        seed: Optional[int] = -1,
        num_inference_steps: int = 5,
        animatediff_batch_size: int = 32,
        animatediff_stride: int = 16,
        motion_bucket_id: int = 127,
        fps_id: int = 25,
        num_frames: int = 50,
        prompt: str = "best quality",
        output_folder: str = "outputs",
        progress=gr.Progress(track_tqdm=True)):
    print(media)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    # Number output files sequentially within the output folder.
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    if selected == "ExVideo":
        # Image-to-video with SVD + ExVideo at a fixed 512x512 resolution.
        image = Image.open(media)
        video = pipe(
            input_image=image.resize((512, 512)),
            num_frames=num_frames,
            fps=fps_id,
            height=512,
            width=512,
            motion_bucket_id=motion_bucket_id,
            num_inference_steps=num_inference_steps,
            min_cfg_scale=2,
            max_cfg_scale=2,
            contrast_enhance_scale=1.2
        )
        model_manager.to("cpu")
    elif selected == "Diffutoon":
        # Toon-shade the uploaded clip; only frames 1-29 are used (presumably to stay
        # within the ZeroGPU time budget).
        up_video = VideoData(
            video_file=media,
            height=512, width=512)
        input_video = [up_video[i] for i in range(1, 30)]
        video = pipe2(
            prompt=prompt,
            negative_prompt="verybadimagenegative_v1.3",
            cfg_scale=3,
            clip_skip=2,
            controlnet_frames=input_video, num_frames=len(input_video),
            num_inference_steps=num_inference_steps,
            height=512,
            width=512,
            animatediff_batch_size=animatediff_batch_size,
            animatediff_stride=animatediff_stride,
            vram_limit_level=0,
        )
        video = smoother(video)

    save_video(video, video_path, fps=fps_id)
    return video_path, seed
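# Bundled example media: video clips for Diffutoon, still images for ExVideo.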
examples = [
    ['./walking.mp4', "A woman walking on the street", "Diffutoon"],
    ['./smilegirl.mp4', "A girl standing on the grass", "Diffutoon"],
    ['./working.mp4', "A woman is doing the dishes", "Diffutoon"],
    ["./train.jpg", "", "ExVideo"],
    ["./girl.webp", "", "ExVideo"],
    ["./robo.jpg", "", "ExVideo"],
]
# Gradio Interface
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("<h1><center>ExVideo📽️Diffutoon</center></h1>")
    gr.HTML("<p><center>ExVideo and Diffutoon video generation<br><b>Update</b>: first version<br><b>Note</b>: ZeroGPU time is limited, so set the parameters appropriately.</center></p>")
    with gr.Row():
        video_in = gr.Video(label='Upload Video', height=600, scale=2)
        image_in = gr.Image(label='Upload Image', height=600, scale=2, image_mode="RGB", type="filepath", visible=False)
        media = video_in
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Column(scale=1):
            selected = gr.Radio(
                label="Select App",
                choices=["ExVideo", "Diffutoon"],
                value="Diffutoon"
            )
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            num_inference_steps = gr.Slider(
                label="Inference steps",
                info="Number of denoising steps",
                step=1,
                value=5,
                minimum=1,
                maximum=50
            )
with gr.Accordion("Diffutoon Options", open=False):
animatediff_batch_size = gr.Slider(
label="Animatediff batch size",
minimum=1,
maximum=50,
step=1,
value=32,
)
animatediff_stride = gr.Slider(
label="Animatediff stride",
minimum=1,
maximum=50,
step=1,
value=16,
)
with gr.Accordion("ExVideo Options", open=False):
motion_bucket_id = gr.Slider(
label="Motion bucket id",
info="Controls how much motion to add/remove from the image",
value=127,
step=1,
minimum=1,
maximum=255
)
fps_id = gr.Slider(
label="Frames per second",
info="The length of your video in seconds will be 25/fps",
value=6,
step=1,
minimum=5,
maximum=30
)
num_frames = gr.Slider(
label="Frames num",
info="Frames num",
step=1,
value=50,
minimum=1,
maximum=128
)
            prompt = gr.Textbox(label="Prompt")
            with gr.Row():
                submit_btn = gr.Button(value="Generate")
                #stop_btn = gr.Button(value="Stop", variant="stop")
                clear_btn = gr.ClearButton([media, seed, video])
    gr.Examples(
        examples=examples,
        inputs=[media, prompt, selected],
        outputs=[video, seed],
        fn=generate,
        cache_examples="lazy",
        examples_per_page=4,
    )
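    # Wire up events: swap the visible input when the app selection changes, and run
    # generation when the user clicks Generate.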
    selected.change(change_media, inputs=[image_in, video_in, selected], outputs=[image_in, video_in, media, prompt])
    submit_event = submit_btn.click(fn=generate, inputs=[media, selected, seed, num_inference_steps, animatediff_batch_size, animatediff_stride, motion_bucket_id, fps_id, num_frames, prompt], outputs=[video, seed], api_name="video")
    #stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
demo.queue().launch()