# Text-to-Video demo — app.py
# Source: Hugging Face Space "Tennish/Text-to-Video" (commit bc46fd5, 3.54 kB).
import gradio as gr
import torch
import os
import uuid
from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
# Ensure GPU availability.
# Fail fast at import time: everything below runs the pipeline in fp16 on CUDA.
if not torch.cuda.is_available():
    raise NotImplementedError("A GPU is required for this task.")

device = "cuda"        # target device for every pipeline
dtype = torch.float16  # half precision to reduce VRAM usage

# Base model checkpoints: UI label -> Hugging Face Hub repo id.
BASE_MODELS = {
    "Realistic": "emilianJR/epiCRealism",
    "Cartoon": "frankjoshua/toonyou_beta6",
    "3D": "Lykon/DreamShaper",
    "Anime": "Yntec/mistoonAnime2",
}

# Initialize the pipeline once at startup with the default base model.
# NOTE(review): the pipeline is constructed without the MotionAdapter that is
# imported above — confirm AnimateDiffPipeline loads correctly without one.
print("Loading AnimateDiff pipeline...")
base_model = "Realistic"
pipe = AnimateDiffPipeline.from_pretrained(BASE_MODELS[base_model], torch_dtype=dtype).to(device)
# "trailing" timestep spacing + linear betas: scheduler settings typically
# paired with few-step (Lightning/LCM-style) sampling.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
print("Pipeline loaded successfully.")
# Video Generation Function
def generate_video(prompt, base="Realistic", motion="", steps=8):
    """Generate a ~30-second MP4 for *prompt* and return its file path.

    Args:
        prompt: Text description of the scene.
        base: Key into ``BASE_MODELS`` selecting the checkpoint.
        motion: Optional Hub repo id of a motion LoRA; ``""`` disables it.
        steps: Denoising steps per clip (the Gradio dropdown passes a string).

    Returns:
        Path of the exported ``.mp4`` under ``/tmp``.
    """
    global pipe
    print(f"Generating video: Prompt='{prompt}', Base='{base}', Steps='{steps}'")

    # Switch the base model only when it actually changed. The original code
    # reloaded the full checkpoint from the Hub on EVERY call, which is slow
    # and wasteful. The last-loaded key is cached on the function object so
    # this edit stays self-contained.
    if base in BASE_MODELS and getattr(generate_video, "_loaded_base", None) != base:
        print(f"Loading base model: {base}")
        pipe = AnimateDiffPipeline.from_pretrained(BASE_MODELS[base], torch_dtype=dtype).to(device)
        generate_video._loaded_base = base

    # Apply the selected motion LoRA. The `motion` argument was previously
    # accepted from the UI but silently ignored.
    if motion:
        pipe.load_lora_weights(motion, adapter_name="motion")
        pipe.set_adapters(["motion"], [0.7])

    steps = int(steps)  # dropdown values arrive as strings
    fps = 10            # frames per second of the output video
    duration = 30       # target video length in seconds
    total_frames = fps * duration  # frames needed for the full video

    # Each pipeline call produces a short multi-frame clip (output.frames[0]
    # is a LIST of frames, 16 by default), so accumulate clips until we have
    # enough frames, then truncate. The original looped `total_frames` times
    # and kept every frame of every clip, producing ~16x the advertised
    # duration (an ~8-minute video labeled "30 seconds").
    video_frames = []
    while len(video_frames) < total_frames:
        output = pipe(
            prompt=prompt,
            guidance_scale=1.2,
            num_inference_steps=steps,
        )
        clip = output.frames[0]
        if not clip:
            # Guard against an empty clip turning this into an infinite loop.
            raise RuntimeError("Pipeline returned no frames.")
        video_frames.extend(clip)
    video_frames = video_frames[:total_frames]

    # Export to an MP4 with a collision-free name.
    name = uuid.uuid4().hex
    output_path = f"/tmp/{name}.mp4"
    export_to_video(video_frames, output_path, fps=fps)
    print(f"Video saved to {output_path}")
    return output_path
# Gradio Interface
with gr.Blocks() as demo:
gr.HTML("<h1><center>30-Second Text-to-Video Generation</center></h1>")
with gr.Row():
prompt = gr.Textbox(label="Text Prompt", placeholder="Describe your scene...")
with gr.Row():
base_model = gr.Dropdown(
label="Base Model",
choices=["Realistic", "Cartoon", "3D", "Anime"],
value="Realistic"
)
motion = gr.Dropdown(
label="Motion Adapter",
choices=[
("None", ""),
("Zoom In", "guoyww/animatediff-motion-lora-zoom-in"),
("Zoom Out", "guoyww/animatediff-motion-lora-zoom-out"),
("Tilt Up", "guoyww/animatediff-motion-lora-tilt-up"),
("Tilt Down", "guoyww/animatediff-motion-lora-tilt-down"),
("Pan Left", "guoyww/animatediff-motion-lora-pan-left"),
("Pan Right", "guoyww/animatediff-motion-lora-pan-right"),
],
value=""
)
steps = gr.Dropdown(
label="Inference Steps",
choices=["4", "8", "12"],
value="8"
)
with gr.Row():
generate_button = gr.Button("Generate Video")
video_output = gr.Video(label="Generated Video", autoplay=True, height=512, width=512)
generate_button.click(
fn=generate_video,
inputs=[prompt, base_model, motion, steps],
outputs=video_output
)
demo.launch()