# Scraped file-viewer header (not part of the program):
# cybermark's picture
# Video Py
# 6b357fe
# raw
# history blame
# 3.7 kB
#!/usr/bin/env python
from __future__ import annotations
import os
import random
import tempfile
import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# Frame-count limits, overridable through environment variables.
# MAX_NUM_FRAMES caps the "Number of frames" slider; DEFAULT_NUM_FRAMES is
# clamped so it can never exceed that cap.
MAX_NUM_FRAMES = int(os.environ.get('MAX_NUM_FRAMES', '200'))
DEFAULT_NUM_FRAMES = int(os.environ.get('DEFAULT_NUM_FRAMES', '16'))
if DEFAULT_NUM_FRAMES > MAX_NUM_FRAMES:
    DEFAULT_NUM_FRAMES = MAX_NUM_FRAMES
# Build the shared text-to-video pipeline once, at import time, from the
# 'damo-vilab/text-to-video-ms-1.7b' checkpoint, requesting float16 tensors
# and the 'fp16' weight variant. `generate` below reuses this single object.
pipe = DiffusionPipeline.from_pretrained('damo-vilab/text-to-video-ms-1.7b',
torch_dtype=torch.float16,
variant='fp16')
# Replace the checkpoint's scheduler with a DPMSolverMultistepScheduler that is
# constructed from the existing scheduler's configuration.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# NOTE(review): the next two calls presumably reduce peak GPU memory at some
# cost in speed -- confirm against the diffusers documentation for the
# installed version.
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
def to_video(frames: list[np.ndarray], fps: int) -> str:
    """Encode a sequence of frames into a temporary MP4 file.

    Args:
        frames: Frames to append to the video, in playback order. Each item
            is handed unchanged to the imageio writer.
        fps: Frame rate passed to the imageio FFMPEG writer.

    Returns:
        Path of the newly created ``.mp4`` file. The file is created with
        ``delete=False``, so it is not removed by this function.
    """
    # Reserve a unique path, then release our own handle immediately so the
    # descriptor is not leaked and only the video writer has the file open.
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as out_file:
        out_path = out_file.name

    writer = imageio.get_writer(out_path, format='FFMPEG', fps=fps)
    try:
        for frame in frames:
            writer.append_data(frame)
    finally:
        # Always close the writer, even if a frame fails to encode.
        writer.close()
    return out_path
def generate(prompt: str, seed: int, num_frames: int,
             num_inference_steps: int) -> str:
    """Run the text-to-video pipeline for *prompt* and return a video path.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        seed: Seed for the torch generator; ``-1`` means "draw a random seed
            between 0 and 1000000".
        num_frames: Number of frames requested from the pipeline.
        num_inference_steps: Number of inference steps requested from the
            pipeline.

    Returns:
        Path of the file produced by ``to_video`` at a fixed 8 fps.
    """
    chosen_seed = random.randint(0, 1000000) if seed == -1 else seed
    rng = torch.Generator().manual_seed(chosen_seed)
    output = pipe(prompt,
                  num_inference_steps=num_inference_steps,
                  num_frames=num_frames,
                  generator=rng)
    return to_video(output.frames, 8)
# Sample inputs offered in the UI. Every row has the layout
# [prompt, seed, num_frames, num_inference_steps]; all rows share the same
# numeric settings and differ only in the prompt.
_EXAMPLE_PROMPTS = (
    'An astronaut riding a horse.',
    'A panda eating bamboo on a rock.',
    'Spiderman is surfing.',
)
examples = [[text, 0, 16, 25] for text in _EXAMPLE_PROMPTS]
# Markdown rendered at the top of the page. It is defined here because nothing
# else in this file provides DESCRIPTION; without it, building the UI fails
# with NameError at gr.Markdown(DESCRIPTION).
DESCRIPTION = ('# Text-to-Video Synthesis\n\n'
               'Generate a short video clip from a text prompt using '
               '`damo-vilab/text-to-video-ms-1.7b`.')

# NOTE(review): gr.Box and the .style(...) calls below look like Gradio 3.x
# APIs -- confirm the pinned Gradio version before upgrading.
with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        # Prompt text box and the "Generate video" button share one row.
        with gr.Box():
            with gr.Row(elem_id='prompt-container').style(equal_height=True):
                prompt = gr.Text(
                    label='Prompt',
                    show_label=False,
                    max_lines=1,
                    placeholder='Enter your prompt',
                    elem_id='prompt-text-input').style(container=False)
                run_button = gr.Button('Generate video').style(
                    full_width=False)
        result = gr.Video(label='Result', show_label=False, elem_id='gallery')
        with gr.Accordion('Advanced options', open=False):
            seed = gr.Slider(
                label='Seed',
                minimum=-1,
                maximum=1000000,
                step=1,
                value=-1,
                info='If set to -1, a different seed will be used each time.')
            num_frames = gr.Slider(
                label='Number of frames',
                minimum=16,
                maximum=MAX_NUM_FRAMES,
                step=1,
                value=16,
                info=
                'Note that the content of the video also changes when you change the number of frames.'
            )
            num_inference_steps = gr.Slider(label='Number of inference steps',
                                            minimum=10,
                                            maximum=50,
                                            step=1,
                                            value=25)

    # Order matters: it must match the positional parameters of `generate`
    # and the column layout of each row in `examples`.
    inputs = [
        prompt,
        seed,
        num_frames,
        num_inference_steps,
    ]
    # Example outputs are cached only when the SYSTEM environment variable
    # equals 'spaces'.
    gr.Examples(examples=examples,
                inputs=inputs,
                outputs=result,
                fn=generate,
                cache_examples=os.getenv('SYSTEM') == 'spaces')

    # Pressing Enter in the prompt box and clicking the button both trigger
    # the same generation call.
    prompt.submit(fn=generate, inputs=inputs, outputs=result)
    run_button.click(fn=generate, inputs=inputs, outputs=result)

demo.queue(api_open=False, max_size=15).launch()