Spaces:

rerun
/

Vista

Vista / app.py

Leonard Bruns

Add Vista example

d323598 4 months ago

4.23 kB

	"""Gradio interface for Vista model."""
	from __future__ import annotations

	import glob
	import os
	import queue
	import threading

	import gradio as gr
	import gradio_rerun
	import rerun as rr
	import spaces

	import vista


	@spaces.GPU(duration=400)
	@rr.thread_local_stream("Vista")
	def generate_gradio(
	first_frame_file_name: str,
	n_rounds: float=3,
	n_steps: float=10,
	height=576,
	width=1024,
	n_frames=25,
	cfg_scale=2.5,
	cond_aug=0.0,
	):
	global model

	n_rounds = int(n_rounds)
	n_steps = int(n_steps)

	# Use a queue to log immediately from internals
	log_queue = queue.SimpleQueue()

	stream = rr.binary_stream()

	blueprint = vista.generate_blueprint(n_rounds)
	rr.send_blueprint(blueprint)
	yield stream.read()

	handle = threading.Thread(
	target=vista.run_sampling,
	args=[
	log_queue,
	first_frame_file_name,
	height,
	width,
	n_rounds,
	n_frames,
	n_steps,
	cfg_scale,
	cond_aug,
	model,
	],
	)
	handle.start()
	while True:
	msg = log_queue.get()
	if msg == "done":
	break
	else:
	entity_path, entity, times = msg
	rr.reset_time()
	for timeline, time in times:
	if isinstance(time, int):
	rr.set_time_sequence(timeline, time)
	else:
	rr.set_time_seconds(timeline, time)
	rr.log(entity_path, entity)
	yield stream.read()
	handle.join()


	model = vista.create_model()

	with gr.Blocks(css="style.css") as demo:
	gr.Markdown(
	"""
	# Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability

	[Shenyuan Gao](https://github.com/Little-Podi), [Jiazhi Yang](https://scholar.google.com/citations?user=Ju7nGX8AAAAJ&hl=en), [Li Chen](https://scholar.google.com/citations?user=ulZxvY0AAAAJ&hl=en), [Kashyap Chitta](https://kashyap7x.github.io/), [Yihang Qiu](https://scholar.google.com/citations?user=qgRUOdIAAAAJ&hl=en), [Andreas Geiger](https://www.cvlibs.net/), [Jun Zhang](https://eejzhang.people.ust.hk/), [Hongyang Li](https://lihongyang.info/)

	This is a demo of the [Vista model](https://github.com/OpenDriveLab/Vista), a driving world model that can be used to simulate a variety of driving scenarios. This demo uses [Rerun](https://rerun.io/)'s custom [gradio component](https://www.gradio.app/custom-components/gallery?id=radames%2Fgradio_rerun) to livestream the model's output and show intermediate results.

	[📜technical report](https://arxiv.org/abs/2405.17398), [🎬video demos](https://vista-demo.github.io/), [🤗model weights](https://huggingface.co/OpenDriveLab/Vista)

	Note that the GPU time is limited to 400 seconds per run. If you need more time, you can run the model locally or on your own server.
	"""
	)
	first_frame = gr.Image(sources="upload", type="filepath")
	example_dir_path = os.path.join(os.path.dirname(__file__), "example_images")
	example_file_paths = sorted(glob.glob(os.path.join(example_dir_path, ".")))
	example_gallery = gr.Examples(
	examples=example_file_paths,
	inputs=first_frame,
	cache_examples=False,
	)

	btn = gr.Button("Generate video")
	num_rounds = gr.Slider(
	label="Segments",
	info="Number of 25 frame segments to generate. Higher values lead to longer videos. Try to keep the product of segments and steps below 30 to avoid running out of time.",
	minimum=1,
	maximum=5,
	value=2,
	step=1
	)
	num_steps = gr.Slider(
	label="Diffusion Steps",
	info="Number of diffusion steps per segment. Higher values lead to more detailed videos. Try to keep the product of segments and steps below 30 to avoid running out of time.",
	minimum=1,
	maximum=50,
	value=15,
	step=1
	)

	with gr.Row():
	viewer = gradio_rerun.Rerun(streaming=True)
	btn.click(
	generate_gradio,
	inputs=[first_frame, num_rounds, num_steps],
	outputs=[viewer],
	)

	demo.launch()