"""Gradio demo app for VADER (Video Diffusion Alignment via Reward Gradient).

Builds a Blocks UI with a prompt box, a LoRA model selector and sampling
sliders, and forwards the chosen values to ``main_fn`` from
``train_t2v_lora`` to produce a video.
"""
import os
import sys
from copy import deepcopy

import gradio as gr
import spaces

# Make the VADER-VideoCrafter script directories importable before importing
# from them below.
sys.path.append('./VADER-VideoCrafter/scripts/main')
sys.path.append('./VADER-VideoCrafter/scripts')
sys.path.append('./VADER-VideoCrafter')

from train_t2v_lora import main_fn, setup_model  # noqa: E402

# Each row follows the input order used by ``gr.Examples`` below:
# prompt, lora_model, lora_rank, seed, height, width,
# unconditional_guidance_scale, ddim_steps, ddim_eta, frames, savefps.
examples = [
    ["Fairy and Magical Flowers: A fairy tends to enchanted, glowing flowers.",
     'huggingface-hps-aesthetic', 8, 901, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A cat playing an electric guitar in a loft with industrial-style decor and soft, multicolored lights.",
     'huggingface-hps-aesthetic', 8, 208, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A raccoon playing a guitar under a blossoming cherry tree.",
     'huggingface-hps-aesthetic', 8, 180, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A raccoon playing an electric bass in a garage band setting.",
     'huggingface-hps-aesthetic', 8, 400, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A talking bird with shimmering feathers and a melodious voice finds a legendary treasure, guiding through enchanted forests, ancient ruins, and mystical challenges.",
     "huggingface-pickscore", 16, 200, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A snow princess stands on the balcony of her ice castle, her hair adorned with delicate snowflakes, overlooking her serene realm.",
     "huggingface-pickscore", 16, 400, 384, 512, 12.0, 25, 1.0, 24, 10],
    ["A mermaid with flowing hair and a shimmering tail discovers a hidden underwater kingdom adorned with coral palaces, glowing pearls, and schools of colorful fish, encountering both wonders and dangers along the way.",
     "huggingface-pickscore", 16, 800, 384, 512, 12.0, 25, 1.0, 24, 10],
]

# Built once at import time and shared by every request.
model = setup_model()


@spaces.GPU(duration=180)
def gradio_main_fn(prompt, lora_model, lora_rank, seed, height, width,
                   unconditional_guidance_scale, ddim_steps, ddim_eta,
                   frames, savefps):
    """Run one inference request coming from the UI.

    The numeric widget values are coerced to ``int``/``float`` and passed,
    together with the prompt and LoRA model name, to ``main_fn``.

    Args:
        prompt: Text prompt.
        lora_model: Name selected in the "VADER Model" dropdown.
        lora_rank: LoRA rank (coerced to ``int``).
        seed: Random seed (coerced to ``int``).
        height: Frame height (coerced to ``int``).
        width: Frame width (coerced to ``int``).
        unconditional_guidance_scale: Guidance scale (coerced to ``float``).
        ddim_steps: Number of DDIM steps (coerced to ``int``).
        ddim_eta: DDIM eta (coerced to ``float``).
        frames: Number of frames (coerced to ``int``).
        savefps: FPS of the saved video (coerced to ``int``).

    Returns:
        Whatever ``main_fn`` returns; it is wired to the video output.

    Raises:
        gr.Error: If the module-level ``model`` is ``None``.
    """
    if model is None:
        # Returning the message would hand it to the video output as if it
        # were a video; raising gr.Error surfaces it as an error in the UI.
        raise gr.Error("Model is not loaded. Please load the model first.")

    # A deep copy is passed so ``main_fn`` never receives the shared
    # module-level instance (presumably because it modifies the model when
    # applying LoRA weights -- confirm against train_t2v_lora).
    video_path = main_fn(
        prompt=prompt,
        lora_model=lora_model,
        lora_rank=int(lora_rank),
        seed=int(seed),
        height=int(height),
        width=int(width),
        unconditional_guidance_scale=float(unconditional_guidance_scale),
        ddim_steps=int(ddim_steps),
        ddim_eta=float(ddim_eta),
        frames=int(frames),
        savefps=int(savefps),
        model=deepcopy(model),
    )
    return video_path


def reset_fn():
    """Return the default value of every control.

    The tuple order must match the ``outputs`` list of ``reset_btn.click``:
    prompt, seed, height, width, guidance scale, DDIM steps, DDIM eta,
    frames, LoRA rank, save FPS, LoRA model.
    """
    return (
        "A brown dog eagerly eats from a bowl in a kitchen.",
        200,
        384,
        512,
        12.0,
        25,
        1.0,
        24,
        16,
        10,
        "huggingface-pickscore",
    )


def update_lora_rank(lora_model):
    """Return a slider update with the rank paired with ``lora_model``."""
    if lora_model == "huggingface-pickscore":
        return gr.update(value=16)
    elif lora_model == "huggingface-hps-aesthetic":
        return gr.update(value=8)
    else:  # "Base Model"
        return gr.update(value=8)


def update_dropdown(lora_rank):
    """Return a dropdown update with the model paired with ``lora_rank``."""
    if lora_rank == 16:
        return gr.update(value="huggingface-pickscore")
    elif lora_rank == 8:
        return gr.update(value="huggingface-hps-aesthetic")
    else:  # 0
        # NOTE(review): "Base Model" is not among the dropdown's choices and
        # the rank slider only ranges over 8..16, so this branch looks
        # unreachable from the current UI -- confirm before relying on it.
        return gr.update(value="Base Model")


custom_css = """
#centered {
    display: flex;
    justify-content: center;
    width: 60%;
    margin: 0 auto;
}
.column-centered {
    display: flex;
    flex-direction: column;
    align-items: center;
    width: 60%;
}
#image-upload {
    flex-grow: 1;
}
#params .tabs {
    display: flex;
    flex-direction: column;
    flex-grow: 1;
}
#params .tabitem[style="display: block;"] {
    flex-grow: 1;
    display: flex !important;
}
#params .gap {
    flex-grow: 1;
}
#params .form {
    flex-grow: 1 !important;
}
#params .form > :last-child{
    flex-grow: 1;
}
"""

with gr.Blocks(css=custom_css) as demo:
    # Header: title, authors/affiliation, and paper/website/code links.
    with gr.Row():
        with gr.Column():
            gr.HTML(
                """
                <h1 style='text-align: center; font-size: 3.2em; margin-bottom: 0.5em; font-family: Arial, sans-serif; margin: 20px;'>
                    Video Diffusion Alignment via Reward Gradient
                </h1>
                """
            )
            gr.HTML(
                """
                <style>
                    body {
                        font-family: Arial, sans-serif;
                        text-align: center;
                        margin: 50px;
                    }
                    a {
                        text-decoration: none !important;
                        color: black !important;
                    }
                </style>
                <body>
                    <div style="font-size: 1.4em; margin-bottom: 0.5em; ">
                        <a href="https://mihirp1998.github.io">Mihir Prabhudesai</a><sup>*</sup>
                        <a href="https://russellmendonca.github.io/">Russell Mendonca</a><sup>*</sup>
                        <a href="mailto:zheyangqin.qzy@gmail.com">Zheyang Qin</a><sup>*</sup>
                        <a href="https://www.cs.cmu.edu/~katef/">Katerina Fragkiadaki</a><sup></sup>
                        <a href="https://www.cs.cmu.edu/~dpathak/">Deepak Pathak</a><sup></sup>
                    </div>
                    <div style="font-size: 1.3em; font-style: italic;">
                        Carnegie Mellon University
                    </div>
                </body>
                """
            )
            gr.HTML(
                """
                <head>
                    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
                    <style>
                        .button-container {
                            display: flex;
                            justify-content: center;
                            gap: 10px;
                            margin-top: 10px;
                        }
                        .button-container a {
                            display: inline-flex;
                            align-items: center;
                            padding: 10px 20px;
                            border-radius: 30px;
                            border: 1px solid #ccc;
                            text-decoration: none;
                            color: #333 !important;
                            font-size: 16px;
                            text-decoration: none !important;
                        }
                        .button-container a i {
                            margin-right: 8px;
                        }
                    </style>
                </head>
                <div class="button-container">
                    <a href="https://arxiv.org/abs/2407.08737" class="btn btn-outline-primary">
                        <i class="fa-solid fa-file-pdf"></i> Paper
                    </a>
                    <a href="https://vader-vid.github.io/" class="btn btn-outline-danger">
                        <i class="fa-solid fa-video"></i> Website
                    </a>
                    <a href="https://github.com/mihirp1998/VADER" class="btn btn-outline-secondary">
                        <i class="fa-brands fa-github"></i> Code
                    </a>
                </div>
                """
            )

    # Main row: model/prompt controls on the left, generated video on the right.
    with gr.Row(elem_id="centered"):
        with gr.Column(elem_id="params"):
            lora_model = gr.Dropdown(
                label="VADER Model",
                choices=["huggingface-pickscore", "huggingface-hps-aesthetic"],
                value="huggingface-pickscore",
            )
            lora_rank = gr.Slider(minimum=8, maximum=16, label="LoRA Rank", step=8, value=16)
            prompt = gr.Textbox(
                placeholder="Enter prompt text here",
                lines=4,
                label="Text Prompt",
                value="A brown dog eagerly eats from a bowl in a kitchen.",
            )
            run_btn = gr.Button("Run Inference")

        with gr.Column():
            output_video = gr.Video(elem_id="image-upload")

    # Sampling parameters.
    with gr.Row(elem_id="centered"):
        with gr.Column():
            seed = gr.Slider(minimum=0, maximum=65536, label="Seed", step=1, value=200)

            with gr.Row():
                height = gr.Slider(minimum=0, maximum=512, label="Height", step=16, value=384)
                width = gr.Slider(minimum=0, maximum=512, label="Width", step=16, value=512)

            with gr.Row():
                frames = gr.Slider(minimum=0, maximum=50, label="Frames", step=1, value=24)
                savefps = gr.Slider(minimum=0, maximum=30, label="Save FPS", step=1, value=10)

            with gr.Row():
                DDIM_Steps = gr.Slider(minimum=0, maximum=50, label="DDIM Steps", step=1, value=25)
                unconditional_guidance_scale = gr.Slider(
                    minimum=0, maximum=50, label="Guidance Scale", step=0.1, value=12.0
                )
                DDIM_Eta = gr.Slider(minimum=0, maximum=1, label="DDIM Eta", step=0.01, value=1.0)

            # reset button
            reset_btn = gr.Button("Reset")

            # The output order here must match the tuple returned by reset_fn.
            reset_btn.click(
                fn=reset_fn,
                outputs=[prompt, seed, height, width, unconditional_guidance_scale,
                         DDIM_Steps, DDIM_Eta, frames, lora_rank, savefps, lora_model],
            )

    run_btn.click(
        fn=gradio_main_fn,
        inputs=[prompt, lora_model, lora_rank, seed, height, width,
                unconditional_guidance_scale, DDIM_Steps, DDIM_Eta, frames, savefps],
        outputs=output_video,
    )

    # Keep the model dropdown and the rank slider in sync with each other.
    lora_model.change(fn=update_lora_rank, inputs=lora_model, outputs=lora_rank)
    lora_rank.change(fn=update_dropdown, inputs=lora_rank, outputs=lora_model)

    gr.Examples(
        examples=examples,
        inputs=[prompt, lora_model, lora_rank, seed, height, width,
                unconditional_guidance_scale, DDIM_Steps, DDIM_Eta, frames, savefps],
        outputs=output_video,
        fn=gradio_main_fn,
        run_on_click=False,
        cache_examples="lazy",
    )

demo.launch(share=True)