nateraw committed
Commit 07b4fcf · 1 Parent(s): 7a4f2ae

Create new file

Files changed (1):
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
import time
from pathlib import Path

import gradio as gr
import torch
from diffusers.schedulers import LMSDiscreteScheduler
from stable_diffusion_videos import StableDiffusionWalkPipeline, generate_images


class ImageGenerationInterface:
    """Two-tab Gradio app: batch image generation and audio-synced prompt-walk videos."""

    def __init__(self, pipeline):
        self.pipeline = pipeline

        # Tab 1: generate batches of images for a single prompt.
        self.interface_images = gr.Interface(
            self.fn,
            inputs=[
                gr.Textbox("blueberry spaghetti", label='Prompt'),
                gr.Slider(1, 24, 16, step=1, label='Batch size'),
                gr.Slider(1, 16, 1, step=1, label='# Batches'),
                gr.Slider(10, 100, 50, step=1, label='# Inference Steps'),
                gr.Slider(5.0, 15.0, 7.5, step=0.5, label='Guidance Scale'),
                gr.Slider(512, 1024, 512, step=64, label='Height'),
                gr.Slider(512, 1024, 512, step=64, label='Width'),
                gr.Checkbox(False, label='Upsample'),
                gr.Textbox("nateraw/stable-diffusion-gallery", label='(Optional) Repo ID'),
                gr.Checkbox(False, label='Push to Hub'),
                gr.Checkbox(False, label='Private'),
                gr.Textbox("./images", label='Output directory'),
            ],
            outputs=gr.Gallery(),
        )

        # Tab 2: interpolate between prompts, timed to offsets within an audio file.
        self.interface_videos = gr.Interface(
            self.fn_videos,
            inputs=[
                gr.Textbox("blueberry spaghetti\nstrawberry spaghetti", lines=2, label='Prompts, separated by new line'),
                gr.Textbox("42\n1337", lines=2, label='Seeds, separated by new line'),
                gr.Textbox("25\n27", lines=2, label='Audio Offsets (seconds in song), separated by new line'),
                gr.Audio(type="filepath"),
                gr.Slider(3, 60, 5, step=1, label='FPS'),
                gr.Slider(1, 24, 16, step=1, label='Batch size'),
                gr.Slider(10, 100, 50, step=1, label='# Inference Steps'),
                gr.Slider(5.0, 15.0, 7.5, step=0.5, label='Guidance Scale'),
                gr.Slider(512, 1024, 512, step=64, label='Height'),
                gr.Slider(512, 1024, 512, step=64, label='Width'),
                gr.Checkbox(False, label='Upsample'),
            ],
            outputs=gr.Video(),
        )

        self.interface = gr.TabbedInterface(
            [self.interface_images, self.interface_videos],
            ['Images!', 'Videos!'],
        )

    def fn_videos(
        self,
        prompts,
        seeds,
        audio_offsets,
        audio_filepath,
        fps,
        batch_size,
        num_inference_steps,
        guidance_scale,
        height,
        width,
        upsample,
    ):
        # Parse the newline-separated textbox inputs.
        prompts = [x.strip() for x in prompts.split('\n')]
        seeds = [int(x.strip()) for x in seeds.split('\n')]
        audio_offsets = [float(x.strip()) for x in audio_offsets.split('\n')]

        # Each pair of consecutive offsets spans (b - a) seconds of audio, so
        # interpolating (b - a) * fps frames keeps the video in sync with the song.
        num_interpolation_steps = [(b - a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]

        return self.pipeline.walk(
            prompts=prompts,
            seeds=seeds,
            num_interpolation_steps=num_interpolation_steps,
            audio_filepath=audio_filepath,
            audio_start_sec=audio_offsets[0],
            fps=fps,
            height=height,
            width=width,
            output_dir='dreams',
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            upsample=upsample,
            batch_size=batch_size,
        )

    def fn(
        self,
        prompt,
        batch_size,
        num_batches,
        num_inference_steps,
        guidance_scale,
        height,
        width,
        upsample,
        repo_id,
        push_to_hub,
        private,
        output_dir,
    ):
        # Each run is written under a timestamped name inside output_dir.
        name = time.strftime("%Y%m%d-%H%M%S")
        image_filepaths = generate_images(
            self.pipeline,
            prompt,
            batch_size=batch_size,
            num_batches=num_batches,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            output_dir=output_dir,
            name=name,
            image_file_ext='.jpg',
            upsample=upsample,
            height=height,
            width=width,
            push_to_hub=push_to_hub,
            repo_id=repo_id,
            private=private,
            create_pr=False,
        )
        # Gallery items as (filepath, caption) pairs.
        return [(x, Path(x).stem) for x in sorted(image_filepaths)]

    def launch(self, *args, **kwargs):
        self.interface.launch(*args, **kwargs)


def main(
    model_id: str = "CompVis/stable-diffusion-v1-4",
    tiled=False,
    disable_safety_checker=False,
):
    safety_checker_kwargs = {'safety_checker': None} if disable_safety_checker else {}

    # Load the walk pipeline in fp16 with an LMS scheduler; requires a CUDA GPU.
    pipeline = StableDiffusionWalkPipeline.from_pretrained(
        model_id,
        revision="fp16",
        torch_dtype=torch.float16,
        scheduler=LMSDiscreteScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        ),
        tiled=tiled,
        **safety_checker_kwargs,
    ).to("cuda")
    ImageGenerationInterface(pipeline).launch(debug=True)


if __name__ == '__main__':
    import fire

    fire.Fire(main)
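
A quick worked example of the timing math in fn_videos, using the video tab's default values (a sketch run outside the app, nothing beyond the code above is assumed):

    # Default offsets "25\n27" parsed to floats; the FPS slider defaults to 5.
    audio_offsets = [25.0, 27.0]
    fps = 5
    num_interpolation_steps = [(b - a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]
    print(num_interpolation_steps)  # [10.0]

Each consecutive pair of offsets contributes (b - a) * fps frames, so the 2-second span from 25s to 27s yields 10 interpolated frames, and playback at 5 FPS lands the second prompt exactly on its offset in the song.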
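Because the entry point hands main to fire.Fire, python-fire exposes main's parameters as command-line flags. A minimal launch sketch, assuming stable_diffusion_videos, diffusers, gradio, torch, and fire are installed and a CUDA device is available (the pipeline is moved to "cuda" unconditionally):

    # Defaults: CompVis/stable-diffusion-v1-4 with the safety checker enabled.
    python app.py

    # Flags mirror main's signature; these values are illustrative.
    python app.py --model_id CompVis/stable-diffusion-v1-4 --disable_safety_checker True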