Spaces: fffiloni/svd_keyframe_interpolation

Runtime error

App Files Community

fffiloni committed on Sep 4, 2024

Commit

fdf117e

verified ·

1 Parent(s): f479bfc

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +41 -43

gradio_app.py CHANGED Viewed

@@ -2,6 +2,12 @@ import os
 import gradio as gr
 import torch
 from huggingface_hub import snapshot_download
 from diffusers.utils import load_image, export_to_video
 from diffusers import UNetSpatioTemporalConditionModel
 from custom_diffusers.pipelines.pipeline_frame_interpolation_with_noise_injection import FrameInterpolationWithNoiseInjectionPipeline
@@ -10,16 +16,8 @@ from attn_ctrl.attention_control import (AttentionStore,
                                          register_temporal_self_attention_control,
                                          register_temporal_self_attention_flip_control,
 )
-from torch.cuda.amp import autocast
-# Set up device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Download checkpoint
-snapshot_download(repo_id="fffiloni/svd_keyframe_interpolation", local_dir="checkpoints")
-checkpoint_dir = "checkpoints/svd_reverse_motion_with_attnflip"
-# Initialize pipeline
 pretrained_model_name_or_path = "stabilityai/stable-video-diffusion-img2vid-xt"
 noise_scheduler = EulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
@@ -31,14 +29,14 @@ pipe = FrameInterpolationWithNoiseInjectionPipeline.from_pretrained(
 )
 ref_unet = pipe.ori_unet
-# Compute delta w
 state_dict = pipe.unet.state_dict()
 finetuned_unet = UNetSpatioTemporalConditionModel.from_pretrained(
     checkpoint_dir,
     subfolder="unet",
     torch_dtype=torch.float16,
 )
-assert finetuned_unet.config.num_frames == 14
 ori_unet = UNetSpatioTemporalConditionModel.from_pretrained(
     "stabilityai/stable-video-diffusion-img2vid",
     subfolder="unet",
@@ -54,43 +52,43 @@ for name, param in finetuned_state_dict.items():
         state_dict[name] = state_dict[name] + delta_w
 pipe.unet.load_state_dict(state_dict)
-controller_ref = AttentionStore()
 register_temporal_self_attention_control(ref_unet, controller_ref)
 controller = AttentionStore()
 register_temporal_self_attention_flip_control(pipe.unet, controller, controller_ref)
-# Custom CUDA memory management function
-def cuda_memory_cleanup():
-    torch.cuda.empty_cache()
-    torch.cuda.ipc_collect()
 def check_outputs_folder(folder_path):
     if os.path.exists(folder_path) and os.path.isdir(folder_path):
         for filename in os.listdir(folder_path):
             file_path = os.path.join(folder_path, filename)
             try:
                 if os.path.isfile(file_path) or os.path.islink(file_path):
-                    os.unlink(file_path)
                 elif os.path.isdir(file_path):
-                    shutil.rmtree(file_path)
             except Exception as e:
                 print(f'Failed to delete {file_path}. Reason: {e}')
     else:
         print(f'The folder {folder_path} does not exist.')
-@torch.no_grad()
 def infer(frame1_path, frame2_path):
     seed = 42
     num_inference_steps = 10
     noise_injection_steps = 0
     noise_injection_ratio = 0.5
     weighted_average = False
-    decode_chunk_size = 8
     generator = torch.Generator(device)
     if seed is not None:
         generator = generator.manual_seed(seed)
     frame1 = load_image(frame1_path)
     frame1 = frame1.resize((512, 288))
@@ -98,33 +96,35 @@ def infer(frame1_path, frame2_path):
     frame2 = load_image(frame2_path)
     frame2 = frame2.resize((512, 288))
-    cuda_memory_cleanup()
-    with autocast():
-        frames = pipe(image1=frame1, image2=frame2,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-            weighted_average=weighted_average,
-            noise_injection_steps=noise_injection_steps,
-            noise_injection_ratio=noise_injection_ratio,
-            decode_chunk_size=decode_chunk_size
-        ).frames[0]
-    frames = [frame.cpu() for frame in frames]
     out_dir = "result"
     check_outputs_folder(out_dir)
     os.makedirs(out_dir, exist_ok=True)
     out_path = "result/video_result.gif"
     return "done"
-@torch.no_grad()
-def load_model():
-    global pipe
-    pipe = pipe.to(device)
 with gr.Blocks() as demo:
     with gr.Column():
         gr.Markdown("# Keyframe Interpolation with Stable Video Diffusion")
         with gr.Row():
@@ -136,12 +136,10 @@ with gr.Blocks() as demo:
                 output = gr.Textbox()
     submit_btn.click(
-        fn=infer,
-        inputs=[image_input1, image_input2],
-        outputs=[output],
-        show_api=False
     )
-    demo.load(load_model)
-demo.queue(max_size=1).launch(show_api=False, show_error=True)

 import gradio as gr
 import torch
 from huggingface_hub import snapshot_download
+# import argparse
+snapshot_download(repo_id="fffiloni/svd_keyframe_interpolation", local_dir="checkpoints")
+checkpoint_dir = "checkpoints/svd_reverse_motion_with_attnflip"
 from diffusers.utils import load_image, export_to_video
 from diffusers import UNetSpatioTemporalConditionModel
 from custom_diffusers.pipelines.pipeline_frame_interpolation_with_noise_injection import FrameInterpolationWithNoiseInjectionPipeline
                                          register_temporal_self_attention_control,
                                          register_temporal_self_attention_flip_control,
 )
 pretrained_model_name_or_path = "stabilityai/stable-video-diffusion-img2vid-xt"
 noise_scheduler = EulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
 )
 ref_unet = pipe.ori_unet
 state_dict = pipe.unet.state_dict()
+# computing delta w
 finetuned_unet = UNetSpatioTemporalConditionModel.from_pretrained(
     checkpoint_dir,
     subfolder="unet",
     torch_dtype=torch.float16,
 )
+assert finetuned_unet.config.num_frames==14
 ori_unet = UNetSpatioTemporalConditionModel.from_pretrained(
     "stabilityai/stable-video-diffusion-img2vid",
     subfolder="unet",
         state_dict[name] = state_dict[name] + delta_w
 pipe.unet.load_state_dict(state_dict)
+controller_ref= AttentionStore()
 register_temporal_self_attention_control(ref_unet, controller_ref)
 controller = AttentionStore()
 register_temporal_self_attention_flip_control(pipe.unet, controller, controller_ref)
+device = "cuda"
+pipe = pipe.to(device)
 def check_outputs_folder(folder_path):
+    # Check if the folder exists
     if os.path.exists(folder_path) and os.path.isdir(folder_path):
+        # Delete all contents inside the folder
         for filename in os.listdir(folder_path):
             file_path = os.path.join(folder_path, filename)
             try:
                 if os.path.isfile(file_path) or os.path.islink(file_path):
+                    os.unlink(file_path)  # Remove file or link
                 elif os.path.isdir(file_path):
+                    shutil.rmtree(file_path)  # Remove directory
             except Exception as e:
                 print(f'Failed to delete {file_path}. Reason: {e}')
     else:
         print(f'The folder {folder_path} does not exist.')
 def infer(frame1_path, frame2_path):
     seed = 42
     num_inference_steps = 10
     noise_injection_steps = 0
     noise_injection_ratio = 0.5
     weighted_average = False
     generator = torch.Generator(device)
     if seed is not None:
         generator = generator.manual_seed(seed)
     frame1 = load_image(frame1_path)
     frame1 = frame1.resize((512, 288))
     frame2 = load_image(frame2_path)
     frame2 = frame2.resize((512, 288))
+    torch.cuda.empty_cache()
+    frames = pipe(image1=frame1, image2=frame2,
+        num_inference_steps=num_inference_steps, # 50
+        generator=generator,
+        weighted_average=weighted_average, # True
+        noise_injection_steps=noise_injection_steps, # 0
+        noise_injection_ratio= noise_injection_ratio, # 0.5
+         decode_chunk_size=4
+    ).frames[0]
+    print(f"FRAMES: {frames}")
     out_dir = "result"
     check_outputs_folder(out_dir)
     os.makedirs(out_dir, exist_ok=True)
     out_path = "result/video_result.gif"
+    '''
+    if out_path.endswith('.gif'):
+        frames[0].save(out_path, save_all=True, append_images=frames[1:], duration=142, loop=0)
+    else:
+        export_to_video(frames, out_path, fps=7)
+    '''
     return "done"
 with gr.Blocks() as demo:
     with gr.Column():
         gr.Markdown("# Keyframe Interpolation with Stable Video Diffusion")
         with gr.Row():
                 output = gr.Textbox()
     submit_btn.click(
+        fn = infer,
+        inputs = [image_input1, image_input2],
+        outputs = [output],
+        show_api = False
     )
+demo.queue().launch(show_api=False, show_error=True)