fbnnb committed · Commit f3e748e · verified · 1 parent: caea331

Update scripts/gradio/i2v_test_application.py

scripts/gradio/i2v_test_application.py CHANGED
@@ -30,7 +30,8 @@ def extract_frames(video_path):
         # Add the frame to the list
         frame_list.append(frame)
         frame_num += 1
-
+
+    print("load video length:", len(frame_list))
     # Close the video file
     cap.release()
 
@@ -80,12 +81,15 @@ class Image2Video():
 
     @spaces.GPU(duration=100)
     def get_image(self, image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, image2=None, frame_guides=None,control_scale=0.6):
+        print("enter fn")
         control_frames = extract_frames(frame_guides)
+        print("extract frames")
         seed_everything(seed)
         transform = transforms.Compose([
             transforms.Resize(min(self.resolution)),
             transforms.CenterCrop(self.resolution),
             ])
+        print("before empty cache")
         torch.cuda.empty_cache()
         print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
         start = time.time()
@@ -103,7 +107,7 @@ class Image2Video():
         # text cond
         with torch.no_grad(), torch.cuda.amp.autocast():
             text_emb = model.get_learned_conditioning([prompt])
-
+            print("before control")
             #control cond
             if frame_guides is not None:
                 cn_videos = []
@@ -129,7 +133,7 @@ class Image2Video():
             else:
                 cn_videos = None
 
-
+            print("image cond")
 
             # img cond
             img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
@@ -137,7 +141,7 @@ class Image2Video():
 
             image_tensor_resized = transform(img_tensor) #3,h,w
             videos = image_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
-
+            print("get latent z")
             # z = get_latent_z(model, videos) #bc,1,hw
             videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)
 
@@ -156,7 +160,7 @@ class Image2Video():
             img_tensor_repeat[:,:,:1,:,:] = z[:,:,:1,:,:]
             img_tensor_repeat[:,:,-1:,:,:] = z[:,:,-1:,:,:]
 
-
+            print("image embedder")
             cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
             img_emb = model.image_proj_model(cond_images)
 
@@ -164,7 +168,8 @@ class Image2Video():
 
             fs = torch.tensor([fs], dtype=torch.long, device=model.device)
             cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat], "control_cond": cn_videos}
-
+
+            print("before sample loop")
             ## inference
             batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale, hs=hs)
 
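For context, the print added in the first hunk reports how many frames extract_frames collected before the video handle is released. The helper's full body is not part of this diff; the following is only a minimal sketch of such a helper, assuming OpenCV (the cap.release() call in the context lines suggests cv2.VideoCapture):

import cv2

# Sketch only; the real extract_frames in scripts/gradio/i2v_test_application.py may differ.
def extract_frames(video_path):
    # Open the guide video and read it frame by frame.
    cap = cv2.VideoCapture(video_path)
    frame_list = []
    frame_num = 0
    while True:
        ret, frame = cap.read()
        if not ret:  # no more frames, or the file could not be read
            break
        # Add the frame to the list
        frame_list.append(frame)
        frame_num += 1

    print("load video length:", len(frame_list))
    # Close the video file
    cap.release()
    return frame_list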
  # フレームをリストに追加
31
  frame_list.append(frame)
32
  frame_num += 1
33
+
34
+ print("load video length:", len(frame_list))
35
  # 動画ファイルを閉じる
36
  cap.release()
37
 
 
81
 
82
  @spaces.GPU(duration=100)
83
  def get_image(self, image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, image2=None, frame_guides=None,control_scale=0.6):
84
+ print("enter fn")
85
  control_frames = extract_frames(frame_guides)
86
+ print("extract frames")
87
  seed_everything(seed)
88
  transform = transforms.Compose([
89
  transforms.Resize(min(self.resolution)),
90
  transforms.CenterCrop(self.resolution),
91
  ])
92
+ print("before empty cache")
93
  torch.cuda.empty_cache()
94
  print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
95
  start = time.time()
 
107
  # text cond
108
  with torch.no_grad(), torch.cuda.amp.autocast():
109
  text_emb = model.get_learned_conditioning([prompt])
110
+ print("before control")
111
  #control cond
112
  if frame_guides is not None:
113
  cn_videos = []
 
133
  else:
134
  cn_videos = None
135
 
136
+ print("image cond")
137
 
138
  # img cond
139
  img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
 
141
 
142
  image_tensor_resized = transform(img_tensor) #3,h,w
143
  videos = image_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
144
+ print("get latent z")
145
  # z = get_latent_z(model, videos) #bc,1,hw
146
  videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)
147
 
 
160
  img_tensor_repeat[:,:,:1,:,:] = z[:,:,:1,:,:]
161
  img_tensor_repeat[:,:,-1:,:,:] = z[:,:,-1:,:,:]
162
 
163
+ print("image embedder")
164
  cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
165
  img_emb = model.image_proj_model(cond_images)
166
 
 
168
 
169
  fs = torch.tensor([fs], dtype=torch.long, device=model.device)
170
  cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat], "control_cond": cn_videos}
171
+
172
+ print("before sample loop")
173
  ## inference
174
  batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale, hs=hs)
175
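The later hunks sit in the image-conditioning path, where the input still is resized, center-cropped, and tiled along the time axis before being encoded. Pulled together as a standalone sketch: the resolution and frames values below are stand-ins (the app reads them from self.resolution and model.temporal_length), and device placement and normalization are omitted.

import numpy as np
import torch
from einops import repeat
from torchvision import transforms

resolution = (320, 512)   # stand-in; the app reads this from self.resolution
frames = 16               # stand-in; the app reads model.temporal_length

transform = transforms.Compose([
    transforms.Resize(min(resolution)),
    transforms.CenterCrop(resolution),
])

# Stand-in for the still image handed over by Gradio (H, W, 3 uint8).
image = np.random.randint(0, 255, (576, 1024, 3), dtype=np.uint8)

img_tensor = torch.from_numpy(image).permute(2, 0, 1).float()  # 3, H, W
image_tensor_resized = transform(img_tensor)                   # 3, h, w
videos = image_tensor_resized.unsqueeze(0).unsqueeze(2)        # b, c, 1, h, w
# Tile the single conditioning frame over half the temporal length,
# as in the "videos = repeat(...)" context line of the diff.
videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames // 2)
print(videos.shape)  # torch.Size([1, 3, 8, 320, 512])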