Spaces:

anshuln
/

peekaboo-demo

Runtime error

App Files Files Community

anshuln committed on Feb 12, 2024

Commit

2d30cbe

verified ·

1 Parent(s): 759e47c

Delete src/app.py

Browse files

Files changed (1) hide show

src/app.py +0 -222

src/app.py DELETED Viewed

@@ -1,222 +0,0 @@
-import gradio as gr
-import os
-import numpy as np
-from gradio_utils import *
-def image_mod(image):
-    """Return ``image`` rotated by 45 degrees using its own ``rotate`` method."""
-    rotated = image.rotate(45)
-    return rotated
-import os
-import sys
-sys.path.insert(1, os.path.join(sys.path[0], '..'))
-import cv2
-import numpy as np
-import torch
-import torch.nn.functional as F
-from models.pipelines import TextToVideoSDPipelineSpatialAware
-# Number of trajectory control points; in the visible code it is only referenced
-# by the commented-out variant of generate_bb.
-NUM_POINTS = 3
-# Number of video frames; sizes the per-frame mask canvas and the latent tensor.
-NUM_FRAMES = 24
-# Bounding-box edge length used when size == "large"; the "medium" and "small"
-# boxes are derived from it (x0.75 and //2 respectively).
-LARGE_BOX_SIZE = 256
-def generate_video(pipe, overall_prompt, latents, get_latents=False, num_frames=24, num_inference_steps=50, fg_masks=None,
-        fg_masked_latents=None, frozen_steps=0, frozen_prompt=None, custom_attention_mask=None, fg_prompt=None):
-    """Call ``pipe`` on ``overall_prompt`` and return the ``.frames`` of its result.
-
-    The first call always forwards the mask/prompt conditioning arguments
-    (``fg_masks`` as ``frozen_mask``, ``fg_masked_latents`` as
-    ``latents_all_input``, ``frozen_steps``, ``frozen_prompt``,
-    ``custom_attention_mask``, ``fg_prompt``) together with the fixed settings
-    ``make_attention_mask_2d=True``, ``attention_mask_block_diagonal=True``,
-    ``height=320`` and ``width=576``.
-
-    When ``get_latents`` is true, ``pipe`` is called a second time with
-    ``output_type="latent"`` and *without* the conditioning arguments, and the
-    function returns the tuple ``(video_frames, video_latents)``; otherwise it
-    returns only ``video_frames``.
-    """
-    # Arguments common to both pipeline invocations.
-    shared_kwargs = {
-        "num_frames": num_frames,
-        "latents": latents,
-        "num_inference_steps": num_inference_steps,
-    }
-    conditioned_output = pipe(
-        overall_prompt,
-        frozen_mask=fg_masks,
-        frozen_steps=frozen_steps,
-        latents_all_input=fg_masked_latents,
-        frozen_prompt=frozen_prompt,
-        custom_attention_mask=custom_attention_mask,
-        fg_prompt=fg_prompt,
-        make_attention_mask_2d=True,
-        attention_mask_block_diagonal=True,
-        height=320,
-        width=576,
-        **shared_kwargs,
-    )
-    video_frames = conditioned_output.frames
-    if not get_latents:
-        return video_frames
-    # Second, unconditioned pass that asks the pipeline for latents instead of frames.
-    video_latents = pipe(overall_prompt, output_type="latent", **shared_kwargs).frames
-    return video_frames, video_latents
-# def generate_bb(prompt, fg_object, aspect_ratio, size, trajectory):
-#     if len(trajectory['layers']) < NUM_POINTS:
-#       raise ValueError
-#     final_canvas = torch.zeros((NUM_FRAMES,320,576))
-#     bbox_size_x = LARGE_BOX_SIZE if size == "large" else int(LARGE_BOX_SIZE * 0.75) if size == "medium" else LARGE_BOX_SIZE//2
-#     bbox_size_y = bbox_size_x if aspect_ratio == "square" else int(bbox_size_x * 0.75) if aspect_ratio == "horizontal" else int(bbox_size_x * 1.25)
-#     bbox_coords = []
-#     # TODO add checks for trajectory
-#     for t in trajectory['layers']:
-#         bbox_coords.append([int(t.sum(axis=-2).argmax()*576/800), int(t.sum(axis=-1)[140:460].argmax())])
-#     bbox_coords = np.array(bbox_coords)
-#     # Make a list of length 24
-#     # Each element is a list of length 2
-#     # First element is the x coordinate of the bbox
-#     # Second element is a set of y coordinates of the bbox
-#     new_bbox_coords = [np.zeros(2,) for i in range(NUM_FRAMES)]
-#     divisor = int(NUM_FRAMES / (NUM_POINTS-1))
-#     for i in range(NUM_POINTS-1):
-#         new_bbox_coords[i*divisor] = bbox_coords[i]
-#     new_bbox_coords[-1] = bbox_coords[-1]
-#     # Linearly interpolate in the middle
-#     for i in range(NUM_POINTS-1):
-#         for j in range(1,divisor):
-#             new_bbox_coords[i*divisor+j][1] = int((bbox_coords[i][0] * (divisor-j) + bbox_coords[(i+1)][0] * j) / divisor)
-#             new_bbox_coords[i*divisor+j][0] = int((bbox_coords[i][1] * (divisor-j) + bbox_coords[(i+1)][1] * j) / divisor)
-#     for i in range(NUM_FRAMES):
-#         x = int(new_bbox_coords[i][0])
-#         y = int(new_bbox_coords[i][1])
-#         final_canvas[i,int(x-bbox_size_x/2):int(x+bbox_size_x/2), int(y-bbox_size_y/2):int(y+bbox_size_y/2)] = 1
-#     torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#     try:
-#         pipe = TextToVideoSDPipelineSpatialAware.from_pretrained(
-#             "cerspense/zeroscope_v2_576w", torch_dtype=torch.float, variant="fp32").to(torch_device)
-#     except:
-#         pipe = TextToVideoSDPipelineSpatialAware.from_pretrained(
-#             "cerspense/zeroscope_v2_576w", torch_dtype=torch.float, variant="fp32").to(torch_device)
-#     fg_masks = F.interpolate(final_canvas.unsqueeze(1), size=(40,72), mode="nearest").to(torch_device)
-#     # Save fg_masks as images
-#     for i in range(NUM_FRAMES):
-#         cv2.imwrite(f"./fg_masks/frame_{i:04d}.png", fg_masks[i,0].cpu().numpy()*255)
-#     seed = 2
-#     random_latents = torch.randn([1, 4, NUM_FRAMES, 40, 72], generator=torch.Generator().manual_seed(seed)).to(torch_device)
-#     overall_prompt = f"A realistic lively {prompt}"
-#     video_frames = generate_video(pipe, overall_prompt, random_latents, get_latents=False, num_frames=NUM_FRAMES, num_inference_steps=40,
-#                         fg_masks=fg_masks, fg_masked_latents=None, frozen_steps=2, frozen_prompt=None, fg_prompt=fg_object)
-#     return create_video(video_frames,fps=8, type="final")
-def interpolate_points(points, target_length):
-    """Resample ``points`` so that exactly ``target_length`` entries come back.
-
-    * Same length: ``points`` is returned unchanged.
-    * Too many points: a uniformly spaced subset is returned as a list.
-    * Too few points: the same number of rounded, linearly interpolated points
-      is inserted into every segment; any remaining shortfall is filled by
-      repeating the last point at the end. A single input point is simply
-      repeated ``target_length`` times.
-
-    Args:
-        points: Sequence of equally shaped coordinate vectors (a 2-D NumPy
-            array or anything ``np.asarray`` can turn into one).
-        target_length: Number of points to return.
-
-    Returns:
-        ``points`` itself when no resampling is needed, otherwise a list of
-        ``target_length`` points.
-
-    Raises:
-        ValueError: If ``points`` is empty while ``target_length`` is not 0.
-    """
-    num_points = len(points)
-    if num_points == target_length:
-        return points
-    if num_points > target_length:
-        # Subsample the points uniformly.
-        indices = np.round(np.linspace(0, num_points - 1, target_length)).astype(int)
-        return [points[i] for i in indices]
-    if num_points == 0:
-        raise ValueError("interpolate_points needs at least one point to interpolate from")
-    # Vector arithmetic below needs arrays, not plain Python lists.
-    points = np.asarray(points)
-    if num_points == 1:
-        # No segment to interpolate along (would divide by zero): hold the point.
-        return [points[0]] * target_length
-    extra_per_segment = (target_length - num_points) // (num_points - 1)
-    interpolated_points = []
-    for start, end in zip(points[:-1], points[1:]):
-        interpolated_points.append(start)
-        for step in range(1, extra_per_segment + 1):
-            fraction = step / (extra_per_segment + 1)
-            interpolated_points.append(np.round(start + fraction * (end - start)))
-    interpolated_points.append(points[-1])
-    # The integer division above can leave a shortfall; pad with the last point.
-    shortfall = target_length - len(interpolated_points)
-    interpolated_points.extend([points[-1]] * shortfall)
-    return interpolated_points
-# Use the GPU when torch can see one, otherwise fall back to the CPU.
-torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def _load_pipeline(device):
-    """Load the fp32 "cerspense/zeroscope_v2_576w" pipeline and move it to ``device``."""
-    return TextToVideoSDPipelineSpatialAware.from_pretrained(
-        "cerspense/zeroscope_v2_576w", torch_dtype=torch.float, variant="fp32").to(device)
-# The pipeline is loaded once at import time and shared by every request.
-# A failed first attempt is retried exactly once; only ``Exception`` is caught
-# so that KeyboardInterrupt / SystemExit still stop the process immediately,
-# and a second failure propagates with its real traceback.
-try:
-    pipe = _load_pipeline(torch_device)
-except Exception:
-    pipe = _load_pipeline(torch_device)
-def generate_bb(prompt, fg_object, aspect_ratio, size, motion_direction, trajectory):
-    """Generate a video in which ``fg_object`` follows a hand-drawn trajectory.
-
-    The drawing in ``trajectory['composite']`` is resized to 576x320,
-    thresholded, and its contour vertices are used as trajectory points. The
-    points are ordered along ``motion_direction``, resampled to ``NUM_FRAMES``
-    positions, and a box mask (on the 8x-downsampled 40x72 grid) is painted
-    around each position. The masks are written to ``./fg_masks`` and passed to
-    ``generate_video`` as foreground masks.
-
-    Args:
-        prompt: Scene description; embedded in "A realistic lively {prompt}".
-        fg_object: Prompt for the foreground object (forwarded as ``fg_prompt``).
-        aspect_ratio: "square", "horizontal", or anything else (treated as vertical).
-        size: "large", "medium", or anything else (treated as small).
-        motion_direction: "l2r"/"r2l" order points by column, any other value
-            orders by row ("d2u" reversed).
-        trajectory: Mapping whose 'composite' entry is the drawn image array.
-
-    Returns:
-        Whatever ``create_video(video_frames, fps=8, type="final")`` returns
-        (presumably a video path for ``gr.Video`` - confirm in gradio_utils).
-
-    Raises:
-        gr.Error: If no usable trajectory point is found in the drawing.
-    """
-    # Box extents: "x" runs along image rows (height), "y" along columns (width).
-    if size == "large":
-        bbox_size_x = LARGE_BOX_SIZE
-    elif size == "medium":
-        bbox_size_x = int(LARGE_BOX_SIZE * 0.75)
-    else:
-        bbox_size_x = LARGE_BOX_SIZE // 2
-    if aspect_ratio == "square":
-        bbox_size_y = bbox_size_x
-    elif aspect_ratio == "horizontal":
-        bbox_size_y = int(bbox_size_x * 1.33)
-    else:
-        bbox_size_y = int(bbox_size_x * 0.75)
-    image = cv2.resize(trajectory['composite'], (576, 320))
-    # The paint canvas is declared with image_mode="RGB", so convert from RGB (not BGR).
-    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-    _, thresh = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY_INV)
-    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-    bbox_points = []
-    for contour in contours:
-        # Approximate the contour to reduce the number of points.
-        epsilon = 0.01 * cv2.arcLength(contour, True)
-        approx = cv2.approxPolyDP(contour, epsilon, True)
-        for point in approx:
-            # OpenCV points are (column, row); they are stored as [row, column].
-            col, row = point.ravel()
-            # Drop vertices lying on the image border.
-            if 1 <= row < 319 and 1 <= col < 575:
-                bbox_points.append([row, col])
-    if not bbox_points:
-        # Fail with a readable UI message instead of an IndexError further down.
-        raise gr.Error("No trajectory found: draw a path on the canvas before generating.")
-    if motion_direction in ('l2r', 'r2l'):
-        sorted_points = sorted(bbox_points, key=lambda p: p[1], reverse=motion_direction == "r2l")
-    else:
-        sorted_points = sorted(bbox_points, key=lambda p: p[0], reverse=motion_direction == "d2u")
-    # One trajectory position per generated frame.
-    final_points = interpolate_points(np.array(sorted_points), NUM_FRAMES)
-    final_canvas = torch.zeros((NUM_FRAMES, 320 // 8, 576 // 8))
-    for i in range(NUM_FRAMES):
-        x = int(final_points[i][0])
-        y = int(final_points[i][1])
-        # Keep a 16-pixel padding to every image edge, then map pixels to the
-        # 8x-downsampled mask grid.
-        row_start = max(int(x - bbox_size_x / 2), 16) // 8
-        row_stop = min(int(x + bbox_size_x / 2), 304) // 8
-        col_start = max(int(y - bbox_size_y / 2), 16) // 8
-        col_stop = min(int(y + bbox_size_y / 2), 560) // 8
-        final_canvas[i, row_start:row_stop, col_start:col_stop] = 1
-    torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    fg_masks = final_canvas.unsqueeze(1).to(torch_device)
-    # Save fg_masks as images; cv2.imwrite fails silently if the folder is missing.
-    os.makedirs("./fg_masks", exist_ok=True)
-    for i in range(NUM_FRAMES):
-        cv2.imwrite(f"./fg_masks/frame_{i:04d}.png", fg_masks[i,0].cpu().numpy()*255)
-    # Fixed seed so repeated requests start from the same noise.
-    seed = 2
-    random_latents = torch.randn([1, 4, NUM_FRAMES, 40, 72], generator=torch.Generator().manual_seed(seed)).to(torch_device)
-    overall_prompt = f"A realistic lively {prompt}"
-    video_frames = generate_video(pipe, overall_prompt, random_latents, get_latents=False, num_frames=NUM_FRAMES, num_inference_steps=40,
-                        fg_masks=fg_masks, fg_masked_latents=None, frozen_steps=2, frozen_prompt=None, fg_prompt=fg_object)
-    return create_video(video_frames,fps=8, type="final")
-# Gradio UI: two text boxes (scene prompt, foreground object), three radio
-# groups (aspect ratio, box size, motion direction) and a 576x320 paint canvas
-# for the trajectory; inputs are passed positionally to generate_bb.
-demo = gr.Interface(
-    fn=generate_bb,
-    inputs=[
-        "text",
-        "text",
-        gr.Radio(choices=["square", "horizontal", "vertical"]),
-        gr.Radio(choices=["small", "medium", "large"]),
-        gr.Radio(choices=["l2r", "r2l", "u2d", "d2u"]),
-        gr.Paint(
-            value={
-                'background': np.zeros((320,576)),
-                'layers': [],
-                'composite': np.zeros((320,576)),
-            },
-            type="numpy",
-            image_mode="RGB",
-            height=320,
-            width=576,
-        ),
-    ],
-    outputs=gr.Video(),
-)
-# Launch the app only when this file is executed as a script.
-if __name__ == "__main__":
-    demo.launch(share=True)