Spaces · Build error
joselobenitezg committed
Commit 9930f16 · Parent(s): 2e49a94

add normal

Files changed:
- app.py +5 -0
- inference/normal.py +176 -0
app.py
CHANGED
@@ -8,6 +8,7 @@ import spaces
 from inference.seg import process_image_or_video as process_seg
 from inference.pose import process_image_or_video as process_pose
 from inference.depth import process_image_or_video as process_depth
+from inference.normal import process_image_or_video as process_normal
 from config import SAPIENS_LITE_MODELS_PATH
 
 def update_model_choices(task):
@@ -24,6 +25,8 @@ def process_image(input_image, task, version):
         result = process_pose(input_image, task=task.lower(), version=version)
     elif task.lower() == 'depth':
         result = process_depth(input_image, task=task.lower(), version=version)
+    elif task.lower() == 'normal':
+        result = process_normal(input_image, task=task.lower(), version=version)
     else:
         result = None
         print(f"Tarea no soportada: {task}")
@@ -50,6 +53,8 @@ def process_video(input_video, task, version):
         processed_frame = process_pose(frame_rgb, task=task.lower(), version=version)
     elif task.lower() == 'depth':
         processed_frame = process_depth(frame_rgb, task=task.lower(), version=version)
+    elif task.lower() == 'normal':
+        processed_frame = process_normal(frame_rgb, task=task.lower(), version=version)
     else:
         processed_frame = None
         print(f"Tarea no soportada: {task}")
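The hunks above wire the new 'normal' task into the same if/elif dispatch already used for segmentation, pose, and depth in process_image and process_video. A minimal smoke-test sketch of that path follows; it assumes app.py can be imported without auto-launching the Gradio UI, and the image path, "Normal" label, and "sapiens_0.3b" version string are placeholders, not part of this commit.

# Hypothetical smoke test for the new branch (assumptions noted above; not part of the commit).
from PIL import Image
import app  # the Space's app.py

img = Image.open("assets/example.jpg")  # placeholder RGB test image
out = app.process_image(img, "Normal", "sapiens_0.3b")  # task is lower-cased inside process_image
print(type(out))  # a PIL.Image with the colour-coded normal map, or None on failure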
inference/normal.py
CHANGED
@@ -0,0 +1,176 @@
# import torch
# import torch.nn.functional as F
# import numpy as np
# import cv2
# from PIL import Image
# from config import SAPIENS_LITE_MODELS_PATH

# # Example usage
# TASK = 'normal'
# VERSION = 'sapiens_0.3b'

# model_path = get_model_path(TASK, VERSION)
# print(model_path)

# model = torch.jit.load(model_path)
# model.eval()
# model.to("cuda")

# import torch
# import torch.nn.functional as F
# import numpy as np
# import cv2

# def get_normal(image, normal_model, input_shape=(3, 1024, 768), device="cuda"):
#     # Preprocess the image
#     img = preprocess_image(image, input_shape)

#     # Run the model
#     with torch.no_grad():
#         result = normal_model(img.to(device))

#     # Post-process the output
#     normal_map = post_process_normal(result, (image.shape[0], image.shape[1]))

#     # Visualize the normal map
#     normal_image = visualize_normal(normal_map)

#     return normal_image, normal_map

# def preprocess_image(image, input_shape):
#     img = cv2.resize(image, (input_shape[2], input_shape[1]), interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
#     img = torch.from_numpy(img)
#     img = img[[2, 1, 0], ...].float()
#     mean = torch.tensor([123.5, 116.5, 103.5]).view(-1, 1, 1)
#     std = torch.tensor([58.5, 57.0, 57.5]).view(-1, 1, 1)
#     img = (img - mean) / std
#     return img.unsqueeze(0)

# def post_process_normal(result, original_shape):
#     # Check the dimensionality of the result
#     if result.dim() == 3:
#         result = result.unsqueeze(0)
#     elif result.dim() == 4:
#         pass
#     else:
#         raise ValueError(f"Unexpected result dimension: {result.dim()}")

#     # Ensure we're interpolating to the correct dimensions
#     seg_logits = F.interpolate(result, size=original_shape, mode="bilinear", align_corners=False).squeeze(0)
#     normal_map = seg_logits.float().cpu().numpy().transpose(1, 2, 0)  # H x W x 3
#     return normal_map

# def visualize_normal(normal_map):
#     normal_map_norm = np.linalg.norm(normal_map, axis=-1, keepdims=True)
#     normal_map_normalized = normal_map / (normal_map_norm + 1e-5)  # Add a small epsilon to avoid division by zero

#     # Convert to 0-255 range and BGR format for visualization
#     normal_map_vis = ((normal_map_normalized + 1) / 2 * 255).astype(np.uint8)
#     normal_map_vis = normal_map_vis[:, :, ::-1]  # RGB to BGR

#     return normal_map_vis

# def load_normal_model(checkpoint, use_torchscript=False):
#     if use_torchscript:
#         return torch.jit.load(checkpoint)
#     else:
#         model = torch.export.load(checkpoint).module()
#         model = model.to("cuda")
#         model = torch.compile(model, mode="max-autotune", fullgraph=True)
#         return model

# import cv2
# import numpy as np

# # Load the model
# normal_model = load_normal_model(model_path, use_torchscript='_torchscript')

# # Load the image
# image = cv2.imread("/home/user/app/assets/image.webp")

# # Get the normal map and visualization
# normal_image, normal_map = get_normal(image, normal_model)

# # Save the results
# cv2.imwrite("output_normal_image.png", normal_image)

import torch
import torch.nn.functional as F
import numpy as np
import cv2
from PIL import Image
from config import SAPIENS_LITE_MODELS_PATH

def load_model(task, version):
    # Look up the TorchScript checkpoint for this task/version and load it on GPU if available.
    try:
        model_path = SAPIENS_LITE_MODELS_PATH[task][version]
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = torch.jit.load(model_path)
        model.eval()
        model.to(device)
        return model, device
    except KeyError as e:
        print(f"Error: Tarea o versión inválida. {e}")
        return None, None

def preprocess_image(image, input_shape):
    img = cv2.resize(image, (input_shape[2], input_shape[1]), interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
    img = torch.from_numpy(img)
    img = img[[2, 1, 0], ...].float()
    mean = torch.tensor([123.5, 116.5, 103.5]).view(-1, 1, 1)
    std = torch.tensor([58.5, 57.0, 57.5]).view(-1, 1, 1)
    img = (img - mean) / std
    return img.unsqueeze(0)

def post_process_normal(result, original_shape):
    if result.dim() == 3:
        result = result.unsqueeze(0)
    elif result.dim() == 4:
        pass
    else:
        raise ValueError(f"Unexpected result dimension: {result.dim()}")

    seg_logits = F.interpolate(result, size=original_shape, mode="bilinear", align_corners=False).squeeze(0)
    normal_map = seg_logits.float().cpu().numpy().transpose(1, 2, 0)  # H x W x 3
    return normal_map

def visualize_normal(normal_map):
    normal_map_norm = np.linalg.norm(normal_map, axis=-1, keepdims=True)
    normal_map_normalized = normal_map / (normal_map_norm + 1e-5)  # Add a small epsilon to avoid division by zero

    normal_map_vis = ((normal_map_normalized + 1) / 2 * 255).astype(np.uint8)
    normal_map_vis = normal_map_vis[:, :, ::-1]  # RGB to BGR

    return normal_map_vis

def process_image_or_video(input_data, task='normal', version='sapiens_0.3b'):
    # Entry point used by app.py: accepts a PIL image or a numpy video frame (RGB).
    model, device = load_model(task, version)
    if model is None or device is None:
        return None

    input_shape = (3, 1024, 768)

    def process_frame(frame):
        if isinstance(frame, Image.Image):
            frame = np.array(frame)

        if frame.shape[2] == 4:  # RGBA
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

        img = preprocess_image(frame, input_shape)

        with torch.no_grad():
            result = model(img.to(device))

        normal_map = post_process_normal(result, (frame.shape[0], frame.shape[1]))
        normal_image = visualize_normal(normal_map)

        return Image.fromarray(cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB))

    if isinstance(input_data, np.ndarray):  # Video frame
        return process_frame(input_data)
    elif isinstance(input_data, Image.Image):  # Imagen
        return process_frame(input_data)
    else:
        print("Tipo de entrada no soportado. Por favor, proporcione una imagen PIL o un frame de video numpy.")
        return None
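For completeness, a short usage sketch of the new module on its own. The file path and dummy frame below are placeholders, not part of this commit; process_image_or_video accepts either a PIL image or a numpy RGB frame and falls back to CPU when CUDA is unavailable.

# Hypothetical standalone usage of inference/normal.py (paths and inputs are placeholders).
import numpy as np
from PIL import Image
from inference.normal import process_image_or_video

# PIL image input, e.g. a Gradio image upload
result = process_image_or_video(Image.open("assets/example.jpg"), task='normal', version='sapiens_0.3b')
if result is not None:
    result.save("normal_map.png")

# Single RGB video frame as a numpy array
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder frame
frame_result = process_image_or_video(frame)  # defaults: task='normal', version='sapiens_0.3b'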