|
|
|
|
|
import io |
|
from collections import defaultdict |
|
from typing import Tuple, List |
|
|
|
import cv2 |
|
import numpy as np |
|
from PIL import Image as PImage |
|
from hoho.color_mappings import gestalt_color_mapping |
|
from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary |
|
from scipy.spatial import KDTree |
|
from scipy.spatial.distance import cdist |
|
|
|
# Gestalt segmentation colors of the vertex classes, stored as numpy arrays so
# a tolerance can be added/subtracted when building cv2.inRange bounds.
apex_color = np.array(gestalt_color_mapping["apex"])
eave_end_point = np.array(gestalt_color_mapping["eave_end_point"])
flashing_end_point = np.array(gestalt_color_mapping["flashing_end_point"])

# Color that clean_image treats as "unclassified".
unclassified = np.array([(215, 62, 138)])

line_classes = ['eave', 'ridge', 'rake', 'valley']
|
|
|
|
|
def empty_solution():
    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.

    Both vertices are at the origin and the single edge joins vertex 0 to
    vertex 1.
    '''
    placeholder_vertices = np.zeros((2, 3))
    placeholder_edges = [(0, 1)]
    return placeholder_vertices, placeholder_edges
|
|
|
|
|
def undesired_objects(image):
    '''Keep only the largest connected foreground component of a mask.

    Args:
        image: mask array; it is cast to ``uint8`` and labelled with
            8-connectivity by ``cv2.connectedComponentsWithStats``.

    Returns:
        A float array shaped like the label image that is 1 on the largest
        non-background component and 0 elsewhere. If the mask contains no
        foreground component at all, the result is all zeros.
    '''
    image = image.astype('uint8')
    nb_components, output, stats, _centroids = cv2.connectedComponentsWithStats(image, connectivity=8)

    img2 = np.zeros(output.shape)
    if nb_components < 2:
        # Label 0 is the background; with no other label there is nothing to
        # keep (indexing sizes[1] would raise IndexError here).
        return img2

    # The last stats column holds the component area.
    sizes = stats[:, -1]
    # argmax returns the first maximum, skipping the background label 0.
    max_label = 1 + int(np.argmax(sizes[1:]))

    img2[output == max_label] = 1
    return img2
|
|
|
|
|
def clean_image(image_gestalt) -> np.ndarray:
    '''Zero out everything outside the main classified region of a gestalt image.

    The pixels matching the ``unclassified`` color are excluded, the largest
    remaining connected region is kept, closed and dilated, and the first
    three channels of the image are multiplied by that 0/1 mask.

    Args:
        image_gestalt: gestalt segmentation image (anything ``np.array`` accepts).

    Returns:
        The masked image as a numpy array (a copy of the input).
    '''
    pixels = np.array(image_gestalt)

    lower_bound = unclassified + 0.0
    upper_bound = unclassified + 0.8
    classified = cv2.bitwise_not(cv2.inRange(pixels, lower_bound, upper_bound))

    keep = undesired_objects(classified).astype(np.uint8)
    square = np.ones((11, 11), np.uint8)
    keep = cv2.morphologyEx(keep, cv2.MORPH_CLOSE, square, iterations=11)
    keep = cv2.morphologyEx(keep, cv2.MORPH_DILATE, square, iterations=2)

    for channel in range(3):
        pixels[:, :, channel] *= keep
    return pixels
|
|
|
|
|
def get_vertices(image_gestalt, *, color_range=4., dialations=3, erosions=1, kernel_size=13):
    '''Locate apex and eave/flashing end-point blobs in a gestalt image.

    Args:
        image_gestalt: gestalt segmentation image as an array.
        color_range: tolerance added to / subtracted from each class color.
        dialations: number of dilation iterations applied to each mask.
        erosions: number of erosion iterations applied after the dilation.
        kernel_size: side length of the square morphology kernel.

    Returns:
        ``(apex_centroids, other_centroids, apex_mask, eave_end_point_mask)``
        where the centroid arrays exclude the first (background) component and
        ``other_centroids`` covers eave and flashing end points together.
    '''
    def color_mask(color):
        return cv2.inRange(image_gestalt, color - color_range, color + color_range)

    structuring_element = np.ones((kernel_size, kernel_size), np.uint8)

    def grow_then_shrink(mask):
        grown = cv2.morphologyEx(mask, cv2.MORPH_DILATE, structuring_element, iterations=dialations)
        return cv2.morphologyEx(grown, cv2.MORPH_ERODE, structuring_element, iterations=erosions)

    raw_apex = color_mask(apex_color)
    raw_end_points = cv2.bitwise_or(color_mask(eave_end_point), color_mask(flashing_end_point))

    apex_mask = grow_then_shrink(raw_apex)
    eave_end_point_mask = grow_then_shrink(raw_end_points)

    # NOTE(review): cv2.CV_32S is passed as `stats`; it looks like it was meant
    # to be the label type -- confirm before changing.
    apex_centroids = cv2.connectedComponentsWithStats(apex_mask, connectivity=8, stats=cv2.CV_32S)[-1]
    other_centroids = cv2.connectedComponentsWithStats(eave_end_point_mask, connectivity=8, stats=cv2.CV_32S)[-1]

    return apex_centroids[1:], other_centroids[1:], apex_mask, eave_end_point_mask
|
|
|
|
|
def infer_vertices(image_gestalt, *, color_range=4.):
    '''Infer vertex candidates where ridge and rake regions overlap.

    Args:
        image_gestalt: gestalt segmentation image as an array.
        color_range: tolerance added to / subtracted from each class color.

    Returns:
        ``(inferred_centroids, intersection_mask)``: centroids of the dilated
        ridge/rake overlap blobs (first, background component dropped) and the
        mask they were computed from.
    '''
    thin_kernel = np.ones((3, 3))

    def thick_class_mask(class_color):
        mask = cv2.inRange(image_gestalt, class_color - color_range, class_color + color_range)
        return cv2.morphologyEx(mask, cv2.MORPH_DILATE, thin_kernel, iterations=4)

    ridge_color = np.array(gestalt_color_mapping["ridge"])
    rake_color = np.array(gestalt_color_mapping["rake"])
    ridge_mask = thick_class_mask(ridge_color)
    rake_mask = thick_class_mask(rake_color)

    overlap = cv2.bitwise_and(ridge_mask, rake_mask)
    intersection_mask = cv2.morphologyEx(overlap, cv2.MORPH_DILATE, np.ones((11, 11)), iterations=3)

    inferred_centroids = cv2.connectedComponentsWithStats(intersection_mask, connectivity=8, stats=cv2.CV_32S)[-1]
    return inferred_centroids[1:], intersection_mask
|
|
|
|
|
def get_missed_vertices(vertices, inferred_centroids, *, min_missing_distance=200.0, **kwargs):
    '''Return the inferred centroids that have no known vertex nearby.

    Args:
        vertices: array of already detected vertex positions.
        inferred_centroids: array of candidate positions to check.
        min_missing_distance: search radius; a candidate with no vertex inside
            it is reported as missed.
        **kwargs: accepted and ignored.

    Returns:
        The rows of ``inferred_centroids`` without a neighbour in range.
    '''
    tree = KDTree(vertices)
    _distances, nearest = tree.query(inferred_centroids, k=1, distance_upper_bound=min_missing_distance)
    # The query reports "no neighbour within the bound" as an index equal to
    # the number of points in the tree.
    has_no_neighbour = nearest == len(tree.data)
    return inferred_centroids[has_no_neighbour]
|
|
|
|
|
def convert_entry_to_human_readable(entry):
    '''Decode the binary fields of a dataset entry into usable objects.

    Keys listed as already usable are copied through unchanged. COLMAP blobs
    (``points3d``, ``cameras``, ``images``) are parsed from their bytes,
    ``ade20k``/``gestalt`` byte lists become RGB PIL images and ``depthcm``
    byte lists become PIL images. Any other key is left out of the result.

    Args:
        entry: mapping of field name to raw value.

    Returns:
        A new dict with the converted fields.
    '''
    passthrough_keys = {'__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces',
                        'face_semantics', 'K', 'R', 't'}
    out = {}
    for key, value in entry.items():
        if key in passthrough_keys:
            out[key] = value
        elif key == 'points3d':
            out[key] = read_points3D_binary(fid=io.BytesIO(value))
        elif key == 'cameras':
            out[key] = read_cameras_binary(fid=io.BytesIO(value))
        elif key == 'images':
            out[key] = read_images_binary(fid=io.BytesIO(value))
        elif key in ('ade20k', 'gestalt'):
            out[key] = [PImage.open(io.BytesIO(blob)).convert('RGB') for blob in value]
        elif key == 'depthcm':
            out[key] = [PImage.open(io.BytesIO(blob)) for blob in entry['depthcm']]
    return out
|
|
|
|
|
def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., point_radius=30, max_angle=5., extend=35,
                                             **kwargs):
    '''Get the vertices and edges from the gestalt segmentation mask of the house.

    Vertices come from ``get_vertices`` (called with its default settings).
    For every edge class, line segments are detected with a probabilistic
    Hough transform, lengthened at both ends, and a pair of vertices is
    connected when some segment has one end near each vertex and runs almost
    parallel to the line joining them.

    Args:
        gest_seg_np: gestalt segmentation image as an array.
        color_range: tolerance around each edge-class color.
        point_radius: radius around a vertex inside which a segment end counts
            as touching it.
        max_angle: largest allowed angle, in degrees, between a segment and the
            vertex-to-vertex direction.
        extend: length by which every detected segment is extended at each end.
        **kwargs: accepted and ignored.

    Returns:
        ``(vertices, connections)``: a list of ``{"xy", "type"}`` dicts (apexes
        first, then eave end points) and a list of index pairs into that list.
    '''
    # Probabilistic Hough transform settings.
    hough_rho = 1
    hough_theta = np.pi / 180
    hough_threshold = 20
    hough_min_line_length = 60
    hough_max_line_gap = 40

    connections = []
    min_abs_cosine = np.cos(np.deg2rad(max_angle))

    apex_centroids, eave_end_point_centroids, _apex_mask, _end_point_mask = get_vertices(gest_seg_np)
    vertex_xy = np.concatenate([apex_centroids, eave_end_point_centroids])

    # Every vertex shares the same squared capture radius.
    squared_radius = np.full(vertex_xy.shape[0], point_radius ** 2, dtype=float)

    for edge_class in ('eave', 'ridge', 'rake', 'valley', 'flashing', 'step_flashing'):
        if len(vertex_xy) < 2:
            break

        class_color = np.array(gestalt_color_mapping[edge_class])
        class_mask = cv2.inRange(gest_seg_np, class_color - color_range, class_color + color_range)
        class_mask = cv2.morphologyEx(class_mask, cv2.MORPH_DILATE, np.ones((3, 3)), iterations=1)
        if not np.any(class_mask):
            continue

        # Blur in place before line detection.
        cv2.GaussianBlur(class_mask, (11, 11), 0, class_mask)
        segments = cv2.HoughLinesP(class_mask, hough_rho, hough_theta, hough_threshold, np.array([]),
                                   hough_min_line_length, hough_max_line_gap)
        if segments is None:
            continue

        unit_directions = np.zeros((len(segments), 2))
        extended = []
        for seg_idx, segment in enumerate(segments):
            for x1, y1, x2, y2 in segment:
                if x1 < x2:
                    # Orient the segment so that it starts at the larger x.
                    x1, y1, x2, y2 = x2, y2, x1, y1
                unit = np.array([x2 - x1, y2 - y1])
                unit = unit / np.linalg.norm(unit)
                unit_directions[seg_idx] = unit

                offset = extend * unit
                x1, y1 = (-offset + (x1, y1)).astype(np.int32)
                x2, y2 = (+offset + (x2, y2)).astype(np.int32)
                extended.append((x1, y1, x2, y2))

        extended = np.array(extended)
        if len(extended) < 1:
            continue

        # Vertex-to-segment-end squared distances: rows are vertices, columns segments.
        start_sq_dist = cdist(vertex_xy, extended[:, :2], metric="sqeuclidean")
        stop_sq_dist = cdist(vertex_xy, extended[:, 2:], metric="sqeuclidean")
        near_start = start_sq_dist < squared_radius[:, np.newaxis]
        near_stop = stop_sq_dist < squared_radius[:, np.newaxis]

        # Only segments with a vertex at both ends can produce a connection.
        anchored_both_ends = np.any(near_start, axis=0) & np.any(near_stop, axis=0)
        near_start = near_start & anchored_both_ends
        near_stop = near_stop & anchored_both_ends

        # Rows of *_pairs: [vertex index, segment index], ordered by segment.
        start_pairs = np.array(np.where(near_start))
        stop_pairs = np.array(np.where(near_stop))
        start_pairs = start_pairs[:, np.argsort(start_pairs[1])]
        stop_pairs = stop_pairs[:, np.argsort(stop_pairs[1])]

        # Group the vertex indices by the segment they touch.
        start_groups = np.split(start_pairs[0], np.unique(start_pairs[1], return_index=True)[1][1:])
        stop_groups = np.split(stop_pairs[0], np.unique(stop_pairs[1], return_index=True)[1][1:])
        segment_ids = np.unique(start_pairs[1])

        # All (start vertex, stop vertex) combinations for every segment.
        begin_vertex_list = []
        end_vertex_list = []
        line_idx_list = []
        for starts, stops, seg_id in zip(start_groups, stop_groups, segment_ids):
            start_grid, stop_grid = np.meshgrid(starts, stops)
            flat_starts = start_grid.flatten()
            begin_vertex_list.extend(flat_starts)
            end_vertex_list.extend(stop_grid.flatten())
            line_idx_list.extend([seg_id] * len(flat_starts))

        line_idx_list = np.array(line_idx_list)
        all_pairs = np.array([begin_vertex_list, end_vertex_list])

        # Unique, order-independent vertex pairs without self-pairs.
        candidate_pairs = np.unique(all_pairs, axis=1)
        candidate_pairs = np.sort(candidate_pairs, axis=0)
        candidate_pairs = np.unique(candidate_pairs, axis=1)
        candidate_pairs = candidate_pairs[:, candidate_pairs[0, :] != candidate_pairs[1, :]]
        if candidate_pairs.shape[1] < 1:
            continue

        pair_directions = vertex_xy[candidate_pairs[0]] - vertex_xy[candidate_pairs[1]]
        pair_directions = pair_directions / np.linalg.norm(pair_directions, axis=1)[:, np.newaxis]

        # Collect, for every candidate pair, the segments that touch both of its vertices.
        column_of_pair = {(int(a), int(b)): col for col, (a, b) in enumerate(candidate_pairs.T)}
        segments_per_pair = [[] for _ in range(candidate_pairs.shape[1])]
        for seg_id, first, second in zip(line_idx_list, begin_vertex_list, end_vertex_list):
            if first == second:
                continue
            key = (int(min(first, second)), int(max(first, second)))
            col = column_of_pair.get(key)
            if col is not None:
                segments_per_pair[col].append(seg_id)

        # Accept a pair when at least one of its segments is nearly parallel to it.
        for col, owned_segments in enumerate(segments_per_pair):
            alignment = np.abs(np.dot(unit_directions[owned_segments], pair_directions[col]))
            if np.any(alignment > min_abs_cosine):
                connections.append(candidate_pairs[:, col])

    vertices = [{"xy": v, "type": "apex"} for v in apex_centroids]
    vertices += [{"xy": v, "type": "eave_end_point"} for v in eave_end_point_centroids]
    return vertices, connections
|
|
|
|
|
def get_uv_depth(vertices, depth):
    '''Get the depth of the vertices from the depth image.

    Args:
        vertices: iterable of dicts whose ``'xy'`` entry is a 2-D image position.
        depth: depth image indexed as ``depth[row, col]``.

    Returns:
        ``(uv, vertex_depth)``: the stacked ``xy`` positions and the depth read
        at each position after truncating it to integers and clamping it to
        the image bounds. For an empty ``vertices`` the results have shapes
        ``(0, 2)`` and ``(0,)``.
    '''
    uv = np.array([v['xy'] for v in vertices])
    if uv.size == 0:
        # np.array([]) is 1-D; give it two columns so the indexing below works.
        uv = uv.reshape(0, 2)

    uv_int = uv.astype(np.int32)
    H, W = depth.shape[:2]
    cols = np.clip(uv_int[:, 0], 0, W - 1)
    rows = np.clip(uv_int[:, 1], 0, H - 1)
    vertex_depth = depth[rows, cols]
    return uv, vertex_depth
|
|
|
|
|
def merge_vertices_3d(vert_edge_per_image, merge_th=0.1, **kwargs):
    '''Merge vertices that are close to each other in 3D space and are of same types.

    Args:
        vert_edge_per_image: mapping of image index to a
            ``(vertices, connections, vertices_3d)`` triple, where ``vertices``
            are dicts with a ``'type'`` entry, ``connections`` are index pairs
            local to that image and ``vertices_3d`` is an ``(N, 3)`` array.
        merge_th: largest distance at which two same-type vertices are merged.
        **kwargs: accepted and ignored.

    Returns:
        ``(new_vertices, new_connections)``: the mean position of every merged
        group and the de-duplicated connections as sorted ``[i, j]`` index
        pairs into ``new_vertices``. With no vertices at all the result is an
        empty ``(0, 3)`` array and an empty list.
    '''
    all_3d_vertices = []
    connections_3d = []
    cur_start = 0
    types = []
    for vertices, connections, vertices_3d in vert_edge_per_image.values():
        types += [int(v['type'] == 'apex') for v in vertices]
        all_3d_vertices.append(vertices_3d)
        # Shift image-local indices into the global, concatenated index space.
        connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
        cur_start += len(vertices_3d)

    if cur_start == 0:
        # Nothing to merge; np.concatenate would fail on an empty list.
        return np.empty((0, 3)), []

    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)

    distmat = cdist(all_3d_vertices, all_3d_vertices)
    types = np.array(types).reshape(-1, 1)
    same_types = cdist(types, types)
    mask_to_merge = (distmat <= merge_th) & (same_types == 0)

    # Distinct neighbourhoods (one per mask row), in sorted order.
    to_merge = sorted({tuple(row.nonzero()[0].tolist()) for row in mask_to_merge})

    # For every vertex, concatenate all neighbourhoods that contain it.
    members_by_vertex = defaultdict(list)
    for group in to_merge:
        for idx in group:
            members_by_vertex[idx] += group

    # Greedily pick groups in ascending vertex order, skipping vertices that an
    # earlier group already covered.
    already_there = set()
    merged = []
    for idx in sorted(members_by_vertex):
        if idx in already_there:
            continue
        group = list(set(members_by_vertex[idx]))
        merged.append(group)
        already_there.update(group)

    new_vertices = []
    old_idx_to_new = {}
    for new_idx, idxs in enumerate(merged):
        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
        for idx in idxs:
            old_idx_to_new[idx] = new_idx
    new_vertices = np.array(new_vertices)

    new_connections = []
    seen = set()
    for conn in connections_3d:
        new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
        if new_con[0] == new_con[1]:
            # Both ends collapsed into the same merged vertex.
            continue
        key = tuple(new_con)
        if key not in seen:
            seen.add(key)
            new_connections.append(new_con)

    return new_vertices, new_connections
|
|
|
|
|
def prune_not_connected(all_3d_vertices, connections_3d):
    '''Prune vertices that are not connected to any other vertex.

    Connected vertices are renumbered in the order they first appear in
    ``connections_3d``. Vertices with identical coordinates share one output
    vertex, and every connection is rewritten with the new indices.

    Args:
        all_3d_vertices: indexable collection of vertex coordinates.
        connections_3d: iterable of index pairs into ``all_3d_vertices``.

    Returns:
        ``(vertices, connections)``: an array of the kept coordinates and a
        list of unique ``(i, j)`` index tuples into it.
    '''
    index_of_coords = {}
    new_indexes = {}
    new_verts = []
    for conn in connections_3d:
        for old_idx in (conn[0], conn[1]):
            if old_idx in new_indexes:
                continue
            coords = tuple(all_3d_vertices[old_idx])
            if coords not in index_of_coords:
                index_of_coords[coords] = len(new_verts)
                new_verts.append(coords)
            # A duplicate coordinate reuses the index of its first occurrence,
            # so every connected vertex always has a valid new index.
            new_indexes[old_idx] = index_of_coords[coords]

    connected_out = list({(new_indexes[conn[0]], new_indexes[conn[1]]) for conn in connections_3d})

    return np.array(new_verts), connected_out
|
|
|
|
|
def predict(entry, visualize=False, scale_estimation_coefficient=2.5, **kwargs) -> Tuple[np.ndarray, List[int]]:
    '''Predict a 3D wireframe for one dataset entry.

    For every view, vertices and edges are extracted from the gestalt
    segmentation, lifted to 3D using the depth map (divided by
    ``scale_estimation_coefficient``) and the camera ``K``/``R``/``t``, then
    merged across views and pruned of unconnected vertices.

    Args:
        entry: raw dataset entry, decoded with ``convert_entry_to_human_readable``.
        visualize: when true, plot the estimate against ``wf_vertices``/``wf_edges``.
        scale_estimation_coefficient: divisor applied to the depth values.
        **kwargs: forwarded to the segmentation and merging steps.

    Returns:
        A ``(key, vertices, edges)`` triple (note that the annotation lists
        only two elements); the empty solution is used when required fields
        are missing or too little geometry was found.
    '''
    good_entry = convert_entry_to_human_readable(entry)

    required_fields = ('gestalt', 'depthcm', 'K', 'R', 't')
    if any(field not in good_entry for field in required_fields):
        print('Missing required fields in the entry')
        return (good_entry['__key__'], *empty_solution())

    vert_edge_per_image = {}
    views = zip(good_entry['gestalt'], good_entry['depthcm'], good_entry['K'], good_entry['R'], good_entry['t'])
    for i, (gest, depth, K, R, t) in enumerate(views):
        # Bring the segmentation to the depth map's resolution.
        gest_seg_np = np.array(gest.resize(depth.size)).astype(np.uint8)
        depth_np = np.array(depth) / scale_estimation_coefficient

        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, **kwargs)
        if len(vertices) < 2 or len(connections) < 1:
            print(f'Not enough vertices or connections in image {i}')
            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
            continue

        uv, depth_vert = get_uv_depth(vertices, depth_np)

        # Back-project the pixels to unit-length rays in the camera frame and
        # scale each ray by the depth read at the vertex.
        rays = np.ones((len(uv), 3))
        rays[:, 0] = (uv[:, 0] - K[0, 2]) / K[0, 0]
        rays[:, 1] = (uv[:, 1] - K[1, 2]) / K[1, 1]
        unit_rays = rays / np.linalg.norm(rays, axis=1)[..., None]
        vertices_3d_local = depth_vert[..., None] * unit_rays

        # Invert the world-to-camera transform built from R and t.
        world_to_cam = np.eye(4)
        world_to_cam[:3, :3] = R
        world_to_cam[:3, 3] = t.reshape(-1)
        cam_to_world = np.linalg.inv(world_to_cam)

        homogeneous = cv2.convertPointsToHomogeneous(vertices_3d_local)
        transformed = cv2.transform(homogeneous, cam_to_world)
        vertices_3d = cv2.convertPointsFromHomogeneous(transformed).reshape(-1, 3)
        vert_edge_per_image[i] = vertices, connections, vertices_3d

    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, **kwargs)
    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)

    if len(all_3d_vertices_clean) < 2 or len(connections_3d_clean) < 1:
        print(f'Not enough vertices or connections in the 3D vertices')
        return (good_entry['__key__'], *empty_solution())

    if visualize:
        from hoho.viz3d import plot_estimate_and_gt
        plot_estimate_and_gt(all_3d_vertices_clean,
                             connections_3d_clean,
                             good_entry['wf_vertices'],
                             good_entry['wf_edges'])

    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean
|
|