import math
from typing import List, Tuple

import numpy as np
from scipy.ndimage import label
from sklearn.mixture import GaussianMixture

from atoms_detection.dl_detection import DLDetection
from utils.constants import ModelArgs


class DLGMMdetection(DLDetection):
    """DL detection variant that splits large connected components of the
    prediction map into several atom centres by fitting small Gaussian
    mixture models to samples drawn from the map."""

    # Components with a bounding box larger than this (in pixels) are assumed
    # to contain more than one atom and are split with a GMM.
    MAX_SINGLE_ATOM_AREA = 200
    MAX_ATOMS_PER_AREA = 3
    COVARIANCE_TYPE = "full"

    def __init__(
        self,
        model_name: ModelArgs,
        ckpt_filename: str,
        dataset_csv: str,
        threshold: float,
        detections_path: str,
        inference_cache_path: str,
        covariance_penalisation: float = 0.03,
        n_clusters_penalisation: float = 0.33,
        distance_penalisation: float = 0.11,
        n_samples_per_gmm: int = 600,
    ):
        super().__init__(
            model_name,
            ckpt_filename,
            dataset_csv,
            threshold,
            detections_path,
            inference_cache_path,
        )
        self.covariance_penalisation = covariance_penalisation
        self.n_clusters_penalisation = n_clusters_penalisation
        self.distance_penalisation = distance_penalisation
        self.n_samples_per_gmm = n_samples_per_gmm

    def pred_map_to_atoms(
        self, pred_map: np.ndarray
    ) -> Tuple[List[Tuple[int, int]], List[float]]:
        pred_mask = pred_map > self.threshold
        labeled_array, num_features = label(pred_mask)
        # Stash the prediction map so bbox_to_center_coords can extract the
        # region belonging to each oversized component for GMM splitting.
        self.current_pred_map = pred_map
        center_coords_list = []
        likelihood_list = []
        # Each labelled connected component yields one or more atom centres
        # (label 0 is the background, so start at 1).
        for label_idx in range(1, num_features + 1):
            label_mask = np.where(labeled_array == label_idx)
            likelihood = np.max(pred_map[label_mask])
            atom_bbox = (
                label_mask[1].min(),
                label_mask[1].max(),
                label_mask[0].min(),
                label_mask[0].max(),
            )
            center_coords = self.bbox_to_center_coords(atom_bbox)
            center_coords_list += center_coords
            # One likelihood entry per centre: a small component yields a
            # single centre, an oversized one a centre per GMM mean.
            likelihood_list.extend([likelihood] * len(center_coords))
        self.current_pred_map = None
        print(f"number of atoms: {len(center_coords_list)}")
        return center_coords_list, likelihood_list

    def bbox_to_center_coords(
        self, bbox: Tuple[int, int, int, int]
    ) -> List[Tuple[int, int]]:
        """Return atom centres for a component bbox (x_min, x_max, y_min, y_max):
        a single centre for small boxes, GMM-derived centres for large ones."""
        pixel_area = (bbox[1] - bbox[0]) * (bbox[3] - bbox[2])
        if pixel_area < self.MAX_SINGLE_ATOM_AREA:
            return super().bbox_to_center_coords(bbox)
        pmap = self.get_current_prediction_map_region(bbox)
        local_atom_center_list = self.run_gmm_pipeline(pmap)
        # Shift the GMM means from region-local back to image coordinates.
        return [(x + bbox[0], y + bbox[2]) for x, y in local_atom_center_list]

    def sample_img_hist(self, img_region):
        """Draw (x, y) samples from the region by treating its intensities as
        an unnormalised 2-D histogram (inverse-transform sampling on the CDF)."""
        cdf = np.cumsum(img_region.ravel())
        cdf = cdf / cdf[-1]
        values = np.random.rand(self.n_samples_per_gmm)
        value_bins = np.searchsorted(cdf, values)
        # ravel() flattens in row-major order, so unravelling with the region's
        # own shape yields (row, col) = (y, x); the previous (width, height)
        # shape was only correct for square regions.
        y_idx, x_idx = np.unravel_index(value_bins, img_region.shape)
        return x_idx, y_idx

    def run_gmm_pipeline(self, prediction_map: np.ndarray) -> List[Tuple[int, int]]:
        """Fit GMMs with 1..MAX_ATOMS_PER_AREA components to samples drawn from
        the prediction map and keep the model with the best penalised score."""
        retries = 2
        new_x, new_y = self.sample_img_hist(prediction_map)
        obs = np.array((new_x, new_y)).T
        best_gmm, best_score = None, -np.inf  # np.NINF was removed in NumPy 2.0
        for k in range(1, self.MAX_ATOMS_PER_AREA + 1):
            for _ in range(retries):
                gmm = GaussianMixture(
                    n_components=k, covariance_type=self.COVARIANCE_TYPE
                )
                gmm.fit(obs)
                log_likelihood = gmm.score(obs)
                covar = np.linalg.norm(gmm.covariances_)
                # Penalise pairs of means closer than 12 px quadratically;
                # the list is empty (hence the penalty zero) when k == 1.
                distances = [
                    math.dist(p1, p2)
                    for i, p1 in enumerate(gmm.means_[:-1])
                    for p2 in gmm.means_[i + 1 :]
                ]
                dist_penalisation = sum(max(12 - d, 0) ** 2 for d in distances)
                # Score: log-likelihood minus penalties for wide covariances,
                # extra components, and means that crowd each other.
                score = (
                    log_likelihood
                    - covar * self.covariance_penalisation
                    - k * self.n_clusters_penalisation
                    - dist_penalisation * self.distance_penalisation
                )
                print(
                    k,
                    score,
                    log_likelihood,
                    covar * self.covariance_penalisation,
                    k * self.n_clusters_penalisation,
                    dist_penalisation * self.distance_penalisation,
                )
                if score > best_score:
                    best_gmm, best_score = gmm, score
        # sample_img_hist returns (x, y) samples, so the fitted means are
        # already (x, y) pairs and need no swapping.
        return [(x, y) for x, y in best_gmm.means_.tolist()]
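

# ---------------------------------------------------------------------------
# Minimal self-contained sketch of the sampling + GMM-splitting idea above.
# It runs without the project's checkpoints or CSVs; the synthetic blob
# positions, grid size, sample count, and seed are illustrative assumptions.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Synthetic prediction map: two overlapping Gaussian blobs on a 32x32 grid.
    yy, xx = np.mgrid[0:32, 0:32]
    pred = np.exp(-((xx - 10) ** 2 + (yy - 16) ** 2) / 18.0)
    pred += np.exp(-((xx - 22) ** 2 + (yy - 16) ** 2) / 18.0)

    # Inverse-transform sampling from the map, mirroring sample_img_hist.
    cdf = np.cumsum(pred.ravel())
    cdf /= cdf[-1]
    flat_bins = np.searchsorted(cdf, rng.random(600))
    y_idx, x_idx = np.unravel_index(flat_bins, pred.shape)
    obs = np.column_stack((x_idx, y_idx))

    # Fit 1..3 components; the two-component fit should land near the blobs.
    for k in range(1, 4):
        gmm = GaussianMixture(n_components=k, covariance_type="full").fit(obs)
        print(k, round(gmm.score(obs), 3), gmm.means_.round(1).tolist())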