import numpy as np


class KMeans:
    """K-means clustering (Lloyd's algorithm) with Euclidean distance."""

    def __init__(self):
        # Placeholder until fit() stores the real (clusters, features) array.
        self.centroids = np.empty(1)

    def fit(self, data, clusters, epochs=1, random_seed=42):
        """Cluster ``data`` and store the resulting centroids.

        Parameters
        ----------
        data : array-like, 2-D
            One sample per row. It is indexed as ``data[rows, :]``.
        clusters : int
            Number of clusters. Initial centroids are ``clusters`` distinct
            rows of ``data`` drawn without replacement.
        epochs : int, optional
            Ignored. Kept only so existing callers keep working; the loop
            always runs until the label assignment stops changing.
        random_seed : int, optional
            Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.

        Returns
        -------
        numpy.ndarray
            Cluster index of every row of ``data``.
        """
        data = np.asarray(data)
        np.random.seed(random_seed)
        n_samples = len(data)

        # Fancy indexing returns a copy, so updating centroids never writes
        # back into the caller's array.
        centroids = data[np.random.choice(n_samples, clusters, replace=False), :]
        # Centroids are means: with an integer dtype every update would be
        # silently truncated, so promote non-float input to float.
        if not np.issubdtype(centroids.dtype, np.floating):
            centroids = centroids.astype(float)

        # -1 can never be produced by argmin, so the first pass never looks
        # converged by accident.
        previous_labels = np.full(n_samples, -1)
        while True:
            distances = np.linalg.norm(data[:, None, :] - centroids, axis=2)
            labels = np.argmin(distances, axis=1)

            for j in range(clusters):
                members = data[labels == j]
                # An empty cluster keeps its previous centroid. Taking the
                # mean of nothing would yield NaN, and argmin would then
                # send every sample to that NaN centroid.
                if len(members):
                    centroids[j] = np.mean(members, axis=0)

            if np.all(labels == previous_labels):
                break
            previous_labels = labels

        self.centroids = centroids
        return labels

    def predict(self, data):
        """Return the index of the nearest stored centroid for each row."""
        data = np.asarray(data)
        distances = np.linalg.norm(data[:, None, :] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)


class KMedoids:
    """K-medoids clustering with Manhattan (L1) distance.

    Medoids are refined by a greedy swap search: each round applies the
    single (cluster, candidate) swap that most reduces the summed distance
    of that cluster's current members to its medoid.
    """

    def __init__(self, data, clusters):
        # Placeholder until fit() stores the real (clusters, features) array.
        self.medoids = np.empty(1)
        self.data = data
        self.N = len(data)
        self.clusters = clusters

    @staticmethod
    def _manhattan_distances(data, medoids, clusters):
        """Return an (n_samples, clusters) matrix of L1 distances."""
        distances = np.zeros((len(data), clusters))
        for i in range(clusters):
            distances[:, i] = np.sum(np.abs(data - medoids[i]), axis=1)
        return distances

    def fit(self, random_seed=42):
        """Cluster the data given at construction and store the medoids.

        Parameters
        ----------
        random_seed : int, optional
            Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state.

        Returns
        -------
        numpy.ndarray
            Cluster index of every row of the training data.
        """
        np.random.seed(random_seed)
        data = np.asarray(self.data)
        n_samples = self.N
        clusters = self.clusters

        medoids_idx = np.random.choice(n_samples, clusters, replace=False)
        medoids = data[medoids_idx].copy()
        distances = self._manhattan_distances(data, medoids, clusters)
        labels = np.argmin(distances, axis=1)
        all_idxs = np.arange(n_samples)

        while True:
            best_cluster, best_candidate, best_change = -1, -1, 0
            best_distances = None

            # Candidates are all points that are not currently medoids.
            # medoids_idx is kept up to date below, so a replaced medoid
            # becomes eligible again and no point can serve two clusters.
            candidates = all_idxs[~np.isin(all_idxs, medoids_idx)]

            for i in range(clusters):
                members = labels == i
                current_cost = np.sum(distances[members, i])
                for j in candidates:
                    new_distances = np.sum(np.abs(data - data[j]), axis=1)
                    cost_change = np.sum(new_distances[members]) - current_cost
                    # Strict '<' keeps the first best swap in (i, j) order.
                    if cost_change < best_change:
                        best_cluster, best_candidate = i, j
                        best_change = cost_change
                        best_distances = new_distances

            if best_cluster < 0:
                # No swap lowers any cluster's cost: converged.
                break

            distances[:, best_cluster] = best_distances
            medoids[best_cluster] = data[best_candidate]
            medoids_idx[best_cluster] = best_candidate
            labels = np.argmin(distances, axis=1)

        self.medoids = medoids
        return labels

    def predict(self, data):
        """Return the index of the nearest stored medoid (L1) for each row."""
        data = np.asarray(data)
        distances = self._manhattan_distances(data, self.medoids, self.clusters)
        return np.argmin(distances, axis=1)


class EnsembleClustering:
    """Combine KMeans and KMedoids by per-sample majority vote.

    NOTE(review): the two models number their clusters independently and
    the label vectors are not aligned before voting, so the same group may
    carry different indices in each model. With two voters, a disagreement
    is a 1-1 tie that ``argmax`` resolves to the smaller index. Confirm
    this is intended before relying on the combined labels.
    """

    def __init__(self, data, clusters):
        self.data = data
        self.clusters = clusters
        # Fitted sub-models; populated by fit().
        self.kmeans = None
        self.kmedoids = None

    def fit(self):
        """Fit both models on the stored data and return the voted labels."""
        kmeans = KMeans()
        kmeans_labels = kmeans.fit(self.data, self.clusters)
        self.kmeans = kmeans

        kmedoids = KMedoids(data=self.data, clusters=self.clusters)
        kmedoids_labels = kmedoids.fit()
        self.kmedoids = kmedoids

        return self.maximumVoting(kmeans_labels, kmedoids_labels)

    def maximumVoting(self, labels1, labels2):
        """Return, per sample, the label with the most votes.

        Each of the two label vectors casts one vote per sample. Ties go
        to the smallest label index (``argmax`` returns the first maximum).

        Parameters
        ----------
        labels1, labels2 : array-like of int
            Equal-length label vectors with values in ``[0, clusters)``.

        Returns
        -------
        numpy.ndarray of int
            The winning label for each sample.
        """
        labels1 = np.asarray(labels1)
        labels2 = np.asarray(labels2)
        n_samples = len(labels1)

        votes = np.zeros((n_samples, self.clusters), dtype=int)
        rows = np.arange(n_samples)
        # Row indices are unique, so plain in-place addition counts each
        # vote exactly once. The two statements are separate so that
        # agreeing voters add up to 2.
        votes[rows, labels1] += 1
        votes[rows, labels2] += 1
        return votes.argmax(axis=1).astype(int)

    def predict(self, data):
        """Predict with both fitted models and return the voted labels."""
        kmeans_labels = self.kmeans.predict(data)
        kmedoids_labels = self.kmedoids.predict(data)
        return self.maximumVoting(kmeans_labels, kmedoids_labels)