# singleVis/spatial_skeleton_edge_constructor.py
from abc import ABC, abstractmethod
import numpy as np
import os
import time
import math
import json
from umap.umap_ import fuzzy_simplicial_set, make_epochs_per_sample
from pynndescent import NNDescent
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import check_random_state
from singleVis.kcenter_greedy import kCenterGreedy
from singleVis.intrinsic_dim import IntrinsicDim
from singleVis.backend import get_graph_elements, get_attention
from singleVis.utils import find_neighbor_preserving_rate
from kmapper import KeplerMapper
from sklearn.cluster import DBSCAN
from scipy.spatial import distance
from scipy.sparse import csr_matrix
import networkx as nx
from itertools import combinations
import torch
from scipy.stats import entropy
from umap import UMAP
from scipy.special import softmax
from trustVis.sampeling import Sampleing
from trustVis.data_generation import DataGeneration
from sklearn.neighbors import KernelDensity
from singleVis.utils import *
from scipy.sparse import coo_matrix
# Fix all random seeds (numpy, torch CPU/CUDA, cuDNN) for reproducibility
seed_value = 0
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
class SpatialEdgeConstructorAbstractClass(ABC):
@abstractmethod
def __init__(self, data_provider) -> None:
pass
@abstractmethod
def construct(self, *args, **kwargs):
# return head, tail, weight, feature_vectors
pass
@abstractmethod
def record_time(self, save_dir, file_name, operation, t):
pass
class SpatialEdgeConstructor(SpatialEdgeConstructorAbstractClass):
    '''Base class for spatial edge constructors: builds the spatial complex.'''
def __init__(self, data_provider, init_num, s_n_epochs, b_n_epochs, n_neighbors) -> None:
"""Init parameters for spatial edge constructor
Parameters
----------
data_provider : data.DataProvider
data provider
init_num : int
init number to calculate c
s_n_epochs : int
the number of epochs to fit for one iteration(epoch)
e.g. n_epochs=5 means each edge will be sampled 5*prob times in one training epoch
b_n_epochs : int
the number of epochs to fit boundary samples for one iteration (epoch)
n_neighbors: int
local connectivity
"""
self.data_provider = data_provider
self.init_num = init_num
self.s_n_epochs = s_n_epochs
self.b_n_epochs = b_n_epochs
self.n_neighbors = n_neighbors
    def get_pred_diff(self, data, neibour_data, knn_indices, epoch):
        """Measure how much each sample's prediction differs from its neighbours'."""
        pred = self.data_provider.get_pred(epoch, data)
        pred_n = self.data_provider.get_pred(epoch, neibour_data)
        new_l = []
        for i in range(len(knn_indices)):
            pred_i = pred_n[knn_indices[i]]
            # mean absolute prediction difference to each neighbour
            pred_diff = np.mean(np.abs(pred_i - pred[i]), axis=-1)
            pred_diff = np.exp(pred_diff) - 1  # amplify the difference
            new_l.append(pred_diff)
        return np.array(new_l)
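    # A minimal sketch (not part of the pipeline) of what the exp(x) - 1
    # amplification above does to prediction differences: identical predictions
    # stay at 0, while larger differences grow super-linearly.
    #
    #   >>> d = np.array([0.0, 0.5, 1.0, 2.0])
    #   >>> np.exp(d) - 1
    #   array([0.        , 0.64872127, 1.71828183, 6.3890561 ])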
    def _construct_fuzzy_complex(self, train_data):
        """
        construct a fuzzy simplicial complex (a Vietoris-Rips-like complex) over train_data
        """
        # number of trees in random projection forest
        n_trees = min(64, 5 + int(round((train_data.shape[0]) ** 0.5 / 20.0)))
        # max number of nearest neighbor iters to perform
        n_iters = max(5, int(round(np.log2(train_data.shape[0]))))
        # distance metric
        metric = "euclidean"
        # get approximate nearest neighbors via NN-Descent
nnd = NNDescent(
train_data,
n_neighbors=self.n_neighbors,
metric=metric,
n_trees=n_trees,
n_iters=n_iters,
max_candidates=60,
verbose=True
)
knn_indices, knn_dists = nnd.neighbor_graph
random_state = check_random_state(None)
complex, sigmas, rhos = fuzzy_simplicial_set(
X=train_data,
n_neighbors=self.n_neighbors,
metric=metric,
random_state=random_state,
knn_indices=knn_indices,
knn_dists=knn_dists
)
return complex, sigmas, rhos, knn_indices
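    # A hedged usage sketch: the returned `complex` is a sparse fuzzy graph over
    # the samples whose entries are membership strengths; `sigmas` and `rhos`
    # are the per-point kernel widths and connectivity distances produced by
    # UMAP's fuzzy_simplicial_set.
    #
    #   >>> # complex, sigmas, rhos, knn_indices = self._construct_fuzzy_complex(train_data)
    #   >>> # complex.shape == (len(train_data), len(train_data))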
    def _get_perturb_neibour(self, train_data, n_perturbations=10, perturbation_scale=0.04):
        # step 1: find the neighbours of each sample
        X = train_data
        nn = NearestNeighbors(n_neighbors=self.n_neighbors)
        nn.fit(X)
        _, indices = nn.kneighbors(X)
        # steps 2-4: for each data point and each of its neighbours, generate
        # random perturbations and apply them to the neighbour
        X_perturbed = []
        for i in range(X.shape[0]):
            for j in range(self.n_neighbors):
                for _ in range(n_perturbations):
                    # random Gaussian perturbation
                    perturbation = np.random.normal(scale=perturbation_scale, size=X.shape[1])
                    # apply perturbation to the neighbour
                    perturbed_point = X[indices[i, j]] + perturbation
                    X_perturbed.append(perturbed_point)
        return np.array(X_perturbed)
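    # A small worked example (assuming the fix above that initializes and
    # returns X_perturbed): the result stacks
    # X.shape[0] * n_neighbors * n_perturbations perturbed neighbour copies,
    # e.g. 1000 samples with n_neighbors=15 and n_perturbations=10 yield
    # 150,000 synthetic points.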
    def _construct_sample_fuzzy_complex(self, train_data):
        """
        construct a sparse sample-sample fuzzy complex using a small, fixed
        neighborhood (n_neighbors=2)
        """
        n_neighbors = 2
        # number of trees in random projection forest
        n_trees = min(64, 5 + int(round((train_data.shape[0]) ** 0.5 / 20.0)))
        # max number of nearest neighbor iters to perform
        n_iters = max(5, int(round(np.log2(train_data.shape[0]))))
        # distance metric
        metric = "euclidean"
        # get approximate nearest neighbors via NN-Descent
nnd = NNDescent(
train_data,
n_neighbors=n_neighbors,
metric=metric,
n_trees=n_trees,
n_iters=n_iters,
max_candidates=60,
verbose=True
)
knn_indices, knn_dists = nnd.neighbor_graph
random_state = check_random_state(42)
complex, sigmas, rhos = fuzzy_simplicial_set(
X=train_data,
n_neighbors=n_neighbors,
metric=metric,
random_state=random_state,
knn_indices=knn_indices,
knn_dists=knn_dists
)
return complex, sigmas, rhos, knn_indices
    def _construct_boundary_wise_complex(self, train_data, border_centers):
        """compute the boundary wise complex
        for every point (train and border alike), we calculate its k nearest
        border points; neighbor indices are offset by len(train_data) so that
        they address the concatenated (train, border) array
        """
        print("train data:", train_data.shape, "border centers:", border_centers.shape)
        high_neigh = NearestNeighbors(n_neighbors=self.n_neighbors, radius=0.4)
        high_neigh.fit(border_centers)
        fitting_data = np.concatenate((train_data, border_centers), axis=0)
        knn_dists, knn_indices = high_neigh.kneighbors(fitting_data, n_neighbors=self.n_neighbors, return_distance=True)
        # shift neighbor indices to point into the concatenated array
        knn_indices = knn_indices + len(train_data)
random_state = check_random_state(42)
bw_complex, sigmas, rhos = fuzzy_simplicial_set(
X=fitting_data,
n_neighbors=self.n_neighbors,
metric="euclidean",
random_state=random_state,
knn_indices=knn_indices,
knn_dists=knn_dists
)
return bw_complex, sigmas, rhos, knn_indices
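    # A small worked example of the index offset above (illustrative values
    # only): with 4 train points and 2 border centers, fitting_data has indices
    # 0..5; a raw neighbor index 1 into border_centers becomes 4 + 1 = 5, i.e.
    # the second border center inside the concatenated array.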
    def _construct_boundary_wise_complex_skeleton(self, train_data, border_centers):
        """compute the skeleton wise complex
        for every point (train and skeleton alike), we calculate its k nearest
        skeleton points; neighbor indices are offset by len(train_data) so that
        they address the concatenated (train, skeleton) array
        """
        print("train data:", train_data.shape, "skeleton data:", border_centers.shape)
        high_neigh = NearestNeighbors(n_neighbors=self.n_neighbors, radius=0.4)
        high_neigh.fit(border_centers)
        fitting_data = np.concatenate((train_data, border_centers), axis=0)
        knn_dists, knn_indices = high_neigh.kneighbors(fitting_data, n_neighbors=self.n_neighbors, return_distance=True)
        # shift neighbor indices to point into the concatenated array
        knn_indices = knn_indices + len(train_data)
random_state = check_random_state(42)
sk_complex, sigmas, rhos = fuzzy_simplicial_set(
X=fitting_data,
n_neighbors=self.n_neighbors,
metric="euclidean",
random_state=random_state,
knn_indices=knn_indices,
knn_dists=knn_dists
)
return sk_complex, sigmas, rhos, knn_indices
    def _construct_proxy_based_edge_dataset(self, proxy_complex, sample_sample, proxy_training_complex):
        """
        construct the mixed edge dataset for one time step,
        connecting proxies to each other and proxies to train data (both directions)
        :param proxy_complex: fuzzy complex over the proxies
        :param sample_sample: optional fuzzy complex over the samples themselves
        :param proxy_training_complex: proxy-to-train-data complex
        :return: edge dataset (head, tail, weight)
        """
        # get data from graph
        _, pv_head, pv_tail, pv_weight, _ = get_graph_elements(proxy_complex, self.s_n_epochs)
        _, pt_head, pt_tail, pt_weight, _ = get_graph_elements(proxy_training_complex, self.s_n_epochs)
        head = np.concatenate((pv_head, pt_head), axis=0)
        tail = np.concatenate((pv_tail, pt_tail), axis=0)
        weight = np.concatenate((pv_weight, pt_weight), axis=0)
        if sample_sample is not None:
            _, s_head, s_tail, s_weight, _ = get_graph_elements(sample_sample, self.s_n_epochs)
            head = np.concatenate((head, s_head), axis=0)
            tail = np.concatenate((tail, s_tail), axis=0)
            weight = np.concatenate((weight, s_weight), axis=0)
        return head, tail, weight
    def _construct_active_learning_step_edge_dataset(self, vr_complex, bw_complex, al_complex):
        """
        construct the mixed edge dataset for one active learning time step,
        connecting border points and train data (both directions)
        :param vr_complex: Vietoris-Rips complex over the clustered samples
        :param bw_complex: boundary-augmented complex (may be None)
        :param al_complex: complex over the unclustered samples (may be None)
        :return: edge dataset (head, tail, weight)
        """
        # get data from the graphs; bw_complex and al_complex are optional
        _, vr_head, vr_tail, vr_weight, _ = get_graph_elements(vr_complex, self.s_n_epochs)
        heads, tails, weights = [vr_head], [vr_tail], [vr_weight]
        if al_complex is not None:
            _, al_head, al_tail, al_weight, _ = get_graph_elements(al_complex, self.s_n_epochs)
            heads.append(al_head)
            tails.append(al_tail)
            weights.append(al_weight)
        if self.b_n_epochs > 0 and bw_complex is not None:
            _, bw_head, bw_tail, bw_weight, _ = get_graph_elements(bw_complex, self.b_n_epochs)
            # bw_weight = 1.5 * bw_weight
            heads.append(bw_head)
            tails.append(bw_tail)
            weights.append(bw_weight)
        return np.concatenate(heads, axis=0), np.concatenate(tails, axis=0), np.concatenate(weights, axis=0)
    def _construct_step_edge_dataset(self, vr_complex, bw_complex):
        """
        construct the mixed edge dataset for one time step,
        connecting border points and train data (both directions)
        :param vr_complex: Vietoris-Rips complex
        :param bw_complex: boundary-augmented complex (may be None)
        :return: edge dataset (head, tail, weight)
        """
        # get data from graph
        _, vr_head, vr_tail, vr_weight, _ = get_graph_elements(vr_complex, self.s_n_epochs)
        if bw_complex is None:
            return vr_head, vr_tail, vr_weight
        else:
            _, bw_head, bw_tail, bw_weight, _ = get_graph_elements(bw_complex, self.b_n_epochs)
            head = np.concatenate((vr_head, bw_head), axis=0)
            tail = np.concatenate((vr_tail, bw_tail), axis=0)
            weight = np.concatenate((vr_weight, bw_weight), axis=0)
        return head, tail, weight
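    # A hedged sketch of what the (head, tail, weight) triple represents:
    # element i describes one edge from feature_vectors[head[i]] to
    # feature_vectors[tail[i]] with sampling weight weight[i]; downstream
    # trainers typically draw each edge roughly n_epochs * prob times
    # (see make_epochs_per_sample imported above).
    #
    #   >>> # head, tail, weight = self._construct_step_edge_dataset(complex, None)
    #   >>> # assert len(head) == len(tail) == len(weight)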
    def construct(self):
        raise NotImplementedError
def record_time(self, save_dir, file_name, operation, t):
file_path = os.path.join(save_dir, file_name+".json")
if os.path.exists(file_path):
with open(file_path, "r") as f:
ti = json.load(f)
else:
ti = dict()
ti[operation] = t
with open(file_path, "w") as f:
json.dump(ti, f)
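    # Example of the JSON this produces (illustrative names): after
    # record_time("logs", "time", "complex_construction", 12.3) the file
    # logs/time.json contains {"complex_construction": 12.3}. The subclass
    # overrides below nest the value under the iteration id instead, e.g.
    # {"complex_construction": {"1": 12.3}}.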
class ProxyBasedSpatialEdgeConstructor(SpatialEdgeConstructor):
def __init__(self, data_provider, iteration, s_n_epochs, b_n_epochs, n_neighbors,model,proxy) -> None:
super().__init__(data_provider, 100, s_n_epochs, b_n_epochs, n_neighbors)
self.iteration = iteration
self.model = model
self.proxy = proxy
    def construct(self):
        print("Trustvis: constructing proxy-based edges")
        # load train data
        train_data = self.data_provider.train_representation(self.iteration)
        # flatten any trailing dimensions so each sample is a single row
        train_data = train_data.reshape(train_data.shape[0], -1)
# build proxy-proxy-connection
proxy_proxy_complex, _, _, _ = self._construct_fuzzy_complex(self.proxy)
# build proxy-sample-connection
proxy_sample_complex, _, _, _ = self._construct_boundary_wise_complex(self.proxy, train_data)
sample_complex, _, _, _ = self._construct_sample_fuzzy_complex(train_data)
edge_to, edge_from, weight = self._construct_proxy_based_edge_dataset(proxy_proxy_complex, sample_complex, proxy_sample_complex)
        #### enhance the connection between each sample and its nearest proxy
        #### find the nearest proxy for each training sample
        nearest_proxy_distances, nearest_proxy_indices = self._find_nearest_proxy(train_data, self.proxy)
        #### add sample-to-nearest-proxy edges
        train_data_indices = np.arange(len(train_data))
        # samples sit after the proxies in feature_vectors, hence the offset
        added_edge_from = train_data_indices + len(self.proxy)
        added_edge_to = nearest_proxy_indices.squeeze()
        # use inverse distance as weight
        added_weight = 1.0 / (nearest_proxy_distances.squeeze() + 1e-5)
# add new edge
edge_to = np.concatenate((edge_to, added_edge_to), axis=0)
edge_from = np.concatenate((edge_from, added_edge_from), axis=0)
weight = np.concatenate((weight, added_weight), axis=0)
feature_vectors = np.concatenate((self.proxy, train_data ), axis=0)
pred_model = self.data_provider.prediction_function(self.iteration)
attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
# attention = np.zeros(feature_vectors.shape)
return edge_to, edge_from, weight, feature_vectors, attention
    def _find_nearest_proxy(self, train_data, proxy):
        nearest_neighbor = NearestNeighbors(n_neighbors=1).fit(proxy)
        # find the nearest proxy for each training sample
        distances, indices = nearest_neighbor.kneighbors(train_data)
        return distances, indices
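    # A small worked example of the inverse-distance weighting used in
    # construct() above (illustrative values): a sample 0.1 away from its
    # nearest proxy gets weight 1 / (0.1 + 1e-5) ~= 10, while one 2.0 away
    # gets ~0.5, so tight sample-proxy pairs dominate the added edges.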
def record_time(self, save_dir, file_name, operation, t):
file_path = os.path.join(save_dir, file_name+".json")
if os.path.exists(file_path):
with open(file_path, "r") as f:
ti = json.load(f)
else:
ti = dict()
if operation not in ti.keys():
ti[operation] = dict()
ti[operation][str(self.iteration)] = t
with open(file_path, "w") as f:
json.dump(ti, f)
class OriginSingleEpochSpatialEdgeConstructor(SpatialEdgeConstructor):
def __init__(self, data_provider, iteration, s_n_epochs, b_n_epochs, n_neighbors) -> None:
super().__init__(data_provider, 100, s_n_epochs, b_n_epochs, n_neighbors)
self.iteration = iteration
def construct(self):
# load train data and border centers
train_data = self.data_provider.train_representation(self.iteration)
# selected = np.random.choice(len(train_data), int(0.9*len(train_data)), replace=False)
# train_data = train_data[selected]
complex, _, _, _ = self._construct_fuzzy_complex(train_data)
edge_to, edge_from, weight = self._construct_step_edge_dataset(complex, None)
feature_vectors = np.copy(train_data)
pred_model = self.data_provider.prediction_function(self.iteration)
attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
return edge_to, edge_from, weight, feature_vectors, attention
def record_time(self, save_dir, file_name, operation, t):
file_path = os.path.join(save_dir, file_name+".json")
if os.path.exists(file_path):
with open(file_path, "r") as f:
ti = json.load(f)
else:
ti = dict()
if operation not in ti.keys():
ti[operation] = dict()
ti[operation][str(self.iteration)] = t
with open(file_path, "w") as f:
json.dump(ti, f)
class PredDistSingleEpochSpatialEdgeConstructor(SpatialEdgeConstructor):
def __init__(self, data_provider, iteration, s_n_epochs, b_n_epochs, n_neighbors) -> None:
super().__init__(data_provider, 100, s_n_epochs, b_n_epochs, n_neighbors)
self.iteration = iteration
def construct(self):
# load train data and border centers
train_data = self.data_provider.train_representation(self.iteration)
# selected = np.random.choice(len(train_data), int(0.9*len(train_data)), replace=False)
# train_data = train_data[selected]
if self.b_n_epochs > 0:
border_centers = self.data_provider.border_representation(self.iteration).squeeze()
            complex, _, _, _ = self._construct_fuzzy_complex(train_data)
            bw_complex, _, _, _ = self._construct_boundary_wise_complex(train_data, border_centers)
edge_to, edge_from, weight = self._construct_step_edge_dataset(complex, bw_complex)
feature_vectors = np.concatenate((train_data, border_centers), axis=0)
# pred_model = self.data_provider.prediction_function(self.iteration)
# attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
attention = np.zeros(feature_vectors.shape)
elif self.b_n_epochs == 0:
complex, _, _, _ = self._construct_fuzzy_complex(train_data)
edge_to, edge_from, weight = self._construct_step_edge_dataset(complex, None)
feature_vectors = np.copy(train_data)
# pred_model = self.data_provider.prediction_function(self.iteration)
# attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
attention = np.zeros(feature_vectors.shape)
else:
            raise Exception("Illegal border edges proportion!")
return edge_to, edge_from, weight, feature_vectors, attention
def record_time(self, save_dir, file_name, operation, t):
file_path = os.path.join(save_dir, file_name+".json")
if os.path.exists(file_path):
with open(file_path, "r") as f:
ti = json.load(f)
else:
ti = dict()
if operation not in ti.keys():
ti[operation] = dict()
ti[operation][str(self.iteration)] = t
with open(file_path, "w") as f:
json.dump(ti, f)
class ActiveLearningEpochSpatialEdgeConstructor(SpatialEdgeConstructor):
def __init__(self, data_provider, iteration, s_n_epochs, b_n_epochs, n_neighbors, cluster_points, uncluster_points) -> None:
super().__init__(data_provider, 100, s_n_epochs, b_n_epochs, n_neighbors)
self.iteration = iteration
self.cluster_points = cluster_points
self.uncluster_points = uncluster_points
def construct(self):
# load train data and border centers
train_data = self.data_provider.train_representation(self.iteration)
# selected = np.random.choice(len(train_data), int(0.9*len(train_data)), replace=False)
# train_data = train_data[selected]
cluster_data = np.concatenate((train_data, self.cluster_points), axis=0)
if self.b_n_epochs > 0:
border_centers = self.data_provider.border_representation(self.iteration).squeeze()
complex, _, _, _ = self._construct_fuzzy_complex(cluster_data)
bw_complex, _, _, _ = self._construct_boundary_wise_complex(cluster_data, border_centers)
if self.uncluster_points.shape[0] != 0:
al_complex, _, _, _ = self._construct_fuzzy_complex(self.uncluster_points)
edge_to, edge_from, weight = self._construct_active_learning_step_edge_dataset(complex, bw_complex, al_complex)
else:
edge_to, edge_from, weight = self._construct_active_learning_step_edge_dataset(complex, bw_complex, None)
feature_vectors = np.concatenate((cluster_data, border_centers), axis=0)
# pred_model = self.data_provider.prediction_function(self.iteration)
# attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
attention = np.zeros(feature_vectors.shape)
elif self.b_n_epochs == 0:
complex, _, _, _ = self._construct_fuzzy_complex(cluster_data)
if self.uncluster_points.shape[0] != 0:
al_complex, _, _, _ = self._construct_fuzzy_complex(self.uncluster_points)
                edge_to, edge_from, weight = self._construct_active_learning_step_edge_dataset(complex, None, al_complex)
else:
edge_to, edge_from, weight = self._construct_active_learning_step_edge_dataset(complex, None, None)
feature_vectors = np.copy(cluster_data)
# pred_model = self.data_provider.prediction_function(self.iteration)
# attention = get_attention(pred_model, feature_vectors, temperature=.01, device=self.data_provider.DEVICE, verbose=1)
attention = np.zeros(feature_vectors.shape)
else:
            raise Exception("Illegal border edges proportion!")
return edge_to, edge_from, weight, feature_vectors, attention
def record_time(self, save_dir, file_name, operation, t):
file_path = os.path.join(save_dir, file_name+".json")
if os.path.exists(file_path):
with open(file_path, "r") as f:
ti = json.load(f)
else:
ti = dict()
if operation not in ti.keys():
ti[operation] = dict()
ti[operation][str(self.iteration)] = t
with open(file_path, "w") as f:
json.dump(ti, f)
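# A hedged end-to-end usage sketch (names are illustrative; `provider` stands
# for any data.DataProvider-compatible object and is not defined here):
#
#   >>> # constructor = OriginSingleEpochSpatialEdgeConstructor(
#   >>> #     provider, iteration=1, s_n_epochs=5, b_n_epochs=0, n_neighbors=15)
#   >>> # edge_to, edge_from, weight, feature_vectors, attention = constructor.construct()
#   >>> # constructor.record_time(save_dir, "time", "complex_construction", t)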