File size: 7,330 Bytes
5f33ab8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 17 06:46:20 PM EDT 2022 
author: Ryan Hildebrandt, github.com/ryancahildebrandt
"""
# imports
import random
import sklearn as sk
import sklearn.manifold
import umap

from sklearn.decomposition import FactorAnalysis
from sklearn.decomposition import FastICA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.decomposition import NMF
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection

# Seed Python's built-in `random` module at import time.
# NOTE(review): the estimators below come from scikit-learn / umap, which
# presumably draw from NumPy's RNG rather than this module - confirm whether
# this seed actually makes any of the projections reproducible.
random.seed(42)

def dim_tsne(in_embs, metric, method, random_state=None):
	"""
	Project embeddings to 2 and 3 dimensions with t-SNE.

	Two independent sklearn.manifold.TSNE models are fitted on the same input,
	one with n_components = 2 and one with n_components = 3.

	in_embs: data handed unchanged to TSNE.fit_transform
	metric: forwarded as TSNE's `metric` argument
	method: forwarded as TSNE's `method` argument
	random_state: forwarded as TSNE's `random_state`. The default None matches
		the estimator's own default, so existing callers see no change; pass an
		int to make both projections repeatable between runs.

	Returns [d2, d3], the fit_transform outputs of the two models.
	"""
	# Everything except the target dimensionality is shared by both fits.
	shared = {"metric": metric, "method": method, "random_state": random_state}
	d2 = sk.manifold.TSNE(n_components=2, **shared).fit_transform(in_embs)
	d3 = sk.manifold.TSNE(n_components=3, **shared).fit_transform(in_embs)
	return [d2, d3]

def dim_gaussrandom(in_embs, eps, random_state=None):
	"""
	Project embeddings to 2 and 3 dimensions with a Gaussian random projection.

	Two independent GaussianRandomProjection models are fitted on the same
	input, one with n_components = 2 and one with n_components = 3.

	in_embs: data handed unchanged to GaussianRandomProjection.fit_transform
	eps: forwarded as the estimator's `eps` argument.
		NOTE(review): per the scikit-learn docs eps is only consulted when
		n_components='auto'; with the fixed sizes used here it presumably has
		no effect - kept so the call signature stays compatible.
	random_state: forwarded as the estimator's `random_state`. The default None
		matches the estimator's own default, so existing callers see no change;
		pass an int to draw the same projection matrix on every run.

	Returns [d2, d3], the fit_transform outputs of the two models.
	"""
	shared = {"eps": eps, "random_state": random_state}
	d2 = GaussianRandomProjection(n_components=2, **shared).fit_transform(in_embs)
	d3 = GaussianRandomProjection(n_components=3, **shared).fit_transform(in_embs)
	return [d2, d3]

def dim_sparserandom(in_embs, eps, random_state=None):
	"""
	Project embeddings to 2 and 3 dimensions with a sparse random projection.

	Two independent SparseRandomProjection models are fitted on the same input,
	one with n_components = 2 and one with n_components = 3.

	in_embs: data handed unchanged to SparseRandomProjection.fit_transform
	eps: forwarded as the estimator's `eps` argument.
		NOTE(review): per the scikit-learn docs eps is only consulted when
		n_components='auto'; with the fixed sizes used here it presumably has
		no effect - kept so the call signature stays compatible.
	random_state: forwarded as the estimator's `random_state`. The default None
		matches the estimator's own default, so existing callers see no change;
		pass an int to draw the same projection matrix on every run.

	Returns [d2, d3], the fit_transform outputs of the two models.
	"""
	shared = {"eps": eps, "random_state": random_state}
	d2 = SparseRandomProjection(n_components=2, **shared).fit_transform(in_embs)
	d3 = SparseRandomProjection(n_components=3, **shared).fit_transform(in_embs)
	return [d2, d3]

def dim_factor(in_embs, svd_method):
	"""
	Reduce embeddings to 2 and 3 dimensions with factor analysis.

	in_embs: data handed unchanged to FactorAnalysis.fit_transform
	svd_method: forwarded as FactorAnalysis' `svd_method` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	def reduce_to(n_dims):
		# A fresh estimator per target size; nothing is shared between fits.
		model = FactorAnalysis(n_components=n_dims, svd_method=svd_method)
		return model.fit_transform(in_embs)

	two_d = reduce_to(2)
	three_d = reduce_to(3)
	return [two_d, three_d]

def dim_fastica(in_embs, algorithm):
	"""
	Reduce embeddings to 2 and 3 dimensions with FastICA.

	in_embs: data handed unchanged to FastICA.fit_transform
	algorithm: forwarded as FastICA's `algorithm` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	return [
		FastICA(n_components=n_dims, algorithm=algorithm).fit_transform(in_embs)
		for n_dims in (2, 3)
	]

def dim_ipca(in_embs):
	"""
	Reduce embeddings to 2 and 3 dimensions with incremental PCA.

	in_embs: data handed unchanged to IncrementalPCA.fit_transform

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	return [
		IncrementalPCA(n_components=n_dims).fit_transform(in_embs)
		for n_dims in (2, 3)
	]

def dim_kpca(in_embs, kernel):
	"""
	Reduce embeddings to 2 and 3 dimensions with kernel PCA.

	in_embs: data handed unchanged to KernelPCA.fit_transform
	kernel: forwarded as KernelPCA's `kernel` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	projections = []
	for n_dims in (2, 3):
		reducer = KernelPCA(n_components=n_dims, kernel=kernel)
		projections.append(reducer.fit_transform(in_embs))
	return projections

def dim_lda(in_embs):
	"""
	Reduce embeddings to 2 and 3 dimensions with latent Dirichlet allocation.

	in_embs: data handed unchanged to LatentDirichletAllocation.fit_transform
		NOTE(review): scikit-learn documents this estimator for non-negative
		input - confirm callers never pass embeddings with negative values.

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	two_topic = LatentDirichletAllocation(n_components=2)
	two_d = two_topic.fit_transform(in_embs)
	three_topic = LatentDirichletAllocation(n_components=3)
	three_d = three_topic.fit_transform(in_embs)
	return [two_d, three_d]

def dim_minibatchspca(in_embs, method):
	"""
	Reduce embeddings to 2 and 3 dimensions with mini-batch sparse PCA.

	in_embs: data handed unchanged to MiniBatchSparsePCA.fit_transform
	method: forwarded as MiniBatchSparsePCA's `method` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	def reduce_to(n_dims):
		# A fresh estimator per target size; nothing is shared between fits.
		model = MiniBatchSparsePCA(n_components=n_dims, method=method)
		return model.fit_transform(in_embs)

	return [reduce_to(2), reduce_to(3)]

def dim_nmf(in_embs, init):
	"""
	Reduce embeddings to 2 and 3 dimensions with non-negative matrix factorization.

	in_embs: data handed unchanged to NMF.fit_transform
		NOTE(review): scikit-learn documents NMF for non-negative input -
		confirm callers never pass embeddings with negative values.
	init: forwarded as NMF's `init` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	projections = []
	for n_dims in (2, 3):
		factorizer = NMF(n_components=n_dims, init=init)
		projections.append(factorizer.fit_transform(in_embs))
	return projections

def dim_pca(in_embs):
	"""
	Reduce embeddings to 2 and 3 dimensions with PCA.

	in_embs: data handed unchanged to PCA.fit_transform

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	return [PCA(n_components=n_dims).fit_transform(in_embs) for n_dims in (2, 3)]

def dim_spca(in_embs, method):
	"""
	Reduce embeddings to 2 and 3 dimensions with sparse PCA.

	in_embs: data handed unchanged to SparsePCA.fit_transform
	method: forwarded as SparsePCA's `method` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	# Builds an unfitted estimator for the requested output size.
	make_model = lambda n_dims: SparsePCA(n_components=n_dims, method=method)
	two_d = make_model(2).fit_transform(in_embs)
	three_d = make_model(3).fit_transform(in_embs)
	return [two_d, three_d]

def dim_tsvd(in_embs, algorithm):
	"""
	Reduce embeddings to 2 and 3 dimensions with truncated SVD.

	in_embs: data handed unchanged to TruncatedSVD.fit_transform
	algorithm: forwarded as TruncatedSVD's `algorithm` argument

	Returns [d2, d3]: the outputs of two separately fitted models, the first
	with n_components = 2 and the second with n_components = 3.
	"""
	return [
		TruncatedSVD(n_components=n_dims, algorithm=algorithm).fit_transform(in_embs)
		for n_dims in (2, 3)
	]

def dim_umap(in_embs, n_neighbors, min_dist, metric, random_state=None):
	"""
	Project embeddings to 2 and 3 dimensions with UMAP.

	Two independent umap.UMAP models are fitted on the same input, one with
	n_components = 2 and one with n_components = 3.

	in_embs: data handed unchanged to UMAP.fit_transform
	n_neighbors: forwarded as UMAP's `n_neighbors` argument
	min_dist: forwarded as UMAP's `min_dist` argument
	metric: forwarded as UMAP's `metric` argument
	random_state: forwarded as UMAP's `random_state`. The default None matches
		the estimator's own default, so existing callers see no change; pass an
		int to make both projections repeatable between runs.
		NOTE(review): umap-learn's docs say a fixed seed limits parallelism -
		confirm the slowdown is acceptable before seeding in production.

	Returns [d2, d3], the fit_transform outputs of the two models.
	"""
	# Everything except the target dimensionality is shared by both fits.
	shared = {
		"n_neighbors": n_neighbors,
		"min_dist": min_dist,
		"metric": metric,
		"random_state": random_state,
	}
	d2 = umap.UMAP(n_components=2, **shared).fit_transform(in_embs)
	d3 = umap.UMAP(n_components=3, **shared).fit_transform(in_embs)
	return [d2, d3]