#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 17 06:46:20 PM EDT 2022
author: Ryan Hildebrandt, github.com/ryancahildebrandt
"""
# imports
import random
import numpy as np
import sklearn as sk
import sklearn.manifold
import umap
from sklearn.decomposition import FactorAnalysis
from sklearn.decomposition import FastICA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.decomposition import NMF
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection

# seed both RNGs; the scikit-learn estimators below fall back to NumPy's
# global RNG when no random_state is passed, so random.seed alone has no effect on them
random.seed(42)
np.random.seed(42)
def dim_tsne(in_embs, metric, method):
    """
    Reduce the input embeddings to 2 and 3 dimensions with t-SNE.
    Reference: TSNE(n_components=2, *, perplexity=30.0, early_exaggeration=12.0, learning_rate='warn', n_iter=1000, n_iter_without_progress=300, min_grad_norm=1e-07, metric='euclidean', metric_params=None, init='warn', verbose=0, random_state=None, method='barnes_hut', angle=0.5, n_jobs=None, square_distances='deprecated')
    """
    d2 = sk.manifold.TSNE(n_components = 2, metric = metric, method = method).fit_transform(in_embs)
    d3 = sk.manifold.TSNE(n_components = 3, metric = metric, method = method).fit_transform(in_embs)
    return [d2, d3]
def dim_gaussrandom(in_embs, eps):
    """
    Reduce the input embeddings to 2 and 3 dimensions with a Gaussian random projection.
    Reference: GaussianRandomProjection(n_components='auto', *, eps=0.1, compute_inverse_components=False, random_state=None)
    """
    d2 = GaussianRandomProjection(n_components = 2, eps = eps).fit_transform(in_embs)
    d3 = GaussianRandomProjection(n_components = 3, eps = eps).fit_transform(in_embs)
    return [d2, d3]
def dim_sparserandom(in_embs, eps):
    """
    Reduce the input embeddings to 2 and 3 dimensions with a sparse random projection.
    Reference: SparseRandomProjection(n_components='auto', *, density='auto', eps=0.1, dense_output=False, compute_inverse_components=False, random_state=None)
    """
    d2 = SparseRandomProjection(n_components = 2, eps = eps).fit_transform(in_embs)
    d3 = SparseRandomProjection(n_components = 3, eps = eps).fit_transform(in_embs)
    return [d2, d3]
def dim_factor(in_embs, svd_method):
    """
    Reduce the input embeddings to 2 and 3 dimensions with factor analysis.
    Reference: FactorAnalysis(n_components=None, *, tol=0.01, copy=True, max_iter=1000, noise_variance_init=None, svd_method='randomized', iterated_power=3, rotation=None, random_state=0)
    """
    d2 = FactorAnalysis(n_components = 2, svd_method = svd_method).fit_transform(in_embs)
    d3 = FactorAnalysis(n_components = 3, svd_method = svd_method).fit_transform(in_embs)
    return [d2, d3]
def dim_fastica(in_embs, algorithm):
    """
    Reduce the input embeddings to 2 and 3 dimensions with FastICA (independent component analysis).
    Reference: FastICA(n_components=None, *, algorithm='parallel', whiten='warn', fun='logcosh', fun_args=None, max_iter=200, tol=0.0001, w_init=None, random_state=None)
    """
    d2 = FastICA(n_components = 2, algorithm = algorithm).fit_transform(in_embs)
    d3 = FastICA(n_components = 3, algorithm = algorithm).fit_transform(in_embs)
    return [d2, d3]
def dim_ipca(in_embs):
    """
    Reduce the input embeddings to 2 and 3 dimensions with incremental PCA.
    Reference: IncrementalPCA(n_components=None, *, whiten=False, copy=True, batch_size=None)
    """
    d2 = IncrementalPCA(n_components = 2).fit_transform(in_embs)
    d3 = IncrementalPCA(n_components = 3).fit_transform(in_embs)
    return [d2, d3]
def dim_kpca(in_embs, kernel):
    """
    Reduce the input embeddings to 2 and 3 dimensions with kernel PCA.
    Reference: KernelPCA(n_components=None, *, kernel='linear', gamma=None, degree=3, coef0=1, kernel_params=None, alpha=1.0, fit_inverse_transform=False, eigen_solver='auto', tol=0, max_iter=None, iterated_power='auto', remove_zero_eig=False, random_state=None, copy_X=True, n_jobs=None)
    """
    d2 = KernelPCA(n_components = 2, kernel = kernel).fit_transform(in_embs)
    d3 = KernelPCA(n_components = 3, kernel = kernel).fit_transform(in_embs)
    return [d2, d3]
def dim_lda(in_embs):
    """
    Reduce the input embeddings to 2 and 3 dimensions with latent Dirichlet allocation.
    Note: LatentDirichletAllocation requires non-negative input.
    Reference: LatentDirichletAllocation(n_components=10, *, doc_topic_prior=None, topic_word_prior=None, learning_method='batch', learning_decay=0.7, learning_offset=10.0, max_iter=10, batch_size=128, evaluate_every=-1, total_samples=1000000.0, perp_tol=0.1, mean_change_tol=0.001, max_doc_update_iter=100, n_jobs=None, verbose=0, random_state=None)
    """
    d2 = LatentDirichletAllocation(n_components = 2).fit_transform(in_embs)
    d3 = LatentDirichletAllocation(n_components = 3).fit_transform(in_embs)
    return [d2, d3]
def dim_minibatchspca(in_embs, method):
    """
    Reduce the input embeddings to 2 and 3 dimensions with mini-batch sparse PCA.
    Reference: MiniBatchSparsePCA(n_components=None, *, alpha=1, ridge_alpha=0.01, n_iter=100, callback=None, batch_size=3, verbose=False, shuffle=True, n_jobs=None, method='lars', random_state=None)
    """
    d2 = MiniBatchSparsePCA(n_components = 2, method = method).fit_transform(in_embs)
    d3 = MiniBatchSparsePCA(n_components = 3, method = method).fit_transform(in_embs)
    return [d2, d3]
def dim_nmf(in_embs, init):
    """
    Reduce the input embeddings to 2 and 3 dimensions with non-negative matrix factorization.
    Note: NMF requires non-negative input.
    Reference: NMF(n_components=None, *, init=None, solver='cd', beta_loss='frobenius', tol=0.0001, max_iter=200, random_state=None, alpha='deprecated', alpha_W=0.0, alpha_H='same', l1_ratio=0.0, verbose=0, shuffle=False, regularization='deprecated')
    """
    d2 = NMF(n_components = 2, init = init).fit_transform(in_embs)
    d3 = NMF(n_components = 3, init = init).fit_transform(in_embs)
    return [d2, d3]
def dim_pca(in_embs):
    """
    Reduce the input embeddings to 2 and 3 dimensions with PCA.
    Reference: PCA(n_components=None, *, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', n_oversamples=10, power_iteration_normalizer='auto', random_state=None)
    """
    d2 = PCA(n_components = 2).fit_transform(in_embs)
    d3 = PCA(n_components = 3).fit_transform(in_embs)
    return [d2, d3]
def dim_spca(in_embs, method):
    """
    Reduce the input embeddings to 2 and 3 dimensions with sparse PCA.
    Reference: SparsePCA(n_components=None, *, alpha=1, ridge_alpha=0.01, max_iter=1000, tol=1e-08, method='lars', n_jobs=None, U_init=None, V_init=None, verbose=False, random_state=None)
    """
    d2 = SparsePCA(n_components = 2, method = method).fit_transform(in_embs)
    d3 = SparsePCA(n_components = 3, method = method).fit_transform(in_embs)
    return [d2, d3]
def dim_tsvd(in_embs, algorithm):
    """
    Reduce the input embeddings to 2 and 3 dimensions with truncated SVD.
    Reference: TruncatedSVD(n_components=2, *, algorithm='randomized', n_iter=5, n_oversamples=10, power_iteration_normalizer='auto', random_state=None, tol=0.0)
    """
    d2 = TruncatedSVD(n_components = 2, algorithm = algorithm).fit_transform(in_embs)
    d3 = TruncatedSVD(n_components = 3, algorithm = algorithm).fit_transform(in_embs)
    return [d2, d3]
def dim_umap(in_embs, n_neighbors, min_dist, metric):
    """
    Reduce the input embeddings to 2 and 3 dimensions with UMAP.
    Reference: UMAP(n_neighbors=15, n_components=2, metric='euclidean', metric_kwds=None, output_metric='euclidean', output_metric_kwds=None, n_epochs=None, learning_rate=1.0, init='spectral', min_dist=0.1, spread=1.0, low_memory=True, n_jobs=-1, set_op_mix_ratio=1.0, local_connectivity=1.0, repulsion_strength=1.0, negative_sample_rate=5, transform_queue_size=4.0, a=None, b=None, random_state=None, angular_rp_forest=False, target_n_neighbors=-1, target_metric='categorical', target_metric_kwds=None, target_weight=0.5, transform_seed=42, transform_mode='embedding', force_approximation_algorithm=False, verbose=False, tqdm_kwds=None, unique=False, densmap=False, dens_lambda=2.0, dens_frac=0.3, dens_var_shift=0.1, output_dens=False, disconnection_distance=None, precomputed_knn=(None, None, None))
    """
    d2 = umap.UMAP(n_components = 2, n_neighbors = n_neighbors, min_dist = min_dist, metric = metric).fit_transform(in_embs)
    d3 = umap.UMAP(n_components = 3, n_neighbors = n_neighbors, min_dist = min_dist, metric = metric).fit_transform(in_embs)
    return [d2, d3]
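# Minimal usage sketch (illustrative only, not part of the original pipeline):
# build a small random embedding matrix and run a few of the reducers above.
# NMF and LatentDirichletAllocation are omitted here because they require
# non-negative input, which general embeddings may not satisfy.
if __name__ == "__main__":
    sample_embs = np.random.rand(100, 50)
    pca_2d, pca_3d = dim_pca(sample_embs)
    tsne_2d, tsne_3d = dim_tsne(sample_embs, metric = "euclidean", method = "barnes_hut")
    umap_2d, umap_3d = dim_umap(sample_embs, n_neighbors = 15, min_dist = 0.1, metric = "euclidean")
    print(pca_2d.shape, tsne_2d.shape, umap_2d.shape)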