#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 17 06:46:20 PM EDT 2022
author: Ryan Hildebrandt, github.com/ryancahildebrandt
"""
# imports
import random
import sklearn as sk
import sklearn.manifold
import umap
from sklearn.decomposition import FactorAnalysis
from sklearn.decomposition import FastICA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.decomposition import NMF
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection
random.seed(42)  # seeds Python's stdlib RNG only; the sklearn and umap estimators below draw from numpy, so pass random_state to them for fully reproducible projections
def dim_tsne(in_embs, metric, method):
"""
2- and 3-dimensional t-SNE projections of the input embeddings.
TSNE(n_components=2, *, perplexity=30.0, early_exaggeration=12.0, learning_rate='warn', n_iter=1000, n_iter_without_progress=300, min_grad_norm=1e-07, metric='euclidean', metric_params=None, init='warn', verbose=0, random_state=None, method='barnes_hut', angle=0.5, n_jobs=None, square_distances='deprecated')
"""
d2 = sk.manifold.TSNE(n_components = 2, metric = metric, method = method).fit_transform(in_embs)
d3 = sk.manifold.TSNE(n_components = 3, metric = metric, method = method).fit_transform(in_embs)
return [d2,d3]
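# Usage sketch, not part of the original script: how dim_tsne might be called on a 2-D numpy
# array of sentence embeddings. The synthetic array, metric, and method below are illustrative
# assumptions, not values used by the project.
def _demo_dim_tsne():
    import numpy as np
    # 100 synthetic vectors standing in for real sentence embeddings
    embs = np.random.rand(100, 384)
    # 'barnes_hut' supports at most 3 output components, which covers both projections here;
    # method='exact' is the fallback for settings Barnes-Hut cannot handle
    tsne_2d, tsne_3d = dim_tsne(embs, metric="euclidean", method="barnes_hut")
    print(tsne_2d.shape, tsne_3d.shape)  # (100, 2) (100, 3)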
def dim_gaussrandom(in_embs, eps):
"""
2- and 3-dimensional Gaussian random projections of the input embeddings.
GaussianRandomProjection(n_components='auto', *, eps=0.1, compute_inverse_components=False, random_state=None)
"""
d2 = GaussianRandomProjection(n_components = 2, eps = eps).fit_transform(in_embs)
d3 = GaussianRandomProjection(n_components = 3, eps = eps).fit_transform(in_embs)
return [d2,d3]
def dim_sparserandom(in_embs, eps):
"""
2- and 3-dimensional sparse random projections of the input embeddings.
SparseRandomProjection(n_components='auto', *, density='auto', eps=0.1, dense_output=False, compute_inverse_components=False, random_state=None)
"""
d2 = SparseRandomProjection(n_components = 2, eps = eps).fit_transform(in_embs)
d3 = SparseRandomProjection(n_components = 3, eps = eps).fit_transform(in_embs)
return [d2,d3]
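# Worth noting, as a sketch rather than documented project behavior: in scikit-learn's random
# projections, eps only feeds the Johnson-Lindenstrauss estimate when n_components='auto'.
# With the fixed 2- and 3-component projections above it is effectively ignored, so any valid
# eps yields the same output shapes. The synthetic data below is an assumption.
def _demo_random_projections():
    import numpy as np
    embs = np.random.rand(100, 384)
    g2, g3 = dim_gaussrandom(embs, eps=0.1)
    s2, s3 = dim_sparserandom(embs, eps=0.5)
    print(g2.shape, g3.shape, s2.shape, s3.shape)  # (100, 2) (100, 3) (100, 2) (100, 3)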
def dim_factor(in_embs, svd_method):
"""
2- and 3-dimensional factor analysis projections of the input embeddings.
FactorAnalysis(n_components=None, *, tol=0.01, copy=True, max_iter=1000, noise_variance_init=None, svd_method='randomized', iterated_power=3, rotation=None, random_state=0)
"""
d2 = FactorAnalysis(n_components = 2, svd_method = svd_method).fit_transform(in_embs)
d3 = FactorAnalysis(n_components = 3, svd_method = svd_method).fit_transform(in_embs)
return [d2,d3]
def dim_fastica(in_embs, algorithm):
"""
2- and 3-dimensional FastICA (independent component analysis) projections of the input embeddings.
FastICA(n_components=None, *, algorithm='parallel', whiten='warn', fun='logcosh', fun_args=None, max_iter=200, tol=0.0001, w_init=None, random_state=None)
"""
d2 = FastICA(n_components = 2, algorithm = algorithm).fit_transform(in_embs)
d3 = FastICA(n_components = 3, algorithm = algorithm).fit_transform(in_embs)
return [d2,d3]
def dim_ipca(in_embs):
"""
2- and 3-dimensional incremental PCA projections of the input embeddings.
IncrementalPCA(n_components=None, *, whiten=False, copy=True, batch_size=None)
"""
d2 = IncrementalPCA(n_components = 2).fit_transform(in_embs)
d3 = IncrementalPCA(n_components = 3).fit_transform(in_embs)
return [d2,d3]
def dim_kpca(in_embs, kernel):
"""
2- and 3-dimensional kernel PCA projections of the input embeddings.
KernelPCA(n_components=None, *, kernel='linear', gamma=None, degree=3, coef0=1, kernel_params=None, alpha=1.0, fit_inverse_transform=False, eigen_solver='auto', tol=0, max_iter=None, iterated_power='auto', remove_zero_eig=False, random_state=None, copy_X=True, n_jobs=None)
"""
d2 = KernelPCA(n_components = 2, kernel = kernel).fit_transform(in_embs)
d3 = KernelPCA(n_components = 3, kernel = kernel).fit_transform(in_embs)
return [d2,d3]
def dim_lda(in_embs):
"""
2- and 3-dimensional latent Dirichlet allocation topic loadings for the input embeddings (input must be non-negative).
LatentDirichletAllocation(n_components=10, *, doc_topic_prior=None, topic_word_prior=None, learning_method='batch', learning_decay=0.7, learning_offset=10.0, max_iter=10, batch_size=128, evaluate_every=-1, total_samples=1000000.0, perp_tol=0.1, mean_change_tol=0.001, max_doc_update_iter=100, n_jobs=None, verbose=0, random_state=None)
"""
d2 = LatentDirichletAllocation(n_components = 2).fit_transform(in_embs)
d3 = LatentDirichletAllocation(n_components = 3).fit_transform(in_embs)
return [d2,d3]
def dim_minibatchspca(in_embs, method):
"""
2- and 3-dimensional mini-batch sparse PCA projections of the input embeddings.
MiniBatchSparsePCA(n_components=None, *, alpha=1, ridge_alpha=0.01, n_iter=100, callback=None, batch_size=3, verbose=False, shuffle=True, n_jobs=None, method='lars', random_state=None)
"""
d2 = MiniBatchSparsePCA(n_components = 2, method = method).fit_transform(in_embs)
d3 = MiniBatchSparsePCA(n_components = 3, method = method).fit_transform(in_embs)
return [d2,d3]
def dim_nmf(in_embs, init):
"""
2- and 3-dimensional NMF (non-negative matrix factorization) projections of the input embeddings (input must be non-negative).
NMF(n_components=None, *, init=None, solver='cd', beta_loss='frobenius', tol=0.0001, max_iter=200, random_state=None, alpha='deprecated', alpha_W=0.0, alpha_H='same', l1_ratio=0.0, verbose=0, shuffle=False, regularization='deprecated')
"""
d2 = NMF(n_components = 2, init = init).fit_transform(in_embs)
d3 = NMF(n_components = 3, init = init).fit_transform(in_embs)
return [d2,d3]
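# dim_lda and dim_nmf both require non-negative input, which raw sentence embeddings generally
# are not. A minimal sketch of one way to satisfy that, using a min-max rescale; the scaler
# choice and synthetic data are assumptions, not part of the original pipeline.
def _demo_nonnegative_inputs():
    import numpy as np
    from sklearn.preprocessing import MinMaxScaler
    embs = np.random.randn(100, 384)  # synthetic embeddings containing negative values
    # rescale each feature to [0, 1] so LDA and NMF do not raise on negative entries
    nonneg = MinMaxScaler().fit_transform(embs)
    lda_2d, lda_3d = dim_lda(nonneg)
    nmf_2d, nmf_3d = dim_nmf(nonneg, init="nndsvd")
    print(lda_2d.shape, nmf_3d.shape)  # (100, 2) (100, 3)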
def dim_pca(in_embs):
"""
2- and 3-dimensional PCA projections of the input embeddings.
PCA(n_components=None, *, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', n_oversamples=10, power_iteration_normalizer='auto', random_state=None)
"""
d2 = PCA(n_components = 2).fit_transform(in_embs)
d3 = PCA(n_components = 3).fit_transform(in_embs)
return [d2,d3]
def dim_spca(in_embs, method):
"""
2- and 3-dimensional sparse PCA projections of the input embeddings.
SparsePCA(n_components=None, *, alpha=1, ridge_alpha=0.01, max_iter=1000, tol=1e-08, method='lars', n_jobs=None, U_init=None, V_init=None, verbose=False, random_state=None)
"""
d2 = SparsePCA(n_components = 2, method = method).fit_transform(in_embs)
d3 = SparsePCA(n_components = 3, method = method).fit_transform(in_embs)
return [d2,d3]
def dim_tsvd(in_embs, algorithm):
"""
2- and 3-dimensional truncated SVD projections of the input embeddings.
TruncatedSVD(n_components=2, *, algorithm='randomized', n_iter=5, n_oversamples=10, power_iteration_normalizer='auto', random_state=None, tol=0.0)
"""
d2 = TruncatedSVD(n_components = 2, algorithm = algorithm).fit_transform(in_embs)
d3 = TruncatedSVD(n_components = 3, algorithm = algorithm).fit_transform(in_embs)
return [d2,d3]
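# Unlike the PCA variants above, TruncatedSVD does not center the data, so dim_tsvd also
# accepts scipy sparse matrices directly. A small sketch with synthetic sparse input; the
# density and shape are arbitrary assumptions.
def _demo_dim_tsvd_sparse():
    from scipy import sparse
    # sparse synthetic matrix, e.g. what a bag-of-words vectorizer might produce
    sparse_embs = sparse.random(100, 384, density=0.05, format="csr", random_state=0)
    svd_2d, svd_3d = dim_tsvd(sparse_embs, algorithm="randomized")
    print(svd_2d.shape, svd_3d.shape)  # (100, 2) (100, 3)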
def dim_umap(in_embs, n_neighbors, min_dist, metric):
"""
2- and 3-dimensional UMAP projections of the input embeddings.
UMAP(n_neighbors=15, n_components=2, metric='euclidean', metric_kwds=None, output_metric='euclidean', output_metric_kwds=None, n_epochs=None, learning_rate=1.0, init='spectral', min_dist=0.1, spread=1.0, low_memory=True, n_jobs=-1, set_op_mix_ratio=1.0, local_connectivity=1.0, repulsion_strength=1.0, negative_sample_rate=5, transform_queue_size=4.0, a=None, b=None, random_state=None, angular_rp_forest=False, target_n_neighbors=-1, target_metric='categorical', target_metric_kwds=None, target_weight=0.5, transform_seed=42, transform_mode='embedding', force_approximation_algorithm=False, verbose=False, tqdm_kwds=None, unique=False, densmap=False, dens_lambda=2.0, dens_frac=0.3, dens_var_shift=0.1, output_dens=False, disconnection_distance=None, precomputed_knn=(None, None, None))
"""
d2 = umap.UMAP(n_components = 2, n_neighbors = n_neighbors, min_dist = min_dist, metric = metric).fit_transform(in_embs)
d3 = umap.UMAP(n_components = 3, n_neighbors = n_neighbors, min_dist = min_dist, metric = metric).fit_transform(in_embs)
return [d2,d3]
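# Usage sketch for the UMAP wrapper, again on synthetic embeddings; the parameter values below
# are common illustrative defaults, not tuned choices from the project.
def _demo_dim_umap():
    import numpy as np
    embs = np.random.rand(200, 384)
    # the wrapper surfaces UMAP's main knobs: neighborhood size, minimum spacing between
    # embedded points, and the distance metric used in the input space
    umap_2d, umap_3d = dim_umap(embs, n_neighbors=15, min_dist=0.1, metric="cosine")
    print(umap_2d.shape, umap_3d.shape)  # (200, 2) (200, 3)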