deepvats / dvats /dr.py
misantamaria's picture
bring dvats & requirements & entrypoint
7399708
raw
history blame
5.66 kB
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
# %% auto 0
__all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
# %% ../nbs/dr.ipynb 2
import subprocess
def get_gpu_memory(device = 0):
total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
total_memory = int(total_memory.decode().split('\n')[0])
used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
used_memory = int(used_memory.decode().split('\n')[0])
percentage = round((used_memory / total_memory) * 100)
return used_memory, total_memory, percentage
def color_for_percentage(percentage):
if percentage < 20:
return "\033[90m" # Gray
elif percentage < 40:
return "\033[94m" # Blue
elif percentage < 60:
return "\033[92m" # Green
elif percentage < 80:
return "\033[93m" # Orange
else:
return "\033[91m" # Red
def create_bar(percentage, color_code, length=20):
filled_length = int(length * percentage // 100)
bar = "█" * filled_length + "-" * (length - filled_length)
return color_code + bar + "\033[0m" # Apply color and reset after bar
def gpu_memory_status(device=0):
used, total, percentage = get_gpu_memory(device)
color_code = color_for_percentage(percentage)
bar = create_bar(percentage, color_code)
print(f"GPU | Used mem: {used}")
print(f"GPU | Used mem: {total}")
print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
# %% ../nbs/dr.ipynb 4
import umap
import cudf
import cuml
import pandas as pd
import numpy as np
from fastcore.all import *
from .imports import *
from .load import TSArtifact
# %% ../nbs/dr.ipynb 5
def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
"Function to check that the artifact used by the encoder model and the artifact that is \
going to be passed through the DR are compatible"
try:
# Check that both artifacts have the same variables
chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
# Check that both artifacts have the same freq
chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
# Check that the dr artifact is not normalized (not normalized data has not the key normalization)
chk_norm = dr_ar.metadata['TS'].get('normalization') is None
# Check that the dr artifact has not missing values
chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
# Check all logical vars.
if chk_vars and chk_freq and chk_norm and chk_miss:
print("Artifacts are compatible.")
else:
raise Exception
except Exception as e:
print("Artifacts are not compatible.")
raise e
return None
# %% ../nbs/dr.ipynb 7
#Comment this part after 4_seconds debugged
import hashlib
# %% ../nbs/dr.ipynb 8
import warnings
import sys
from numba.core.errors import NumbaPerformanceWarning
@delegates(cuml.UMAP)
def get_UMAP_prjs(
input_data,
cpu=True,
print_flag = False,
check_memory_usage = True,
**kwargs
):
"Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
if print_flag:
print("--> get_UMAP_prjs")
print("kwargs: ", kwargs)
sys.stdout.flush()
####
checksum = hashlib.md5(input_data.tobytes()).hexdigest()
print(checksum)
####
if check_memory_usage: gpu_memory_status()
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning) # silence NumbaPerformanceWarning
#reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
if cpu:
print("-- umap.UMAP --", cpu)
sys.stdout.flush()
reducer = umap.UMAP(**kwargs)
else:
print("-- cuml.UMAP --", cpu)
sys.stdout.flush()
if 'random_state' in kwargs:
kwargs['random_state'] = np.uint64(kwargs['random_state'])
reducer = cuml.UMAP(**kwargs)
if print_flag:
print("------- reducer --------")
print(reducer)
print(reducer.get_params())
print("------- reducer --------")
sys.stdout.flush()
projections = reducer.fit_transform(input_data)
if check_memory_usage: gpu_memory_status()
if print_flag:
checksum = hashlib.md5(projections.tobytes()).hexdigest()
print("prjs checksum ", checksum)
print("get_UMAP_prjs -->")
sys.stdout.flush()
return projections
# %% ../nbs/dr.ipynb 13
@delegates(cuml.PCA)
def get_PCA_prjs(X, cpu=False, **kwargs):
r"""
Computes PCA projections of X
"""
if cpu:
raise NotImplementedError
else:
reducer = cuml.PCA(**kwargs)
projections = reducer.fit_transform(X)
return projections
# %% ../nbs/dr.ipynb 15
@delegates(cuml.TSNE)
def get_TSNE_prjs(X, cpu=False, **kwargs):
r"""
Computes TSNE projections of X
"""
if cpu:
raise NotImplementedError
else:
reducer = cuml.TSNE(**kwargs)
projections = reducer.fit_transform(X)
return projections
# %% ../nbs/dr.ipynb 18
from sklearn.metrics import silhouette_score
def cluster_score(prjs, clusters_labels, print_flag):
score = silhouette_score(prjs, clusters_labels)
if print_flag: print("Silhouette_score:", score)
return score