File size: 5,642 Bytes
6d51833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.

# %% auto 0
__all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
           'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']

# %% ../nbs/dr.ipynb 2
import subprocess
def get_gpu_memory(device = 0):
    """Query `nvidia-smi` for the memory usage of GPU `device`.

    Returns the tuple `(used_memory, total_memory, percentage)`. Both memory figures are the
    integers printed by `nvidia-smi` (queried with `nounits`), and `percentage` is
    `used / total * 100` rounded to the nearest integer.

    Raises whatever `subprocess.check_output` raises when `nvidia-smi` is missing or fails.
    """
    def _query(field):
        # One nvidia-smi call per field; only the first output line is parsed.
        raw = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=" + field, "--format=csv,noheader,nounits", "--id=" + str(device)]
        )
        return int(raw.decode().split('\n')[0])

    total_memory = _query("memory.total")
    used_memory = _query("memory.used")
    return used_memory, total_memory, round((used_memory / total_memory) * 100)

def color_for_percentage(percentage):
    """Return the ANSI escape sequence used to colour a usage `percentage`.

    The value is compared against ascending exclusive upper bounds (20, 40, 60, 80);
    anything that is not below 80 gets the red code.
    """
    # (exclusive upper bound, ANSI colour code), scanned from the lowest band up.
    bands = (
        (20, "\033[90m"),  # Gray
        (40, "\033[94m"),  # Blue
        (60, "\033[92m"),  # Green
        (80, "\033[93m"),  # Yellow/orange
    )
    for upper_bound, code in bands:
        if percentage < upper_bound:
            return code
    return "\033[91m"  # Red
        
def create_bar(percentage, color_code, length=20):
    """Build a coloured text progress bar that is always `length` characters wide.

    `percentage` decides how many cells are filled with "█"; the remaining cells are "-".
    Values outside 0-100 are clamped so the bar never grows beyond (or shrinks below)
    `length` cells. The bar is wrapped in `color_code` and the ANSI reset sequence.
    """
    # Same floor-based computation as before, then clamped into [0, length]: without the
    # clamp a percentage above 100 (or below 0) produced a bar longer than `length`.
    filled_length = max(0, min(length, int(length * percentage // 100)))
    bar = "█" * filled_length + "-" * (length - filled_length)
    return color_code + bar + "\033[0m"  # Apply color and reset after bar

def gpu_memory_status(device=0):
    """Print the used memory, total memory and a coloured usage bar for GPU `device`.

    The figures come from `get_gpu_memory(device)`; nothing is returned.
    """
    used, total, percentage = get_gpu_memory(device)
    color_code = color_for_percentage(percentage)
    bar = create_bar(percentage, color_code)
    print(f"Used mem: {used}")
    # This line reports the total, so it is labelled as such (it used to say "Used mem").
    print(f"Total mem: {total}")
    print(f"Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")

# %% ../nbs/dr.ipynb 4
import umap
import cudf
import cuml
import pandas as pd
import numpy as np
from fastcore.all import *
from .imports import *
from .load import TSArtifact

# %% ../nbs/dr.ipynb 5
def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
    """Check that the artifact passed through the DR is compatible with the encoder's artifact.

    The comparison is based on the `'TS'` entry of each artifact's `metadata`:

    * both artifacts must list the same `vars`,
    * both artifacts must have the same `freq`,
    * the DR artifact must have no `normalization` key (i.e. it is not normalized),
    * the DR artifact must report `has_missing_values` as false.

    Prints "Artifacts are compatible." and returns `None` on success. Otherwise prints
    "Artifacts are not compatible." and raises: a `ValueError` naming the failed checks, or
    the original lookup error (e.g. `KeyError`) when the expected metadata is missing.
    """
    try:
        dr_ts = dr_ar.metadata['TS']
        enc_ts = enc_ar.metadata['TS']
        checks = {
            # Both artifacts have the same variables
            'vars': dr_ts['vars'] == enc_ts['vars'],
            # Both artifacts have the same freq
            'freq': dr_ts['freq'] == enc_ts['freq'],
            # The dr artifact is not normalized (non-normalized data lacks the key)
            'normalization': dr_ts.get('normalization') is None,
            # The dr artifact has no missing values. The flag is compared through `str` so
            # that both the string "False" and a boolean False are accepted.
            'has_missing_values': str(dr_ts['has_missing_values']) == "False",
        }
        failed = [name for name, passed in checks.items() if not passed]
        if failed:
            raise ValueError("Incompatible artifacts, failed checks: " + ", ".join(failed))
    except Exception:
        print("Artifacts are not compatible.")
        raise  # re-raise with the original traceback
    print("Artifacts are compatible.")
    return None

# %% ../nbs/dr.ipynb 7
# Debug only: hashlib provides the checksum prints in get_UMAP_prjs; comment this import out once 4_seconds has been debugged.
import hashlib

# %% ../nbs/dr.ipynb 8
import warnings
import sys
from numba.core.errors import NumbaPerformanceWarning
@delegates(cuml.UMAP)
def get_UMAP_prjs(
    input_data, 
    cpu=True, 
    print_flag = False, 
    check_memory_usage = True,
    **kwargs
):
    """Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`.

    `cpu` selects the implementation: `umap.UMAP` when truthy, `cuml.UMAP` otherwise (in which
    case a `random_state` entry in `kwargs` is converted to `np.uint64` first).
    `print_flag` enables debug output, including MD5 checksums of the input and of the
    projections (both must expose `tobytes()`).
    `check_memory_usage` prints the GPU memory status before and after the fit.
    Returns whatever `reducer.fit_transform(input_data)` returns.
    """
    if print_flag:
        print("--> get_UMAP_prjs")
        print("kwargs: ", kwargs)
        sys.stdout.flush()
        # Debug aid: fingerprint of the raw input bytes.
        print(hashlib.md5(input_data.tobytes()).hexdigest())

    if check_memory_usage:
        gpu_memory_status()

    # Installs a process-wide filter that silences NumbaPerformanceWarning.
    warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)

    # The backend banner is printed regardless of `print_flag`.
    backend_banner = "-- umap.UMAP --" if cpu else "-- cuml.UMAP --"
    print(backend_banner, cpu)
    sys.stdout.flush()
    if cpu:
        reducer = umap.UMAP(**kwargs)
    else:
        if 'random_state' in kwargs:
            kwargs['random_state'] = np.uint64(kwargs['random_state'])
        reducer = cuml.UMAP(**kwargs)

    if print_flag:
        separator = "------- reducer --------"
        print(separator)
        print(reducer)
        print(reducer.get_params())
        print(separator)
        sys.stdout.flush()

    projections = reducer.fit_transform(input_data)

    if check_memory_usage:
        gpu_memory_status()
    if print_flag:
        # Debug aid: fingerprint of the resulting projections.
        prjs_digest = hashlib.md5(projections.tobytes()).hexdigest()
        print("prjs checksum ", prjs_digest)
        print("get_UMAP_prjs -->")
        sys.stdout.flush()
    return projections

# %% ../nbs/dr.ipynb 13
@delegates(cuml.PCA)
def get_PCA_prjs(X, cpu=False, **kwargs):
    r"""
    Computes PCA projections of X with `cuml.PCA`, configured through `**kwargs`.

    Only the GPU path exists: a truthy `cpu` raises `NotImplementedError`.
    """
    # Guard clause: there is no CPU implementation.
    if cpu:
        raise NotImplementedError
    return cuml.PCA(**kwargs).fit_transform(X)

# %% ../nbs/dr.ipynb 15
@delegates(cuml.TSNE)
def get_TSNE_prjs(X, cpu=False, **kwargs):
    r"""
    Computes TSNE projections of X with `cuml.TSNE`, configured through `**kwargs`.

    Only the GPU path exists: a truthy `cpu` raises `NotImplementedError`.
    """
    # Guard clause: there is no CPU implementation.
    if cpu:
        raise NotImplementedError
    return cuml.TSNE(**kwargs).fit_transform(X)

# %% ../nbs/dr.ipynb 18
from sklearn.metrics import silhouette_score
def cluster_score(prjs, clusters_labels, print_flag=False):
    """Return the silhouette score of `clusters_labels` over the projections `prjs`.

    `print_flag` additionally prints the score; it defaults to `False`, matching the other
    helpers in this module, so existing calls that pass it explicitly are unaffected.
    """
    score = silhouette_score(prjs, clusters_labels)
    if print_flag:
        print("Silhouette_score:", score)
    return score