# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.

# %% auto 0
__all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
           'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
           'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
           'learner_module_leaves_subtables']

# %% ../nbs/utils.ipynb 3
from .imports import *
from fastcore.all import *
import wandb
import pickle
import pandas as pd
import numpy as np
#import tensorflow as tf
import torch.nn as nn
from fastai.basics import *

# %% ../nbs/utils.ipynb 5
def generate_TS_df(rows, cols):
    "Generates a dataframe containing a multivariate time series, where each column \
    represents a variable and each row a time point (sample). The timestamp is in the \
    index of the dataframe, and it is created with an even spacing of 1 second between samples"
    # date_range guarantees exactly `rows` samples; the previous np.arange over two
    # separate pd.Timestamp.now() calls relied on timing jitter to get the right length
    index = pd.date_range(start=pd.Timestamp.now(), periods=rows, freq='s')
    data = np.random.randn(len(index), cols)
    return pd.DataFrame(data, index=index)
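
# A minimal sketch of the expected behaviour (illustrative only):
# df = generate_TS_df(3, 2)
# df.shape  # (3, 2), with a 1-second-spaced DatetimeIndex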

# %% ../nbs/utils.ipynb 10
def normalize_columns(df:pd.DataFrame):
    "Normalize columns from `df` to have 0 mean and 1 standard deviation"
    mean = df.mean()
    std = df.std() + 1e-7 # the small epsilon avoids division by zero on constant columns
    return (df-mean)/std
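
# A minimal sketch of the expected behaviour (illustrative only):
# df_norm = normalize_columns(generate_TS_df(100, 3))
# df_norm.mean()  # ~0 for every column
# df_norm.std()   # ~1 for every column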

# %% ../nbs/utils.ipynb 16
def remove_constant_columns(df:pd.DataFrame):
    "Remove from `df` the columns whose value is constant across all rows"
    return df.loc[:, (df != df.iloc[0]).any()]
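
# A minimal sketch of the expected behaviour:
# df = pd.DataFrame({'a': [1, 2, 3], 'b': [7, 7, 7]})
# remove_constant_columns(df).columns.tolist()  # ['a']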

# %% ../nbs/utils.ipynb 21
class ReferenceArtifact(wandb.Artifact):
    "This class is meant to create an artifact with a single reference to an object \
    passed as argument in the constructor. The object will be pickled, hashed and stored \
    in a specified folder."
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
    @delegates(wandb.Artifact.__init__)
    def __init__(self, obj, name, type='object', folder=None, **kwargs):
        super().__init__(type=type, name=name, **kwargs)
        # pickle-dump the object and use the hash of the dump as the file name
        hash_code = str(hash(pickle.dumps(obj)))
        folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
        folder.mkdir(parents=True, exist_ok=True) # make sure the storage folder exists
        with open(folder/hash_code, 'wb') as f:
            pickle.dump(obj, f)
        self.add_reference(f'file://{folder}/{hash_code}')
        if self.metadata is None:
            self.metadata = dict()
        self.metadata['ref'] = dict()
        self.metadata['ref']['hash'] = hash_code
        self.metadata['ref']['type'] = str(obj.__class__)
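
# A minimal usage sketch (assumes an active wandb run; project and artifact names are illustrative):
# run = wandb.init(project='my_project')
# run.log_artifact(ReferenceArtifact(obj={'a': 1}, name='my_dict'))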

# %% ../nbs/utils.ipynb 24
@patch
def to_obj(self:wandb.apis.public.Artifact):
    """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
    come from a call to `run.use_artifact` with a proper wandb run."""
    if self.metadata.get('ref') is None:
        print(f'ERROR: {self} does not come from a saved ReferenceArtifact')
        return None
    # default_storage_path is relative to Path.home() (see ReferenceArtifact)
    original_path = Path.home()/ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
    path = original_path if original_path.exists() else Path(self.download()).ls()[0]
    with open(path, 'rb') as f:
        obj = pickle.load(f)
    return obj
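
# A minimal usage sketch (assumes the artifact was logged as a ReferenceArtifact;
# project and artifact names are illustrative):
# run = wandb.init(project='my_project')
# obj = run.use_artifact('my_dict:latest').to_obj()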

# %% ../nbs/utils.ipynb 33
class PrintLayer(nn.Module):
    "Identity module that prints the shape of the tensor passing through, useful to debug `nn.Sequential` models"
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Print the shape of the input and pass it through unchanged
        print(x.shape)
        return x
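
# A minimal usage sketch: interleave `PrintLayer` in a model to trace tensor shapes.
# model = nn.Sequential(nn.Linear(10, 5), PrintLayer(), nn.ReLU())
# _ = model(torch.randn(2, 10))  # prints torch.Size([2, 5])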

# %% ../nbs/utils.ipynb 34
@patch
def export_and_get(self:Learner, keep_exported_file=False):
    "Export the learner into an auxiliary file, load it and return it back."
    # Learner.export saves relative to self.path, so load and unlink from there too
    aux_path = self.path/'aux.pkl'
    self.export(fname='aux.pkl')
    aux_learn = load_learner(aux_path)
    if not keep_exported_file: aux_path.unlink()
    return aux_learn
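
# A minimal usage sketch (assumes a trained fastai `learn` object; illustrative only):
# learn2 = learn.export_and_get()  # round-trips the learner through `aux.pkl`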

# %% ../nbs/utils.ipynb 35
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
    """
        Get the artifacts logged in a wandb project.
        Input:
        - `project_path` (str): entity/project_name
        - `type` (str): if given, return only the artifacts of this type
        - `name` (str): if given, return only the artifacts with this name; leave None to include all names
        - `last_version` (bool): whether to return only the last version of each artifact or all versions

        Output: List of artifacts
    """
    public_api = wandb.Api()
    if type is not None:
        types = [public_api.artifact_type(type, project_path)]
    else:
        types = public_api.artifact_types(project_path)

    res = L()
    for kind in types:
        for collection in kind.collections():
            if name is None or name == collection.name:
                versions = public_api.artifact_versions(
                    kind.type,
                    "/".join([kind.entity, kind.project, collection.name]),
                    per_page=1,
                )
                if last_version: res += next(versions)
                else: res += L(versions)
    return list(res)
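
# A minimal usage sketch (entity and project names are illustrative):
# artifacts = get_wandb_artifacts('my_entity/my_project', type='object', last_version=True)
# [a.name for a in artifacts]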

# %% ../nbs/utils.ipynb 39
def get_pickle_artifact(filename):
    "Load and return the object pickled in `filename`"
    with open(filename, "rb") as f:
        df = pickle.load(f)
    return df

# %% ../nbs/utils.ipynb 41
import pyarrow.feather as ft # pickle is already imported at the top of the module

# %% ../nbs/utils.ipynb 42
def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
    "Read a feather file from `path`, apply `function` to it and return the result"
    result = None
    if not (path is None):
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input_df = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input_df, *args, **kwargs)
        if print_flag: print("Exec with feather --> ", path)
    return result
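
# A minimal usage sketch (the feather file is illustrative and must exist on disk):
# normalized = exec_with_feather(normalize_columns, path='data/input.feather')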

# %% ../nbs/utils.ipynb 43
def py_function(module_name, function_name, print_flag = False):
    "Look up `function_name` in `__main__` and, failing that, in `module_name`, and return it"
    try:
        function = getattr(__import__('__main__'), function_name)
    except AttributeError:
        module = __import__(module_name, fromlist=[''])
        function = getattr(module, function_name)
    if print_flag: print("py function: ", function_name, ": ", function)
    return function
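
# A minimal usage sketch: resolve a function by name, falling back from `__main__` to the module:
# fn = py_function('numpy', 'mean')
# fn([1, 2, 3])  # 2.0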

# %% ../nbs/utils.ipynb 46
import time
def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path`, apply the function named `function_name` to it and return the `k_output`-th element of the result"
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if not (path is None):
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input_df = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input_df, *args, **kwargs)[k_output]
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path)
    return result
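
# A minimal usage sketch. `split_df` is a hypothetical function defined in `__main__`
# that returns a tuple of dataframes; the feather path is illustrative:
# train_df = exec_with_feather_k_output('split_df', path='data/input.feather', k_output=0)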

# %% ../nbs/utils.ipynb 48
def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path_input`, apply the function named `function_name`, write the `k_output`-th element of the result to `path_output` and return that path"
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if not (path_input is None):
        if print_flag: print("--> Exec with feather | reading input from ", path_input)
        input_df = ft.read_feather(path_input)
        if print_flag:
            print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input_df))

        result = function(input_df, *args, **kwargs)[k_output]
        ft.write_feather(result, path_output, compression = 'lz4')
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path_output)
    return path_output
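
# A minimal usage sketch with the same hypothetical `split_df`; both paths are illustrative:
# out = exec_with_and_feather_k_output('split_df', path_input='data/input.feather',
#                                      path_output='data/train.feather', k_output=0)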

# %% ../nbs/utils.ipynb 52
def learner_module_leaves(learner):
    "Return a dataframe describing the leaf modules of `learner`, with their path, type, name and parameters"
    modules = list(learner.modules())[0]  # Get the root module
    rows = []

    def find_leaf_modules(module, path=[]):
        for name, sub_module in module.named_children():
            current_path = path + [f"{type(sub_module).__name__}"]
            if not list(sub_module.children()):
                leaf_name = ' -> '.join(current_path)
                leaf_params = str(sub_module).strip()
                rows.append([
                    leaf_name,
                    f"{type(sub_module).__name__}",
                    name,
                    leaf_params
                ])

            find_leaf_modules(sub_module, current_path)

    find_leaf_modules(modules)

    df = pd.DataFrame(rows, columns=['Path', 'Module_type', 'Module_name', 'Module'])
    return df
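
# A minimal usage sketch (assumes a fastai `learn` object is available):
# leaves = learner_module_leaves(learn)
# leaves[['Module_type', 'Module_name']].head()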

# %% ../nbs/utils.ipynb 56
def learner_module_leaves_subtables(learner, print_flag = False):
    "Summarise the leaf modules of `learner` into two dataframes: the distinct module types and the distinct module parametrisations"
    md = learner_module_leaves(learner).drop(
            'Path', axis = 1
        ).sort_values(
            by = 'Module_type'
        )
    if print_flag: print("The layers are of these types:")

    md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
    if print_flag:
        display(md_types)
        print("And they are called with these parameters:")

    md_modules = pd.DataFrame(md['Module'].drop_duplicates())

    if print_flag: display(md_modules)

    return md_types, md_modules
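
# A minimal usage sketch (assumes a fastai `learn` object is available):
# md_types, md_modules = learner_module_leaves_subtables(learn, print_flag=True)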