from nets.envs import SCI

import os
import numpy as np
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchmetrics import R2Score
import neptune.new as neptune
import neptune.new.integrations.optuna as outils
from optuna.visualization import plot_contour, plot_optimization_history, plot_parallel_coordinate

DEVICE = torch.device("cpu")
BATCHSIZE = 2
DIR = os.getcwd()
EPOCHS = 10
# Cap each epoch at 10 batches to keep trials fast.
N_TRAIN_EXAMPLES = BATCHSIZE * 10
N_VALID_EXAMPLES = BATCHSIZE * 10

class Hyper(SCI):
    """ Hyper parameter tunning class. Allows to generate best NN architecture for task. Inputs are column indexes. idx[-1] is targeted value.

    
    """
    def __init__(self, idx: tuple = (1, 3, 7), *args, **kwargs):
        super(Hyper, self).__init__()
        # Build the data loader from the selected columns.
        self.loader = self.data_flow(idx=idx)
    def define_model(self, trial):
        # We optimize the number of layers, the hidden units per layer,
        # the activation function, and the dropout rate.
        n_layers = trial.suggest_int("n_layers", 2, 6)
        layers = []

        in_features = self.input_dim
        
        for i in range(n_layers):
            out_features = trial.suggest_int("n_units_l{}".format(i), 4, 128)
            # A single activation is sampled per trial (the parameter name is
            # shared, so every layer uses the same activation).
            activations = trial.suggest_categorical('activation', ['ReLU', 'Tanh', 'SiLU', 'SELU', 'RReLU'])
            
            layers.append(nn.Linear(in_features, out_features))
            layers.append(getattr(nn, activations)())
            p = trial.suggest_float("dropout_l{}".format(i), 0.0, 0.2)
            layers.append(nn.Dropout(p))

            in_features = out_features
        layers.append(nn.Linear(in_features, 1))

        return nn.Sequential(*layers)
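
    # Illustrative shape of a sampled model (hypothetical trial values:
    # n_layers=2, n_units_l0=64, n_units_l1=16, activation='ReLU'):
    #     nn.Sequential(
    #         nn.Linear(input_dim, 64), nn.ReLU(), nn.Dropout(p0),
    #         nn.Linear(64, 16), nn.ReLU(), nn.Dropout(p1),
    #         nn.Linear(16, 1),
    #     )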

    def objective(self, trial):
        # Generate the model.
        model = self.define_model(trial).to(DEVICE)
        r2score = R2Score()  # validation metric (maximized)
        # Generate the optimizers.
        optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD", "AdamW", "Adamax", "Adagrad"])
        lr = trial.suggest_float("lr", 1e-7, 1e-3, log=True)
        optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
        
        # The same loader is reused for training and validation; a held-out
        # split would give a less biased estimate.
        train_loader, valid_loader = self.loader, self.loader

        # Training of the model.
        for epoch in range(EPOCHS):
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                # Limiting training data for faster epochs.
                if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                    break

                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

                optimizer.zero_grad()
                output = model(data)
                # Squeeze the (batch, 1) output so the loss is computed against
                # the 1-D target without silent broadcasting.
                loss = F.mse_loss(output.squeeze(-1), target)
                loss.backward()
                optimizer.step()

            # Validation of the model.
            model.eval()
            pred = torch.tensor([])
            targs = torch.tensor([])
            with torch.no_grad():
                for batch_idx, (data, target) in enumerate(valid_loader):
                    # Limiting validation data.
                    if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                        break
                    data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                    
                    output = model(data)
                    # Accumulate predictions and targets across validation batches.
                    pred = torch.cat((pred, output.squeeze()))
                    targs = torch.cat((targs, target))
            
            # R2 on the collected validation predictions; .item() gives Optuna a plain float.
            accuracy = r2score(pred, targs).item()

            trial.report(accuracy, epoch)

            # Handle pruning based on the intermediate value.
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        return accuracy
    
    def start_study(self, n_trials: int = 100, neptune_project: str = None, neptune_api: str = None):
        """Starts a study. Optionally provide your Neptune project and API token for report generation.

        Args:
            n_trials (int, optional): Number of iterations. Defaults to 100.
            neptune_project (str, optional): Neptune project name. Defaults to None.
            neptune_api (str, optional): Neptune API token. Defaults to None.

        Returns:
            tuple: a quick-report dict plus contour, optimization-history,
            and parallel-coordinate plots of the study.
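
        Example (sketch; the project name and token are placeholders):
            >>> tuner = Hyper(idx=(1, 3, 7))
            >>> report, *figs = tuner.start_study(
            ...     n_trials=20,
            ...     neptune_project="my-workspace/my-project",
            ...     neptune_api="<NEPTUNE_API_TOKEN>",
            ... )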
        """
        
        study = optuna.create_study(direction="maximize")
        if neptune_project and neptune_api:
            run = neptune.init_run(
                project=neptune_project,
                api_token=neptune_api,
            )
            neptune_callback = outils.NeptuneCallback(run)
            study.optimize(self.objective, n_trials=n_trials, timeout=600, callbacks=[neptune_callback])
        else:
            study.optimize(self.objective, n_trials=n_trials, timeout=600)
        
       

        pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
        complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

        print("Study statistics: ")
        print("  Number of finished trials: ", len(study.trials))
        print("  Number of pruned trials: ", len(pruned_trials))
        print("  Number of complete trials: ", len(complete_trials))

        print("Best trial:")
        self.trial = study.best_trial

        print("  Value: ", self.trial.value)

        print("  Params: ")
        for key, value in self.trial.params.items():
            print("    {}: {}".format(key, value))
            
        if neptune_api and neptune_project:
            run.stop()
            
        return {"  Number of finished trials: ":len(study.trials),
                "  Number of pruned trials: ": len(pruned_trials),
                "  Number of complete trials: ": len(complete_trials),
                "Best trial score" : self.trial.value,
                "  Params: ": self.trial.params
        },plot_contour(study, params=["lr", "n_layers"]),\
        plot_optimization_history(study),\
        plot_parallel_coordinate(study)
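

# Minimal end-to-end sketch (assumes the SCI data environment is configured;
# the column indexes and trial count below are illustrative):
if __name__ == "__main__":
    tuner = Hyper(idx=(1, 3, 7))
    report, contour, history, parallel = tuner.start_study(n_trials=20)
    print(report)
    # The returned visualizations are plotly figures and can be saved to HTML:
    history.write_html("optimization_history.html")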