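"""Simple Bayesian optimization of the fake/real face detection objective.

A Gaussian process surrogate (scikit-learn) is wrapped around an arbitrary objective
function: an initial random configuration is evaluated, each trial then selects the
next configuration through the probability-of-improvement based acquisition exposed
by PI_generate_sample, the surrogate is refit, and the whole state is pickled to a
checkpoint file so that an interrupted search can be resumed.
"""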
from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
from fake_face_detection.utils.sampling import get_random_samples
from sklearn.gaussian_process import GaussianProcessRegressor
from functools import partial
from typing import Callable, Optional
import pandas as pd
import numpy as np
import string
import random
import pickle
import os

letters = string.ascii_letters + string.digits

class SimpleBayesianOptimizationForFakeReal:
    
    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: Optional[dict] = None, kwargs: Optional[dict] = None, checkpoint: str = "data/trials/checkpoint.txt"):
        
        # retrieve the optimization direction (maximize or minimize the objective)
        self.maximize = maximize
        
        # checkpoint file where the data, scores, model and trial count are saved
        self.checkpoint = checkpoint
        
        # initialize the search spaces
        self.search_spaces = search_spaces
        
        # keyword arguments whose values receive a random suffix at each trial (copied to avoid mutating the caller's dict)
        self.random_kwargs = dict(random_kwargs) if random_kwargs else {}
        
        # initialize the objective function
        self.objective = objective
        
        # fixed keyword arguments passed to the objective (copied to avoid mutating the caller's dict)
        self.kwargs = dict(kwargs) if kwargs else {}
        
        # initialize the surrogate model
        self.model = GaussianProcessRegressor()
        
        # append a random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}
        
        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)
        
        # draw a random initial configuration from the search spaces
        config = get_random_samples(search_spaces)
        
        if os.path.exists(self.checkpoint):
            
            # resume from the saved checkpoint
            with open(self.checkpoint, 'rb') as f:
                
                checkpoint = pickle.load(f)
                
                self.data = checkpoint['data']
                
                self.scores = checkpoint['scores']
                
                self.model = checkpoint['model']
                
                self.current_trial = checkpoint['trial']
                
                print(f"Checkpoint loaded at trial {self.current_trial}")
        
        else:
            
            # make sure the checkpoint directory exists before writing
            os.makedirs(os.path.dirname(self.checkpoint) or '.', exist_ok = True)
            
            # add the initial config to the kwargs
            self.kwargs['config'] = config
            
            # compute the first score
            score = self.objective(**self.kwargs)
            
            # initialize the input data
            self.data = [list(config.values())]
            
            # initialize the scores
            self.scores = [[score]]
            
            # fit the surrogate model on the initial data and score
            self.model.fit(self.data, self.scores)
            
            # initialize the number of trials to zero
            self.current_trial = 0
            
            with open(self.checkpoint, 'wb') as f:
                
                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }
                
                pickle.dump(checkpoint, f)
    
    def optimize(self, n_trials: int = 50, n_tests: int = 100):
        """Find the best hyperparameters with Bayesian optimization

        Args:
            n_trials (int, optional): The number of trials. Defaults to 50.
            n_tests (int, optional): The number of random candidates scored by the acquisition function at each trial. Defaults to 100.
        """
        
        # run multiple trials in order to find the best parameters
        for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1):
            
            # generate a new sample with the acquisition and surrogate functions
            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
            config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}
            
            # evaluate the objective on the new configuration
            new_score = self.get_score(config)
            
            # add the new sample and its score to their lists
            self.data.append(new_sample)
            
            self.scores.append([new_score])
            
            # refit the surrogate model on the updated data
            self.model.fit(self.data, self.scores)
            
            # update the current trial
            self.current_trial = trial
            
            # save the checkpoint after every trial
            with open(self.checkpoint, 'wb') as f:
                
                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }
                
                pickle.dump(checkpoint, f)
    
    def get_score(self, config: dict):
        
        # reseed the generator from system entropy so that each trial gets fresh random suffixes
        random.seed(None)
        
        # append a new random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}
        print(random_kwargs)
        
        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)
        
        # add the config to the kwargs
        self.kwargs['config'] = config
        
        # compute the new score
        new_score = self.objective(**self.kwargs)
        
        return new_score
        
    def get_results(self):
        """Return the generated samples and their scores

        Returns:
            pd.DataFrame: A data frame containing the results
        """
        # build one column per hyperparameter from the collected samples
        data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)}
        
        data.update({'score': np.array(self.scores)[:, 0]})
        
        return pd.DataFrame(data)
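

if __name__ == "__main__":
    
    # Minimal standalone sketch of the loop SimpleBayesianOptimizationForFakeReal delegates to
    # PI_generate_sample for: a Gaussian process surrogate combined with a probability-of-improvement
    # acquisition over random candidates. The toy 1-D objective and the [0, 4] search interval are
    # illustrative assumptions, not part of the project.
    from scipy.stats import norm
    
    def toy_objective(x: float) -> float:
        # hypothetical objective to maximize
        return -(x - 2.0) ** 2 + np.sin(5.0 * x)
    
    rng = np.random.default_rng(0)
    
    # one random starting point
    X = [[rng.uniform(0.0, 4.0)]]
    y = [toy_objective(X[0][0])]
    
    gp = GaussianProcessRegressor()
    gp.fit(X, y)
    
    for _ in range(20):
        
        # score random candidates with the probability of improving on the best score so far
        candidates = rng.uniform(0.0, 4.0, size = (100, 1))
        mean, std = gp.predict(candidates, return_std = True)
        pi = norm.cdf((mean - max(y)) / (std + 1e-9))
        
        # evaluate the most promising candidate and refit the surrogate
        x_new = candidates[int(np.argmax(pi))]
        X.append(list(x_new))
        y.append(toy_objective(float(x_new[0])))
        gp.fit(X, y)
    
    print("Best score found:", max(y))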