from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
from fake_face_detection.utils.sampling import get_random_samples
from sklearn.gaussian_process import GaussianProcessRegressor
from functools import partial
from typing import *
import pandas as pd
import numpy as np
import string
import random
import pickle
import os
letters = string.ascii_letters + string.digits
class SimpleBayesianOptimizationForFakeReal:

    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: dict = {}, kwargs: dict = {}, checkpoint: str = "data/trials/checkpoint.txt"):

        # store the optimization direction (maximize or minimize the objective)
        self.maximize = maximize

        # checkpoint file where the data, scores, model and trial number are saved
        self.checkpoint = checkpoint

        # initialize the search spaces
        self.search_spaces = search_spaces

        # copy the random kwargs to avoid mutating the caller's dictionary (or the shared default)
        self.random_kwargs = dict(random_kwargs)

        # initialize the objective function
        self.objective = objective

        # copy the kwargs for the same reason
        self.kwargs = dict(kwargs)

        # initialize the surrogate model
        self.model = GaussianProcessRegressor()

        # append a random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}

        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)

        # draw a random sample from the search spaces
        config = get_random_samples(search_spaces)

        if os.path.exists(self.checkpoint):

            # resume from the last saved checkpoint
            with open(self.checkpoint, 'rb') as f:

                pickler = pickle.Unpickler(f)

                checkpoint = pickler.load()

            self.data = checkpoint['data']

            self.scores = checkpoint['scores']

            self.model = checkpoint['model']

            self.current_trial = checkpoint['trial']

            print(f"Checkpoint loaded at trial {self.current_trial}")

        else:

            # add the config to the kwargs
            self.kwargs['config'] = config

            # calculate the first score
            score = self.objective(**self.kwargs)

            # initialize the input data
            self.data = [list(config.values())]

            # initialize the scores
            self.scores = [[score]]

            # fit the model on the initial data and scores
            self.model.fit(self.data, self.scores)

            # initialize the number of trials to zero
            self.current_trial = 0

            # save the first checkpoint
            with open(self.checkpoint, 'wb') as f:

                pickler = pickle.Pickler(f)

                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }

                pickler.dump(checkpoint)
    def optimize(self, n_trials: int = 50, n_tests: int = 100):
        """Find the best hyperparameters with Bayesian optimization.

        Args:
            n_trials (int, optional): The number of trials. Defaults to 50.
            n_tests (int, optional): The number of random samples to test for each trial. Defaults to 100.
        """
        # run multiple trials in order to find the best parameters
        for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1):

            # generate a new sample with the acquisition and surrogate functions
            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)

            config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}

            # evaluate the objective on the new configuration
            new_score = self.get_score(config)

            # add the new sample and its score to their lists
            self.data.append(new_sample)

            self.scores.append([new_score])

            # refit the surrogate model on the augmented data
            self.model.fit(self.data, self.scores)

            # update the current trial
            self.current_trial = trial

            # save a checkpoint after each trial
            with open(self.checkpoint, 'wb') as f:

                pickler = pickle.Pickler(f)

                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }

                pickler.dump(checkpoint)
    def get_score(self, config: dict):
        """Evaluate the objective function on a given configuration and return its score."""
        # reseed the random generator so that successive calls do not reuse the same seed
        random.seed(None)

        # append a random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}

        print(random_kwargs)

        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)

        # add the config to the kwargs
        self.kwargs['config'] = config

        # calculate the new score
        new_score = self.objective(**self.kwargs)

        return new_score
    def get_results(self):
        """Retrieve the generated samples and their scores.

        Returns:
            pd.DataFrame: A data frame containing the results.
        """
        # return the results as a data frame
        data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)}

        data.update({'score': np.array(self.scores)[:, 0]})

        return pd.DataFrame(data)
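

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original pipeline). The objective
# function, the `run_name` random kwarg and the structure of the search space
# entries below are assumptions made for demonstration only; the real value
# format depends on what get_random_samples / PI_generate_sample expect, and
# the real objective would train and evaluate the fake/real face detection
# model and return a validation metric.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    def objective(config: dict, run_name: str) -> float:
        # hypothetical objective: the score peaks at learning_rate = 1e-3
        return -(config['learning_rate'] - 1e-3) ** 2

    # hypothetical search space (value format assumed, adapt to get_random_samples)
    search_spaces = {
        'learning_rate': {'min': 1e-5, 'max': 1e-2},
        'batch_size': {'min': 8, 'max': 64}
    }

    optimizer = SimpleBayesianOptimizationForFakeReal(
        objective,
        search_spaces,
        maximize = True,
        random_kwargs = {'run_name': 'trial_'},  # a random suffix is appended to each value
        checkpoint = "checkpoint.txt"            # written to the working directory for this sketch
    )

    optimizer.optimize(n_trials = 10, n_tests = 50)

    print(optimizer.get_results())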