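"""Simple Bayesian optimization of the fake/real face detection objective.

A Gaussian process surrogate (scikit-learn) is wrapped around an arbitrary objective
function: an initial random configuration is evaluated, each trial then selects the
next configuration through the probability-of-improvement based acquisition exposed
by PI_generate_sample, the surrogate is refit, and the whole state is pickled to a
checkpoint file so that an interrupted search can be resumed.
"""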
from fake_face_detection.utils.generation import PI_generate_sample as generate_sample
from fake_face_detection.utils.acquisitions import PI_acquisition as acquisition
from fake_face_detection.utils.sampling import get_random_samples
from sklearn.gaussian_process import GaussianProcessRegressor
from functools import partial
from typing import Callable, Optional
import pandas as pd
import numpy as np
import string
import random
import pickle
import os

letters = string.ascii_letters + string.digits

class SimpleBayesianOptimizationForFakeReal:
    
    def __init__(self, objective: Callable, search_spaces: dict, maximize: bool = True, random_kwargs: Optional[dict] = None, kwargs: Optional[dict] = None, checkpoint: str = "data/trials/checkpoint.txt"):
        
        # retrieve the optimization direction (maximize or minimize the objective)
        self.maximize = maximize
        
        # checkpoint file where the data, scores, model and trial count are saved
        self.checkpoint = checkpoint
        
        # initialize the search spaces
        self.search_spaces = search_spaces
        
        # keyword arguments whose values receive a random suffix at each trial (copied to avoid mutating the caller's dict)
        self.random_kwargs = dict(random_kwargs) if random_kwargs else {}
        
        # initialize the objective function
        self.objective = objective
        
        # fixed keyword arguments passed to the objective (copied to avoid mutating the caller's dict)
        self.kwargs = dict(kwargs) if kwargs else {}
        
        # initialize the surrogate model
        self.model = GaussianProcessRegressor()
        
        # append a random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}
        
        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)
        
        # draw a random initial configuration from the search spaces
        config = get_random_samples(search_spaces)
        
        if os.path.exists(self.checkpoint):
            
            # resume from the saved checkpoint
            with open(self.checkpoint, 'rb') as f:
                
                checkpoint = pickle.load(f)
                
                self.data = checkpoint['data']
                
                self.scores = checkpoint['scores']
                
                self.model = checkpoint['model']
                
                self.current_trial = checkpoint['trial']
                
                print(f"Checkpoint loaded at trial {self.current_trial}")
        
        else:
            
            # make sure the checkpoint directory exists before writing
            os.makedirs(os.path.dirname(self.checkpoint) or '.', exist_ok = True)
            
            # add the initial config to the kwargs
            self.kwargs['config'] = config
            
            # compute the first score
            score = self.objective(**self.kwargs)
            
            # initialize the input data
            self.data = [list(config.values())]
            
            # initialize the scores
            self.scores = [[score]]
            
            # fit the surrogate model on the initial data and score
            self.model.fit(self.data, self.scores)
            
            # initialize the number of trials to zero
            self.current_trial = 0
            
            with open(self.checkpoint, 'wb') as f:
                
                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }
                
                pickle.dump(checkpoint, f)
    
    def optimize(self, n_trials: int = 50, n_tests: int = 100):
        """Find the best hyperparameters with Bayesian optimization

        Args:
            n_trials (int, optional): The number of trials. Defaults to 50.
            n_tests (int, optional): The number of random candidates scored by the acquisition function at each trial. Defaults to 100.
        """
        
        # run multiple trials in order to find the best parameters
        for trial in range(self.current_trial + 1, self.current_trial + n_trials + 1):
            
            # generate a new sample with the acquisition and surrogate functions
            new_sample = generate_sample(self.data, self.model, self.search_spaces, n_tests, maximize = self.maximize)
            config = {key: new_sample[i] for i, key in enumerate(self.search_spaces)}
            
            # evaluate the objective on the new configuration
            new_score = self.get_score(config)
            
            # add the new sample and its score to their lists
            self.data.append(new_sample)
            
            self.scores.append([new_score])
            
            # refit the surrogate model on the updated data
            self.model.fit(self.data, self.scores)
            
            # update the current trial
            self.current_trial = trial
            
            # save the checkpoint after every trial
            with open(self.checkpoint, 'wb') as f:
                
                checkpoint = {
                    'data': self.data,
                    'scores': self.scores,
                    'model': self.model,
                    'trial': self.current_trial
                }
                
                pickle.dump(checkpoint, f)
    
    def get_score(self, config: dict):
        
        # reseed the generator from system entropy so that each trial gets fresh random suffixes
        random.seed(None)
        
        # append a new random 7-character suffix to each random kwarg value
        random_kwargs = {key: value + ''.join(random.choice(letters) for _ in range(7)) for key, value in self.random_kwargs.items()}
        print(random_kwargs)
        
        # add the random kwargs to the kwargs
        self.kwargs.update(random_kwargs)
        
        # add the config to the kwargs
        self.kwargs['config'] = config
        
        # compute the new score
        new_score = self.objective(**self.kwargs)
        
        return new_score
        
    def get_results(self):
        """Return the generated samples and their scores

        Returns:
            pd.DataFrame: A data frame containing the results
        """
        # build one column per hyperparameter from the collected samples
        data = {key: np.array(self.data, dtype = object)[:, i] for i, key in enumerate(self.search_spaces)}
        
        data.update({'score': np.array(self.scores)[:, 0]})
        
        return pd.DataFrame(data)
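

if __name__ == "__main__":
    
    # Minimal standalone sketch of the loop SimpleBayesianOptimizationForFakeReal delegates to
    # PI_generate_sample for: a Gaussian process surrogate combined with a probability-of-improvement
    # acquisition over random candidates. The toy 1-D objective and the [0, 4] search interval are
    # illustrative assumptions, not part of the project.
    from scipy.stats import norm
    
    def toy_objective(x: float) -> float:
        # hypothetical objective to maximize
        return -(x - 2.0) ** 2 + np.sin(5.0 * x)
    
    rng = np.random.default_rng(0)
    
    # one random starting point
    X = [[rng.uniform(0.0, 4.0)]]
    y = [toy_objective(X[0][0])]
    
    gp = GaussianProcessRegressor()
    gp.fit(X, y)
    
    for _ in range(20):
        
        # score random candidates with the probability of improving on the best score so far
        candidates = rng.uniform(0.0, 4.0, size = (100, 1))
        mean, std = gp.predict(candidates, return_std = True)
        pi = norm.cdf((mean - max(y)) / (std + 1e-9))
        
        # evaluate the most promising candidate and refit the surrogate
        x_new = candidates[int(np.argmax(pi))]
        X.append(list(x_new))
        y.append(toy_objective(float(x_new[0])))
        gp.fit(X, y)
    
    print("Best score found:", max(y))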