|
|
|
|
|
|
|
import collections |
|
import dataclasses |
|
import logging |
|
import random |
|
import time |
|
|
|
from ..execution import query_available_resources, submit_models |
|
from ..graph import ModelStatus |
|
from .base import BaseStrategy |
|
from .utils import dry_run_for_search_space, get_targeted_model |
|
|
|
|
|
_logger = logging.getLogger(__name__) |
|
|
|
|
|
@dataclasses.dataclass |
|
class Individual: |
|
""" |
|
A class that represents an individual. |
|
Holds two attributes, where ``x`` is the model and ``y`` is the metric (e.g., accuracy). |
|
""" |
|
x: dict |
|
y: float |
|
|
|
|
|
class RegularizedEvolution(BaseStrategy): |
|
""" |
|
Algorithm for regularized evolution (i.e. aging evolution). |
|
Follows "Algorithm 1" in Real et al. "Regularized Evolution for Image Classifier Architecture Search". |
|
|
|
Parameters |
|
---------- |
|
optimize_mode : str |
|
Can be one of "maximize" and "minimize". Default: maximize. |
|
population_size : int |
|
The number of individuals to keep in the population. Default: 100. |
|
cycles : int |
|
The number of cycles (trials) the algorithm should run for. Default: 20000. |
|
sample_size : int |
|
The number of individuals that should participate in each tournament. Default: 25. |
|
mutation_prob : float |
|
Probability that mutation happens in each dim. Default: 0.05 |
|
on_failure : str |
|
Can be one of "ignore" and "worst". If "ignore", simply give up the model and find a new one. |
|
If "worst", mark the model as -inf (if maximize, inf if minimize), so that the algorithm "learns" to avoid such model. |
|
Default: ignore. |
|
""" |
|
|
|
def __init__(self, optimize_mode='maximize', population_size=100, sample_size=25, cycles=20000, |
|
mutation_prob=0.05, on_failure='ignore'): |
|
assert optimize_mode in ['maximize', 'minimize'] |
|
assert on_failure in ['ignore', 'worst'] |
|
assert sample_size < population_size |
|
self.optimize_mode = optimize_mode |
|
self.population_size = population_size |
|
self.sample_size = sample_size |
|
self.cycles = cycles |
|
self.mutation_prob = mutation_prob |
|
self.on_failure = on_failure |
|
|
|
self._worst = float('-inf') if self.optimize_mode == 'maximize' else float('inf') |
|
|
|
self._success_count = 0 |
|
self._population = collections.deque() |
|
self._running_models = [] |
|
self._polling_interval = 2. |
|
|
|
def random(self, search_space): |
|
return {k: random.choice(v) for k, v in search_space.items()} |
|
|
|
def mutate(self, parent, search_space): |
|
child = {} |
|
for k, v in parent.items(): |
|
if random.uniform(0, 1) < self.mutation_prob: |
|
|
|
|
|
child[k] = random.choice(search_space[k]) |
|
else: |
|
child[k] = v |
|
return child |
|
|
|
def best_parent(self): |
|
samples = [p for p in self._population] |
|
random.shuffle(samples) |
|
samples = list(samples)[:self.sample_size] |
|
if self.optimize_mode == 'maximize': |
|
parent = max(samples, key=lambda sample: sample.y) |
|
else: |
|
parent = min(samples, key=lambda sample: sample.y) |
|
return parent.x |
|
|
|
def run(self, base_model, applied_mutators): |
|
search_space = dry_run_for_search_space(base_model, applied_mutators) |
|
|
|
_logger.info('Initializing the first population.') |
|
while len(self._population) + len(self._running_models) <= self.population_size: |
|
|
|
while len(self._population) + len(self._running_models) < self.population_size: |
|
config = self.random(search_space) |
|
self._submit_config(config, base_model, applied_mutators) |
|
|
|
self._move_succeeded_models_to_population() |
|
self._remove_failed_models_from_running_list() |
|
time.sleep(self._polling_interval) |
|
|
|
if len(self._population) >= self.population_size: |
|
break |
|
|
|
|
|
_logger.info('Running mutations.') |
|
while self._success_count + len(self._running_models) <= self.cycles: |
|
|
|
while query_available_resources() > 0 and self._success_count + len(self._running_models) < self.cycles: |
|
config = self.mutate(self.best_parent(), search_space) |
|
self._submit_config(config, base_model, applied_mutators) |
|
|
|
self._move_succeeded_models_to_population() |
|
self._remove_failed_models_from_running_list() |
|
time.sleep(self._polling_interval) |
|
|
|
if self._success_count >= self.cycles: |
|
break |
|
|
|
def _submit_config(self, config, base_model, mutators): |
|
_logger.debug('Model submitted to running queue: %s', config) |
|
model = get_targeted_model(base_model, mutators, config) |
|
submit_models(model) |
|
self._running_models.append((config, model)) |
|
return model |
|
|
|
def _move_succeeded_models_to_population(self): |
|
completed_indices = [] |
|
for i, (config, model) in enumerate(self._running_models): |
|
metric = None |
|
if self.on_failure == 'worst' and model.status == ModelStatus.Failed: |
|
metric = self._worst |
|
elif model.status == ModelStatus.Trained: |
|
metric = model.metric |
|
if metric is not None: |
|
individual = Individual(config, metric) |
|
_logger.debug('Individual created: %s', str(individual)) |
|
self._population.append(individual) |
|
if len(self._population) > self.population_size: |
|
self._population.popleft() |
|
completed_indices.append(i) |
|
for i in completed_indices[::-1]: |
|
|
|
self._success_count += 1 |
|
self._running_models.pop(i) |
|
|
|
def _remove_failed_models_from_running_list(self): |
|
|
|
|
|
if self.on_failure == 'ignore': |
|
number_of_failed_models = len([g for g in self._running_models if g[1].status == ModelStatus.Failed]) |
|
self._running_models = [g for g in self._running_models if g[1].status != ModelStatus.Failed] |
|
if number_of_failed_models > 0: |
|
_logger.info('%d failed models are ignored. Will retry.', number_of_failed_models) |
|
|