# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import collections
import dataclasses
import logging
import random
import time

from ..execution import query_available_resources, submit_models
from ..graph import ModelStatus
from .base import BaseStrategy
from .utils import dry_run_for_search_space, get_targeted_model

_logger = logging.getLogger(__name__)


@dataclasses.dataclass
class Individual:
    """
    A class that represents an individual.
    Holds two attributes, where ``x`` is the model and ``y`` is the metric (e.g., accuracy).
    """
    x: dict
    y: float


class RegularizedEvolution(BaseStrategy):
    """
    Algorithm for regularized evolution (i.e., aging evolution).
    Follows "Algorithm 1" in Real et al., "Regularized Evolution for Image Classifier Architecture Search".

    Parameters
    ----------
    optimize_mode : str
        Can be one of "maximize" and "minimize". Default: maximize.
    population_size : int
        The number of individuals to keep in the population. Default: 100.
    cycles : int
        The number of cycles (trials) the algorithm should run for. Default: 20000.
    sample_size : int
        The number of individuals that should participate in each tournament. Default: 25.
    mutation_prob : float
        Probability that mutation happens in each dimension. Default: 0.05.
    on_failure : str
        Can be one of "ignore" and "worst". If "ignore", simply give up on the model and find a new one.
        If "worst", mark the model as -inf when maximizing (inf when minimizing), so that the algorithm
        "learns" to avoid such models. Default: ignore.
    """

    def __init__(self, optimize_mode='maximize', population_size=100, sample_size=25, cycles=20000,
                 mutation_prob=0.05, on_failure='ignore'):
        assert optimize_mode in ['maximize', 'minimize']
        assert on_failure in ['ignore', 'worst']
        assert sample_size < population_size
        self.optimize_mode = optimize_mode
        self.population_size = population_size
        self.sample_size = sample_size
        self.cycles = cycles
        self.mutation_prob = mutation_prob
        self.on_failure = on_failure

        self._worst = float('-inf') if self.optimize_mode == 'maximize' else float('inf')
        self._success_count = 0
        self._population = collections.deque()
        self._running_models = []
        self._polling_interval = 2.

    def random(self, search_space):
        return {k: random.choice(v) for k, v in search_space.items()}
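
    # Illustrative example (hypothetical search space, not part of the API):
    # given ``search_space = {'filters': [16, 32, 64], 'kernel': [3, 5]}``,
    # ``self.random(search_space)`` may return ``{'filters': 32, 'kernel': 5}``,
    # i.e. one uniformly random choice per dimension.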

    def mutate(self, parent, search_space):
        child = {}
        for k, v in parent.items():
            if random.uniform(0, 1) < self.mutation_prob:
                # NOTE: we do not exclude the original choice here for simplicity,
                # which is slightly different from the original paper.
                child[k] = random.choice(search_space[k])
            else:
                child[k] = v
        return child
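
    # Illustrative note: each dimension is resampled independently with
    # probability ``mutation_prob``. With the default 0.05 and, say, a
    # 20-dimension search space, a child differs from its parent in one
    # dimension in expectation; because the original choice is not excluded
    # (see NOTE above), a "mutated" dimension may even keep its old value.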

    def best_parent(self):
        samples = [p for p in self._population]  # copy population
        random.shuffle(samples)
        samples = samples[:self.sample_size]
        if self.optimize_mode == 'maximize':
            parent = max(samples, key=lambda sample: sample.y)
        else:
            parent = min(samples, key=lambda sample: sample.y)
        return parent.x
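
    # In the tournament above, ``sample_size`` individuals are drawn uniformly
    # (via shuffle + slice) from the current population, and the fittest of
    # that sample becomes the parent for the next mutation.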

    def run(self, base_model, applied_mutators):
        search_space = dry_run_for_search_space(base_model, applied_mutators)
        # Run the first population regardless of concurrency.
        _logger.info('Initializing the first population.')
        while len(self._population) + len(self._running_models) <= self.population_size:
            # try to submit new models
            while len(self._population) + len(self._running_models) < self.population_size:
                config = self.random(search_space)
                self._submit_config(config, base_model, applied_mutators)
            # collect results
            self._move_succeeded_models_to_population()
            self._remove_failed_models_from_running_list()
            time.sleep(self._polling_interval)
            if len(self._population) >= self.population_size:
                break

        # Resource-aware mutation of models.
        _logger.info('Running mutations.')
        while self._success_count + len(self._running_models) <= self.cycles:
            # try to submit new models
            while query_available_resources() > 0 and self._success_count + len(self._running_models) < self.cycles:
                config = self.mutate(self.best_parent(), search_space)
                self._submit_config(config, base_model, applied_mutators)
            # collect results
            self._move_succeeded_models_to_population()
            self._remove_failed_models_from_running_list()
            time.sleep(self._polling_interval)
            if self._success_count >= self.cycles:
                break
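
    # Note on the trial lifecycle: configurations are submitted through
    # ``_submit_config`` below, the running list is polled every
    # ``_polling_interval`` seconds, trained models are moved into the
    # population by ``_move_succeeded_models_to_population``, and failed
    # models are handled according to ``on_failure``.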

    def _submit_config(self, config, base_model, mutators):
        _logger.debug('Model submitted to running queue: %s', config)
        model = get_targeted_model(base_model, mutators, config)
        submit_models(model)
        self._running_models.append((config, model))
        return model

    def _move_succeeded_models_to_population(self):
        completed_indices = []
        for i, (config, model) in enumerate(self._running_models):
            metric = None
            if self.on_failure == 'worst' and model.status == ModelStatus.Failed:
                metric = self._worst
            elif model.status == ModelStatus.Trained:
                metric = model.metric
            if metric is not None:
                individual = Individual(config, metric)
                _logger.debug('Individual created: %s', str(individual))
                self._population.append(individual)
                if len(self._population) > self.population_size:
                    # Aging: discard the oldest individual once the population is full.
                    self._population.popleft()
                completed_indices.append(i)
        for i in completed_indices[::-1]:
            # Delete from end to start so that the remaining indices are not affected.
            self._success_count += 1
            self._running_models.pop(i)

    def _remove_failed_models_from_running_list(self):
        # This is only done when the on_failure policy is set to "ignore".
        # Otherwise, failed models are assigned the worst possible metric when processed.
        if self.on_failure == 'ignore':
            number_of_failed_models = len([g for g in self._running_models if g[1].status == ModelStatus.Failed])
            self._running_models = [g for g in self._running_models if g[1].status != ModelStatus.Failed]
            if number_of_failed_models > 0:
                _logger.info('%d failed models are ignored. Will retry.', number_of_failed_models)
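

if __name__ == '__main__':
    # Minimal illustrative sketch (not part of the original module): exercises the
    # pure-Python helpers (random sampling, tournament selection, mutation) on a
    # made-up search space, without touching the execution engine. The dimension
    # names and values below are hypothetical, and ``_population`` is filled with
    # fake metrics purely for demonstration. Because of the relative imports above,
    # this only runs when the module is executed inside its package (``python -m ...``).
    toy_space = {'filters': [16, 32, 64], 'kernel': [3, 5, 7], 'activation': ['relu', 'gelu']}
    toy_strategy = RegularizedEvolution(population_size=8, sample_size=3)
    for _ in range(toy_strategy.population_size):
        toy_strategy._population.append(Individual(toy_strategy.random(toy_space), random.random()))
    toy_parent = toy_strategy.best_parent()
    print('parent:', toy_parent)
    print('child :', toy_strategy.mutate(toy_parent, toy_space))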