from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

r"""Run grid search.

Look at launch_tuning.sh for details on how to tune at scale.

Usage example:
Tune with one worker on the local machine.

CONFIG="agent=c(algorithm='pg'),"
CONFIG+="env=c(task_cycle=['reverse-tune', 'remove-tune'])"
HPARAM_SPACE_TYPE="pg"
OUT_DIR="/tmp/bf_pg_tune"
MAX_NPE=5000000
NUM_REPETITIONS=50
rm -rf $OUT_DIR
mkdir $OUT_DIR
bazel run -c opt single_task:tune -- \
    --alsologtostderr \
    --config="$CONFIG" \
    --max_npe="$MAX_NPE" \
    --num_repetitions="$NUM_REPETITIONS" \
    --logdir="$OUT_DIR" \
    --summary_interval=1 \
    --model_v=0 \
    --hparam_space="$HPARAM_SPACE_TYPE" \
    --tuner_id=0 \
    --num_tuners=1 \
    2>&1 >"$OUT_DIR/tuner_0.log"
learning/brain/tensorboard/tensorboard.sh --port 12345 --logdir "$OUT_DIR"
"""

import ast
import os

from absl import app
from absl import flags
from absl import logging
import numpy as np
from six.moves import xrange
import tensorflow as tf

from single_task import defaults  # brain coder
from single_task import run as run_lib  # brain coder

FLAGS = flags.FLAGS
flags.DEFINE_integer(
    'tuner_id', 0,
    'The unique ID for this tuning worker.')
flags.DEFINE_integer(
    'num_tuners', 1,
    'Total number of tuning workers.')
flags.DEFINE_string(
    'hparam_space', 'default',
    'String name which denotes the hparam space to tune over. This is '
    'algorithm dependent.')
flags.DEFINE_string(
    'fixed_hparams', '',
    'HParams string. Used to fix hparams during tuning.')
flags.DEFINE_float(
    'success_rate_objective_weight', 1.0,
    'How much to weight success rate vs num programs seen. By default, only '
    'success rate is optimized (this is the setting used in the paper).')


def parse_hparams_string(hparams_str):
  """Parse a comma-separated string of 'name=value' terms into a dict."""
  hparams = {}
  for term in hparams_str.split(','):
    if not term:
      continue
    name, value = term.split('=')
    hparams[name.strip()] = ast.literal_eval(value)
  return hparams


def int_to_multibase(n, bases):
  """Convert a flat index `n` into mixed-radix digits with the given bases."""
  digits = [0] * len(bases)
  for i, b in enumerate(bases):
    n, d = divmod(n, b)
    digits[i] = d
  return digits


def hparams_for_index(index, tuning_space):
  """Map a flat trial index to one point in the hparam grid."""
  keys = sorted(tuning_space.keys())
  indices = int_to_multibase(index, [len(tuning_space[k]) for k in keys])
  return tf.contrib.training.HParams(
      **{k: tuning_space[k][i] for k, i in zip(keys, indices)})
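

# A minimal sketch of how the two helpers above enumerate the grid: each flat
# trial index maps to a unique mixed-radix digit vector, which selects one
# value per hparam. The hparam space and `_example_enumerate_grid` below are
# hypothetical, for illustration only; nothing in the tuner calls this.
def _example_enumerate_grid():
  space = {'lr': [0.001, 0.01, 0.1], 'entropy_beta': [0.0, 0.05]}
  keys = sorted(space.keys())  # ['entropy_beta', 'lr']
  bases = [len(space[k]) for k in keys]  # [2, 3] -> 6 grid points.
  for index in xrange(int(np.prod(bases))):
    digits = int_to_multibase(index, bases)
    # Prints each of the 6 grid points exactly once.
    print(index, {k: space[k][d] for k, d in zip(keys, digits)})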


def run_tuner_loop(ns):
  """Run tuning loop for this worker."""
  is_chief = FLAGS.task_id == 0
  tuning_space = ns.define_tuner_hparam_space(
      hparam_space_type=FLAGS.hparam_space)
  fixed_hparams = parse_hparams_string(FLAGS.fixed_hparams)
  for name, value in fixed_hparams.items():
    tuning_space[name] = [value]
  tuning_space_size = np.prod(
      [len(values) for values in tuning_space.values()])

  # Assign this worker a contiguous block of trials. Trials are split as
  # evenly as possible; the first `remainder` tuners each take one extra.
  # Note: the starting ID must be computed from the base count, before this
  # worker's own count is incremented (see _example_shard_trials below).
  num_local_trials, remainder = divmod(tuning_space_size, FLAGS.num_tuners)
  starting_trial_id = (
      num_local_trials * FLAGS.tuner_id + min(remainder, FLAGS.tuner_id))
  if FLAGS.tuner_id < remainder:
    num_local_trials += 1
  logging.info('tuning_space_size: %d', tuning_space_size)
  logging.info('num_local_trials: %d', num_local_trials)
  logging.info('starting_trial_id: %d', starting_trial_id)

  for local_trial_index in xrange(num_local_trials):
    trial_config = defaults.default_config_with_updates(FLAGS.config)
    global_trial_index = local_trial_index + starting_trial_id
    trial_name = 'trial_' + str(global_trial_index)
    trial_dir = os.path.join(FLAGS.logdir, trial_name)
    hparams = hparams_for_index(global_trial_index, tuning_space)
    ns.write_hparams_to_config(
        trial_config, hparams, hparam_space_type=FLAGS.hparam_space)

    results_list = ns.run_training(
        config=trial_config, tuner=None, logdir=trial_dir, is_chief=is_chief,
        trial_name=trial_name)

    if not is_chief:
      # Only the chief worker needs to write tuning results to disk.
      continue

    objective, metrics = compute_tuning_objective(
        results_list, hparams, trial_name, num_trials=tuning_space_size)
    logging.info('metrics:\n%s', metrics)
    logging.info('objective: %s', objective)
    logging.info('programs_seen_fraction: %s',
                 metrics['programs_seen_fraction'])
    logging.info('success_rate: %s', metrics['success_rate'])
    logging.info('success_rate_objective_weight: %s',
                 FLAGS.success_rate_objective_weight)

    tuning_results_file = os.path.join(trial_dir, 'tuning_results.txt')
    with tf.gfile.FastGFile(tuning_results_file, 'a') as writer:
      writer.write(str(metrics) + '\n')

    logging.info('Trial %s complete.', trial_name)
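

# Hedged sketch of the trial sharding used in run_tuner_loop: each tuner gets
# a contiguous block of trial IDs, split as evenly as possible, with the first
# `remainder` tuners taking one extra trial. `_example_shard_trials` is
# hypothetical and is not called anywhere in this module.
def _example_shard_trials(tuning_space_size, num_tuners):
  shards = []
  trials_per_tuner, remainder = divmod(tuning_space_size, num_tuners)
  for tuner_id in xrange(num_tuners):
    start = trials_per_tuner * tuner_id + min(remainder, tuner_id)
    count = trials_per_tuner + (1 if tuner_id < remainder else 0)
    shards.append((start, count))
  # e.g. _example_shard_trials(11, 3) == [(0, 4), (4, 4), (8, 3)], covering
  # trial IDs 0-10 exactly once.
  return shards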


def compute_tuning_objective(results_list, hparams, trial_name, num_trials):
  """Compute tuning objective and metrics given results and trial information.

  Args:
    results_list: List of results dicts read from disk. These are written by
        workers.
    hparams: tf.contrib.training.HParams instance containing the hparams used
        in this trial (only the hparams which are being tuned).
    trial_name: Name of this trial. Used to create a trial directory.
    num_trials: Total number of trials that need to be run. This is saved in
        the metrics dict for future reference.

  Returns:
    objective: The objective computed for this trial. Choose the hparams for
        the trial with the largest objective value.
    metrics: Information about this trial. A dict.
  """
  found_solution = [r['found_solution'] for r in results_list]
  successful_program_counts = [
      r['npe'] for r in results_list if r['found_solution']]

  success_rate = sum(found_solution) / float(len(results_list))

  max_programs = FLAGS.max_npe  # Per run.
  all_program_counts = [
      r['npe'] if r['found_solution'] else max_programs
      for r in results_list]
  programs_seen_fraction = (
      float(sum(all_program_counts))
      / (max_programs * len(all_program_counts)))

  # min/max/avg stats are over successful runs.
  metrics = {
      'num_runs': len(results_list),
      'num_succeeded': sum(found_solution),
      'success_rate': success_rate,
      'programs_seen_fraction': programs_seen_fraction,
      'avg_programs': np.mean(successful_program_counts),
      'max_possible_programs_per_run': max_programs,
      'global_step': sum([r['num_batches'] for r in results_list]),
      'hparams': hparams.values(),
      'trial_name': trial_name,
      'num_trials': num_trials}

  # Report stats per task.
  tasks = [r['task'] for r in results_list]
  for task in set(tasks):
    task_list = [r for r in results_list if r['task'] == task]
    found_solution = [r['found_solution'] for r in task_list]
    successful_rewards = [
        r['best_reward'] for r in task_list if r['found_solution']]
    successful_num_batches = [
        r['num_batches'] for r in task_list if r['found_solution']]
    successful_program_counts = [
        r['npe'] for r in task_list if r['found_solution']]
    metrics.update({
        task + '__num_runs': len(task_list),
        task + '__num_succeeded': sum(found_solution),
        task + '__success_rate': (
            sum(found_solution) / float(len(task_list)))})
    if any(found_solution):
      metrics.update({
          task + '__min_reward': min(successful_rewards),
          task + '__max_reward': max(successful_rewards),
          task + '__avg_reward': np.median(successful_rewards),
          task + '__min_programs': min(successful_program_counts),
          task + '__max_programs': max(successful_program_counts),
          task + '__avg_programs': np.mean(successful_program_counts),
          task + '__min_batches': min(successful_num_batches),
          task + '__max_batches': max(successful_num_batches),
          task + '__avg_batches': np.mean(successful_num_batches)})

  # Objective will be maximized: maximize success rate, minimize the fraction
  # of programs seen. The maximum possible objective is 1. For example, with
  # weight 0.75, success_rate 0.6, and programs_seen_fraction 0.3, the
  # objective is 0.75 * 0.6 + 0.25 * (1 - 0.3) = 0.625.
  weight = FLAGS.success_rate_objective_weight
  objective = (
      weight * success_rate
      + (1 - weight) * (1 - programs_seen_fraction))
  metrics['objective'] = objective

  return objective, metrics


def main(argv):
  del argv

  logging.set_verbosity(FLAGS.log_level)

  if not FLAGS.logdir:
    raise ValueError('logdir flag must be provided.')
  if FLAGS.num_workers <= 0:
    raise ValueError('num_workers flag must be greater than 0.')
  if FLAGS.task_id < 0:
    raise ValueError('task_id flag must be greater than or equal to 0.')
  if FLAGS.task_id >= FLAGS.num_workers:
    raise ValueError(
        'task_id flag must be strictly less than num_workers flag.')
  if FLAGS.num_tuners <= 0:
    raise ValueError('num_tuners flag must be greater than 0.')
  if FLAGS.tuner_id < 0:
    raise ValueError('tuner_id flag must be greater than or equal to 0.')
  if FLAGS.tuner_id >= FLAGS.num_tuners:
    raise ValueError(
        'tuner_id flag must be strictly less than num_tuners flag.')

  ns, _ = run_lib.get_namespace(FLAGS.config)
  run_tuner_loop(ns)


if __name__ == '__main__':
  app.run(main)
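

# Hedged sketch (not invoked by this module): once every trial has written its
# tuning_results.txt, the winning hparams can be recovered by scanning for the
# trial with the largest objective. Assumes metrics dicts were serialized with
# str() as in run_tuner_loop; `_example_select_best_trial` is hypothetical.
def _example_select_best_trial(logdir):
  best = None
  for trial_name in tf.gfile.ListDirectory(logdir):
    results_file = os.path.join(logdir, trial_name, 'tuning_results.txt')
    if not tf.gfile.Exists(results_file):
      continue
    with tf.gfile.FastGFile(results_file, 'r') as f:
      # Each line is one str()-serialized metrics dict; use the latest.
      metrics = ast.literal_eval(f.read().strip().split('\n')[-1])
    if best is None or metrics['objective'] > best['objective']:
      best = metrics
  return best  # A metrics dict with 'hparams', 'objective', 'trial_name', etc.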