|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Executes Keras benchmarks and accuracy tests.""" |
|
|
|
from __future__ import print_function |
|
|
|
import json |
|
import os |
|
import time |
|
|
|
from typing import Any, MutableMapping, Optional |
|
|
|
from absl import flags |
|
import tensorflow as tf |
|
|
|
from official.benchmark import benchmark_wrappers |
|
from official.benchmark import keras_benchmark |
|
from official.benchmark.models import resnet_imagenet_main |
|
from official.vision.image_classification import classifier_trainer |
|
|
|
# Default top-1 accuracy acceptance band for the ResNet50 accuracy benchmarks.
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77

# Top-1 accuracy acceptance band for the MobileNetV1 accuracy benchmark.
MOBILENET_V1_MIN_TOP_1_ACCURACY = 0.65
MOBILENET_V1_MAX_TOP_1_ACCURACY = 0.68

# (min, max) top-1 accuracy per model-optimization (pruning) experiment.
# NOTE(review): not referenced in this chunk — presumably consumed by other
# benchmark modules; verify before removing.
MODEL_OPTIMIZATION_TOP_1_ACCURACY = {
    'RESNET50_FINETUNE_PRUNING': (0.76, 0.77),
    'MOBILENET_V1_FINETUNE_PRUNING': (0.67, 0.68),
}

# Global absl flags object; benchmark methods below configure runs by
# mutating attributes on it before invoking the trainers.
FLAGS = flags.FLAGS
|
|
|
|
|
def _get_classifier_parameters( |
|
num_gpus: int = 0, |
|
builder: str = 'records', |
|
skip_eval: bool = False, |
|
distribution_strategy: str = 'mirrored', |
|
per_replica_batch_size: int = 128, |
|
epochs: int = 90, |
|
steps: int = 0, |
|
epochs_between_evals: int = 1, |
|
dtype: str = 'float32', |
|
enable_xla: bool = False, |
|
run_eagerly: bool = False, |
|
gpu_thread_mode: Optional[str] = None, |
|
dataset_num_private_threads: Optional[int] = None, |
|
loss_scale: Optional[str] = None, |
|
report_metrics: bool = True, |
|
batchnorm_spatial_persistent: bool = False) -> MutableMapping[str, Any]: |
|
"""Gets classifier trainer's ResNet parameters.""" |
|
return { |
|
'runtime': { |
|
'num_gpus': num_gpus, |
|
'distribution_strategy': distribution_strategy, |
|
'run_eagerly': run_eagerly, |
|
'enable_xla': enable_xla, |
|
'dataset_num_private_threads': dataset_num_private_threads, |
|
'gpu_thread_mode': gpu_thread_mode, |
|
'loss_scale': loss_scale, |
|
'batchnorm_spatial_persistent': batchnorm_spatial_persistent, |
|
}, |
|
'train_dataset': { |
|
'builder': builder, |
|
'use_per_replica_batch_size': True, |
|
'batch_size': per_replica_batch_size, |
|
'image_size': 224, |
|
'dtype': dtype, |
|
}, |
|
'validation_dataset': { |
|
'builder': builder, |
|
'batch_size': per_replica_batch_size, |
|
'use_per_replica_batch_size': True, |
|
'image_size': 224, |
|
'dtype': dtype, |
|
}, |
|
'train': { |
|
'epochs': epochs, |
|
'steps': steps, |
|
'callbacks': { |
|
'enable_tensorboard': False, |
|
'enable_checkpoint_and_export': False, |
|
'enable_time_history': True, |
|
}, |
|
'metrics': ['accuracy'] if report_metrics else [], |
|
}, |
|
'model': { |
|
'loss': { |
|
'label_smoothing': 0.1, |
|
}, |
|
}, |
|
'evaluation': { |
|
'epochs_between_evals': epochs_between_evals, |
|
'skip_eval': skip_eval, |
|
}, |
|
} |
|
|
|
|
|
class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras.

  Each `benchmark_*` method configures global absl FLAGS plus a JSON
  `params_override` (via `_get_classifier_parameters`) and then runs the
  classifier trainer end-to-end, asserting top-1 accuracy lands inside
  [top_1_min, top_1_max].
  """

  def __init__(self,
               output_dir: Optional[str] = None,
               root_data_dir: Optional[str] = None,
               **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """

    flag_methods = [classifier_trainer.define_classifier_flags]

    # NOTE(review): root_data_dir=None would make os.path.join raise;
    # callers (PerfZero) presumably always pass it — confirm.
    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      experiment_name: str,
      top_1_min: float = MIN_TOP_1_ACCURACY,
      top_1_max: float = MAX_TOP_1_ACCURACY,
      num_gpus: int = 0,
      distribution_strategy: str = 'mirrored',
      per_replica_batch_size: int = 128,
      epochs: int = 90,
      steps: int = 0,
      epochs_between_evals: int = 1,
      dtype: str = 'float32',
      enable_xla: bool = False,
      run_eagerly: bool = False,
      gpu_thread_mode: Optional[str] = None,
      dataset_num_private_threads: Optional[int] = None,
      loss_scale: Optional[str] = None):
    """Runs and reports the benchmark given the provided configuration."""
    # Configure the trainer entirely through global FLAGS; the nested config
    # goes in as a JSON params_override string.
    FLAGS.model_type = 'resnet'
    FLAGS.dataset = 'imagenet'
    FLAGS.mode = 'train_and_eval'
    FLAGS.data_dir = self.data_dir
    FLAGS.model_dir = self._get_model_dir(experiment_name)
    parameters = _get_classifier_parameters(
        num_gpus=num_gpus,
        distribution_strategy=distribution_strategy,
        per_replica_batch_size=per_replica_batch_size,
        epochs=epochs,
        steps=steps,
        epochs_between_evals=epochs_between_evals,
        dtype=dtype,
        enable_xla=enable_xla,
        run_eagerly=run_eagerly,
        gpu_thread_mode=gpu_thread_mode,
        dataset_num_private_threads=dataset_num_private_threads,
        report_metrics=True,
        loss_scale=loss_scale,
        batchnorm_spatial_persistent=True)
    FLAGS.params_override = json.dumps(parameters)
    # Global batch = replicas * per-replica size (GPU-only class).
    total_batch_size = num_gpus * per_replica_batch_size

    start_time_sec = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=total_batch_size,
        log_steps=100)

  def benchmark_8_gpu(self):
    """Tests Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu',
        num_gpus=8,
        per_replica_batch_size=128,
        epochs=90,
        epochs_between_evals=10,
        dtype='float32')

  def benchmark_8_gpu_fp16(self):
    """Tests Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16',
        num_gpus=8,
        # fp16 halves activation memory, allowing double the batch size.
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16')

  def benchmark_xla_8_gpu_fp16(self):
    """Tests Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16',
        num_gpus=8,
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16',
        enable_xla=True)

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Tests Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_dynamic',
        # Dynamic loss scaling historically converges slightly lower here,
        # so the lower accuracy bound is relaxed from the 0.76 default.
        top_1_min=0.736,
        num_gpus=8,
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16',
        loss_scale='dynamic')

  def _get_model_dir(self, folder_name):
    # Each experiment writes checkpoints/logs under its own subdirectory.
    return os.path.join(self.output_dir, folder_name)
|
|
|
|
|
class MobilenetV1KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for MobilenetV1 in Keras.

  Unlike the ResNet accuracy class above, this one drives the legacy
  `resnet_imagenet_main` entry point (with model='mobilenet') and configures
  everything through individual absl FLAGS rather than a params_override.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """

    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(MobilenetV1KerasAccuracy, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags={
            # Select the MobileNet variant of the imagenet trainer and its
            # matching optimizer/LR defaults.
            'model': 'mobilenet',
            'optimizer': 'mobilenet_default',
            'initial_learning_rate_per_sample': 0.00039,
        })

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    # Global batch: 128 per GPU across 8 GPUs.
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                top_1_min=MOBILENET_V1_MIN_TOP_1_ACCURACY,
                                top_1_max=MOBILENET_V1_MAX_TOP_1_ACCURACY):
    """Runs the trainer with current FLAGS and reports accuracy/wall time."""
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(MobilenetV1KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    # Each experiment writes checkpoints/logs under its own subdirectory.
    return os.path.join(self.output_dir, folder_name)
|
|
|
|
|
class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 (classifier_trainer) benchmarks.

  Throughput-oriented benchmarks (eval skipped, metrics off) driven through
  the classifier trainer: each `benchmark_*` method configures global absl
  FLAGS plus a JSON `params_override` and runs for a short, fixed number of
  steps.
  """

  def __init__(self, output_dir=None, default_flags=None,
               tpu=None, dataset_builder='records', train_epochs=1,
               train_steps=110, data_dir=None):
    """Initializes the benchmark base.

    Args:
      output_dir: directory where to output e.g. log files.
      default_flags: dict of flag-name -> value applied before each test.
      tpu: optional TPU address, forwarded to the base class.
      dataset_builder: input pipeline builder name (e.g. 'records').
      train_epochs: number of epochs to train per benchmark run.
      train_steps: default number of steps per benchmark run.
      data_dir: directory holding the dataset.
    """
    flag_methods = [classifier_trainer.define_classifier_flags]

    self.dataset_builder = dataset_builder
    self.train_epochs = train_epochs
    self.train_steps = train_steps
    self.data_dir = data_dir

    super(Resnet50KerasClassifierBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags,
        tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      experiment_name: str,
      skip_steps: Optional[int] = None,
      top_1_min: float = MIN_TOP_1_ACCURACY,
      top_1_max: float = MAX_TOP_1_ACCURACY,
      num_gpus: int = 0,
      num_tpus: int = 0,
      distribution_strategy: str = 'mirrored',
      per_replica_batch_size: int = 128,
      epochs_between_evals: int = 1,
      dtype: str = 'float32',
      enable_xla: bool = False,
      run_eagerly: bool = False,
      gpu_thread_mode: Optional[str] = None,
      dataset_num_private_threads: Optional[int] = None,
      loss_scale: Optional[str] = None,
      steps: Optional[int] = None):
    """Runs and reports the benchmark given the provided configuration.

    Args:
      experiment_name: name used for the model dir / report.
      skip_steps: warmup steps to exclude from reporting; defaults to all
        but the final 100 train steps.
      top_1_min: lower accuracy bound (unused when eval is skipped).
      top_1_max: upper accuracy bound (unused when eval is skipped).
      num_gpus: number of GPU replicas.
      num_tpus: number of TPU cores (used for batch-size accounting only).
      distribution_strategy: tf.distribute strategy name, or 'tpu'/'off'.
      per_replica_batch_size: batch size per replica.
      epochs_between_evals: eval cadence (eval is skipped here anyway).
      dtype: computation dtype, e.g. 'float32'/'float16'/'bfloat16'.
      enable_xla: whether to enable XLA compilation.
      run_eagerly: whether to run the model eagerly.
      gpu_thread_mode: GPU thread mode override (e.g. 'gpu_private').
      dataset_num_private_threads: private threadpool size for the input
        pipeline.
      loss_scale: loss-scale setting for fp16 (e.g. 'dynamic').
      steps: optional per-run override of self.train_steps. Previously this
        keyword did not exist, so benchmarks passing steps= (e.g. the
        delay-measure variant) raised TypeError.
    """
    FLAGS.model_type = 'resnet'
    FLAGS.dataset = 'imagenet'
    FLAGS.mode = 'train_and_eval'
    FLAGS.data_dir = self.data_dir
    FLAGS.model_dir = self._get_model_dir(experiment_name)
    # Allow a per-benchmark step-count override; fall back to the
    # constructor default.
    train_steps = self.train_steps if steps is None else steps
    parameters = _get_classifier_parameters(
        builder=self.dataset_builder,
        skip_eval=True,
        num_gpus=num_gpus,
        distribution_strategy=distribution_strategy,
        per_replica_batch_size=per_replica_batch_size,
        epochs=self.train_epochs,
        steps=train_steps,
        epochs_between_evals=epochs_between_evals,
        dtype=dtype,
        enable_xla=enable_xla,
        # Fix: run_eagerly was accepted but never forwarded, so the
        # *_run_eagerly benchmarks silently ran in graph mode.
        run_eagerly=run_eagerly,
        gpu_thread_mode=gpu_thread_mode,
        dataset_num_private_threads=dataset_num_private_threads,
        loss_scale=loss_scale,
        report_metrics=False,
        batchnorm_spatial_persistent=True)
    FLAGS.params_override = json.dumps(parameters)
    # Global batch = replicas * per-replica size; TPU runs count cores.
    if distribution_strategy == 'tpu':
      total_batch_size = num_tpus * per_replica_batch_size
    else:
      total_batch_size = num_gpus * per_replica_batch_size

    start_time_sec = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    # Express warmup in units of log intervals; by default everything up
    # to the last 100 steps is treated as warmup.
    warmup = (skip_steps or (train_steps - 100)) // FLAGS.log_steps

    super(Resnet50KerasClassifierBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=total_batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup,
        start_time_sec=start_time_sec)

  def benchmark_1_gpu_no_dist_strat(self):
    """Tests Keras model with 1 GPU, no distribution strategy."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat',
        num_gpus=1,
        distribution_strategy='off',
        per_replica_batch_size=128)

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Tests Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly',
        num_gpus=1,
        run_eagerly=True,
        distribution_strategy='off',
        # Smaller batch than the graph-mode variant (eager overhead).
        per_replica_batch_size=64)

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Tests with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly_fp16',
        num_gpus=1,
        run_eagerly=True,
        distribution_strategy='off',
        dtype='float16',
        per_replica_batch_size=128)

  def benchmark_1_gpu(self):
    """Tests Keras model with 1 GPU."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu',
        num_gpus=1,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_xla_1_gpu(self):
    """Tests Keras model with XLA and 1 GPU."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_1_gpu_fp16(self):
    """Tests Keras model with 1 GPU and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_fp16',
        num_gpus=1,
        distribution_strategy='one_device',
        dtype='float16',
        # fp16 halves activation memory, allowing double the batch size.
        per_replica_batch_size=256)

  def benchmark_1_gpu_fp16_dynamic(self):
    """Tests Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_fp16_dynamic',
        num_gpus=1,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        loss_scale='dynamic')

  def benchmark_xla_1_gpu_fp16(self):
    """Tests Keras model with XLA, 1 GPU and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256)

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16_tweaked',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private')

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16_dynamic',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        loss_scale='dynamic')

  def benchmark_8_gpu(self):
    """Tests Keras model with 8 GPUs."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu',
        num_gpus=8,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_8_gpu_tweaked(self):
    """Tests Keras model with manual config tuning and 8 GPUs."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_tweaked',
        num_gpus=8,
        distribution_strategy='mirrored',
        per_replica_batch_size=128,
        dataset_num_private_threads=14)

  def benchmark_xla_8_gpu(self):
    """Tests Keras model with XLA and 8 GPUs."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_xla_8_gpu_tweaked(self):
    """Tests Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_tweaked',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=128,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=24)

  def benchmark_8_gpu_fp16(self):
    """Tests Keras model with 8 GPUs and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256)

  def benchmark_8_gpu_fp16_tweaked(self):
    """Tests Keras model with 8 GPUs, fp16, and manual config tuning."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16_tweaked',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=40)

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16_dynamic_tweaked',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        loss_scale='dynamic',
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=40)

  def benchmark_xla_8_gpu_fp16(self):
    """Tests Keras model with XLA, 8 GPUs and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256)

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_tweaked',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=48)

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Tests with manual config tuning, XLA, 8 GPUs and fp16.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_tweaked_delay_measure',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=48,
        # Longer run so the default warmup window (all but the final 100
        # steps) excludes startup jitter. This keyword previously raised
        # TypeError because _run_and_report_benchmark had no `steps`.
        steps=310)

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_dynamic_tweaked',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        loss_scale='dynamic',
        dataset_num_private_threads=48)

  def benchmark_2x2_tpu_bf16(self):
    """Test Keras model with 2x2 TPU, bf16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_2x2_tpu_bf16',
        dtype='bfloat16',
        num_tpus=8,
        distribution_strategy='tpu',
        per_replica_batch_size=128)

  def benchmark_4x4_tpu_bf16(self):
    """Test Keras model with 4x4 TPU, bf16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_4x4_tpu_bf16',
        dtype='bfloat16',
        num_tpus=32,
        distribution_strategy='tpu',
        per_replica_batch_size=128)

  def benchmark_8x8_tpu_bf16(self):
    """Test Keras model with 8x8 TPU, bf16."""
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_8x8_tpu_bf16',
        dtype='bfloat16',
        num_tpus=128,
        distribution_strategy='tpu',
        per_replica_batch_size=64)

  def fill_report_object(self, stats):
    """Fills the PerfZero report object from run stats."""
    super(Resnet50KerasClassifierBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
|
|
|
|
|
class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): |
|
"""Resnet50 benchmarks.""" |
|
|
|
  def __init__(self, output_dir=None, default_flags=None, tpu=None):
    """Initializes the benchmark with resnet_imagenet_main's Keras flags.

    Args:
      output_dir: directory where to output e.g. log files.
      default_flags: dict of flag-name -> value applied before each test.
      tpu: optional TPU address, forwarded to the base class.
    """
    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags,
        tpu=tpu)
|
|
|
  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, skip_steps=None):
    """Runs resnet_imagenet_main with current FLAGS and reports throughput.

    Args:
      skip_steps: warmup steps to exclude from reporting; defaults to all
        but the final 100 train steps.
    """
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    # Express warmup in units of log intervals; by default everything up
    # to the last 100 steps is treated as warmup.
    warmup = (skip_steps or (FLAGS.train_steps - 100)) // FLAGS.log_steps

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup,
        start_time_sec=start_time_sec)
|
|
|
  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    # 'off' bypasses tf.distribute entirely (contrast with 'one_device').
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    # Smaller batch than the graph-mode variant (64 vs 128).
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    # The 'tweak' relative to the non-tweaked variant: pin ops to the GPU
    # explicitly.
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    # The 'tweak': pin ops to the GPU explicitly.
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_amp(self):
    """Test Keras model with 1 GPU with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.dtype = 'fp16'
    # AMP via the grappler graph rewrite rather than the Keras policy.
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_1_gpu_amp(self):
    """Test Keras model with XLA and 1 GPU with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.dtype = 'fp16'
    # AMP via the grappler graph rewrite rather than the Keras policy.
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    # fp16 halves activation memory, allowing double the fp32 batch size.
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()
|
|
|
  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    # Dynamic instead of the default static loss scale.
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    # The 'tweak': dedicated GPU threads.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    # Dynamic instead of the default static loss scale.
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()
|
|
|
  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    # Global batch: 128 per GPU across 8 GPUs.
    FLAGS.batch_size = 128 * 8
    self._run_and_report_benchmark()
|
|
|
  def benchmark_8_gpu_amp(self):
    """Test Keras model with 8 GPUs with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.dtype = 'fp16'
    # AMP via the grappler graph rewrite rather than the Keras policy.
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()
|
|
|
  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8
    # The 'tweak': dedicated input-pipeline threadpool.
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    FLAGS.batch_size = 128 * 8
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_8_gpu_amp(self):
    """Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.dtype = 'fp16'
    # AMP via the grappler graph rewrite rather than the Keras policy.
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()
|
|
|
  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8
    # The 'tweaks': dedicated GPU threads and input-pipeline threadpool.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 24
    self._run_and_report_benchmark()
|
|
|
  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    # fp16 halves activation memory, allowing double the fp32 batch size.
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()
|
|
|
def benchmark_8_gpu_fp16_tweaked(self): |
|
"""Test Keras model with 8 GPUs, fp16, and manual config tuning.""" |
|
self._setup() |
|
|
|
FLAGS.num_gpus = 8 |
|
FLAGS.dtype = 'fp16' |
|
FLAGS.enable_eager = True |
|
FLAGS.distribution_strategy = 'mirrored' |
|
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked') |
|
FLAGS.batch_size = 256 * 8 |
|
FLAGS.tf_gpu_thread_mode = 'gpu_private' |
|
FLAGS.dataset_num_private_threads = 40 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_8_gpu_fp16_dynamic_tweaked(self): |
|
"""Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned.""" |
|
self._setup() |
|
|
|
FLAGS.num_gpus = 8 |
|
FLAGS.dtype = 'fp16' |
|
FLAGS.enable_eager = True |
|
FLAGS.distribution_strategy = 'mirrored' |
|
FLAGS.model_dir = self._get_model_dir( |
|
'benchmark_8_gpu_fp16_dynamic_tweaked') |
|
FLAGS.batch_size = 256 * 8 |
|
FLAGS.loss_scale = 'dynamic' |
|
FLAGS.tf_gpu_thread_mode = 'gpu_private' |
|
FLAGS.dataset_num_private_threads = 40 |
|
self._run_and_report_benchmark() |
|
|
|
  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()
|
|
|
def benchmark_xla_8_gpu_fp16_tweaked(self): |
|
"""Test Keras model with manual config tuning, XLA, 8 GPUs and fp16.""" |
|
self._setup() |
|
|
|
FLAGS.num_gpus = 8 |
|
FLAGS.dtype = 'fp16' |
|
FLAGS.enable_eager = True |
|
FLAGS.enable_xla = True |
|
FLAGS.distribution_strategy = 'mirrored' |
|
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked') |
|
FLAGS.batch_size = 256 * 8 |
|
FLAGS.tf_gpu_thread_mode = 'gpu_private' |
|
FLAGS.datasets_num_private_threads = 48 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self): |
|
"""Test with manual config tuning, XLA, 8 GPUs and fp16. |
|
|
|
Delay performance measurement for stable performance on 96 vCPU platforms. |
|
""" |
|
self._setup() |
|
|
|
FLAGS.num_gpus = 8 |
|
FLAGS.dtype = 'fp16' |
|
FLAGS.enable_eager = True |
|
FLAGS.enable_xla = True |
|
FLAGS.distribution_strategy = 'mirrored' |
|
FLAGS.model_dir = self._get_model_dir( |
|
'benchmark_xla_8_gpu_fp16_tweaked_delay_measure') |
|
FLAGS.batch_size = 256 * 8 |
|
FLAGS.tf_gpu_thread_mode = 'gpu_private' |
|
FLAGS.datasets_num_private_threads = 48 |
|
FLAGS.train_steps = 310 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self): |
|
"""Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16.""" |
|
self._setup() |
|
|
|
FLAGS.num_gpus = 8 |
|
FLAGS.dtype = 'fp16' |
|
FLAGS.enable_eager = True |
|
FLAGS.enable_xla = True |
|
FLAGS.distribution_strategy = 'mirrored' |
|
FLAGS.model_dir = self._get_model_dir( |
|
'benchmark_xla_8_gpu_fp16_dynamic_tweaked') |
|
FLAGS.batch_size = 256 * 8 |
|
FLAGS.loss_scale = 'dynamic' |
|
FLAGS.tf_gpu_thread_mode = 'gpu_private' |
|
FLAGS.datasets_num_private_threads = 48 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_2x2_tpu_bf16(self): |
|
"""Test Keras model with 2x2 TPU, bf16.""" |
|
self._setup() |
|
|
|
FLAGS.dtype = 'bf16' |
|
FLAGS.distribution_strategy = 'tpu' |
|
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16') |
|
FLAGS.batch_size = 1024 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_4x4_tpu_bf16(self): |
|
"""Test Keras model with 4x4 TPU, bf16.""" |
|
self._setup() |
|
|
|
FLAGS.dtype = 'bf16' |
|
FLAGS.distribution_strategy = 'tpu' |
|
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16') |
|
FLAGS.batch_size = 4096 |
|
self._run_and_report_benchmark() |
|
|
|
def benchmark_8x8_tpu_bf16(self): |
|
"""Test Keras model with 8x8 TPU, bf16.""" |
|
self._setup() |
|
|
|
FLAGS.dtype = 'bf16' |
|
FLAGS.distribution_strategy = 'tpu' |
|
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16') |
|
FLAGS.batch_size = 8192 |
|
self._run_and_report_benchmark() |
|
|
|
  def fill_report_object(self, stats):
    """Delegates to the base class, attaching batch size and log interval.

    Args:
      stats: benchmark statistics to record in the report object.
    """
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
|
|
|
|
|
class Resnet50KerasBenchmarkSynth(Resnet50KerasClassifierBenchmarkBase):
  """Synthetic-data benchmark tests for ResNet50."""

  def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
    default_flags = {'log_steps': 10}

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=default_flags, tpu=tpu,
        dataset_builder='synthetic', train_epochs=1, train_steps=110)
|
|
|
|
|
class Resnet50KerasBenchmarkReal(Resnet50KerasClassifierBenchmarkBase):
  """Real-data benchmark tests for ResNet50."""

  def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
    default_flags = {'log_steps': 10}
    imagenet_dir = os.path.join(root_data_dir, 'imagenet')

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags, tpu=tpu,
        dataset_builder='records', train_epochs=1, train_steps=110,
        data_dir=imagenet_dir)
|
|
|
|
|
class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
  """Resnet50 real data (stored in remote storage) benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    default_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_epochs': 2,
        # Cache the training dataset to avoid re-reading remote storage.
        'training_dataset_cache': True,
        'log_steps': 100,
    }

    super(Resnet50KerasBenchmarkRemoteData, self).__init__(
        output_dir=output_dir, default_flags=default_flags)

  def _override_flags_to_run_test_shorter(self):
    """Shortens the run so single-GPU/eager variants finish quickly."""
    FLAGS.train_epochs = 1
    FLAGS.train_steps = 300
    FLAGS.log_steps = 10

  def benchmark_1_gpu_no_dist_strat(self):
    """Benchmarks on 1 GPU without any distribution strategy."""
    self._setup()

    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Benchmarks on 1 GPU, no distribution strategy, eager execution."""
    self._setup()

    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self):
    """Benchmarks on 1 GPU, no strategy, eager, explicit GPU placement."""
    self._setup()

    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Benchmarks on 1 GPU, no strategy, eager execution, fp16."""
    self._setup()

    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self):
    """Benchmarks on 1 GPU, no strategy, eager, fp16, explicit placement."""
    self._setup()

    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmarks on 1 GPU with the one_device strategy."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_amp(self):
    """Benchmarks on 1 GPU with automatic mixed precision (graph rewrite)."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmarks on 1 GPU with XLA enabled."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_amp(self):
    """Benchmarks on 1 GPU with XLA and automatic mixed precision."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Benchmarks on 1 GPU with fp16 precision."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Benchmarks on 1 GPU with fp16 and dynamic loss scaling."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Benchmarks on 1 GPU with XLA and fp16."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Benchmarks on 1 GPU with XLA, fp16, and a private GPU thread pool."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Benchmarks on 1 GPU with XLA, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.distribution_strategy = 'one_device'
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs the benchmark, skipping warm-up steps on multi-GPU configs.

    Single-GPU and eager runs were shortened by
    _override_flags_to_run_test_shorter, so no steps are skipped for them;
    full-length runs skip the first 600 steps for stable measurements.
    """
    short_run = FLAGS.num_gpus == 1 or FLAGS.run_eagerly
    skip_steps = None if short_run else 600
    super(Resnet50KerasBenchmarkRemoteData,
          self)._run_and_report_benchmark(skip_steps=skip_steps)
|
|
|
|
|
class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    default_flags = {
        'use_trivial_model': True,
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'dtype': 'fp16',
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 600,
        'log_steps': 100,
        'distribution_strategy': 'mirrored',
    }

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=[resnet_imagenet_main.define_imagenet_keras_flags],
        default_flags=default_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs the trivial model and reports wall time and stats."""
    start = time.time()
    stats = resnet_imagenet_main.run(FLAGS)
    elapsed = time.time() - start

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        elapsed,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warmup the machine."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8
    FLAGS.train_steps = 700
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    """Attaches batch size and log interval to the report object."""
    super(TrivialKerasBenchmarkReal, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
|
|
|
|
|
class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark):
  """Resnet50 distributed accuracy tests with multiple workers."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50MultiWorkerKerasAccuracy, self).__init__(
        output_dir=output_dir,
        flag_methods=[classifier_trainer.define_imagenet_keras_flags])

  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
    """Shared setup for all multi-worker accuracy benchmarks.

    Args:
      eager: whether eager execution is enabled.
      num_workers: number of workers participating in training.
      all_reduce_alg: all-reduce algorithm name ('ring' or 'nccl').
    """
    self._setup()

    gpus_per_worker = 8
    FLAGS.num_gpus = gpus_per_worker
    FLAGS.data_dir = self.data_dir
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = eager
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 32
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            'eager' if eager else 'graph', num_workers, all_reduce_alg))
    # Global batch size scales with total GPU count across workers.
    FLAGS.batch_size = 256 * gpus_per_worker * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                top_1_min=MIN_TOP_1_ACCURACY,
                                top_1_max=MAX_TOP_1_ACCURACY):
    """Runs training and reports stats with top-1 accuracy bounds."""
    start = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    elapsed = time.time() - start

    super(Resnet50MultiWorkerKerasAccuracy, self)._report_benchmark(
        stats,
        elapsed,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    """Returns the model directory for the given benchmark run name."""
    return os.path.join(self.output_dir, folder_name)

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
|
|
|
|
|
class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
  """Resnet50 distributed benchmark tests with multiple workers."""

  def __init__(self, output_dir=None, default_flags=None):
    super(Resnet50MultiWorkerKerasBenchmark, self).__init__(
        output_dir=output_dir, default_flags=default_flags)

  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
    """Shared setup for all multi-worker benchmarks in this class.

    Args:
      eager: whether eager execution is enabled.
      num_workers: number of workers participating in training.
      all_reduce_alg: all-reduce algorithm name ('ring' or 'nccl').
    """
    self._setup()

    gpus_per_worker = 8
    FLAGS.num_gpus = gpus_per_worker
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = eager
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 32
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            'eager' if eager else 'graph', num_workers, all_reduce_alg))
    # Global batch size scales with total GPU count across workers.
    FLAGS.batch_size = 256 * gpus_per_worker * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  def benchmark_eager_8_gpu_1_worker_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_1_worker_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
|
|
|
|
|
class Resnet50MultiWorkerKerasBenchmarkSynth(Resnet50MultiWorkerKerasBenchmark):
  """Multi-worker ResNet50 benchmarks on synthetic data."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    default_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50MultiWorkerKerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=default_flags)
|
|
|
|
|
class Resnet50MultiWorkerKerasBenchmarkReal(Resnet50MultiWorkerKerasBenchmark):
  """Multi-worker ResNet50 benchmarks on real ImageNet data."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    default_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50MultiWorkerKerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags)
|
|
|
|
|
|
|
|
|
|
|
class KerasPruningAccuracyBase(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for pruning method."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """An accuracy benchmark class for the pruning method.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      default_flags: default flags
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    if default_flags is None:
      default_flags = {}
    # Every pruning accuracy benchmark uses polynomial-decay pruning on
    # the ImageNet dataset under root_data_dir.
    default_flags['pruning_method'] = 'polynomial_decay'
    default_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')

    super(KerasPruningAccuracyBase, self).__init__(
        output_dir=output_dir,
        flag_methods=[resnet_imagenet_main.define_imagenet_keras_flags],
        default_flags=default_flags,
        **kwargs)

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 32 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
                                    'RESNET50_FINETUNE_PRUNING'][0],
                                top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
                                    'RESNET50_FINETUNE_PRUNING'][1]):
    """Runs training and reports stats with top-1 accuracy bounds."""
    start = time.time()
    stats = resnet_imagenet_main.run(flags.FLAGS)
    elapsed = time.time() - start

    super(KerasPruningAccuracyBase, self)._report_benchmark(
        stats,
        elapsed,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)
|
|
|
|
|
class MobilenetV1KerasPruningAccuracy(KerasPruningAccuracyBase):
  """Benchmark accuracy tests for MobilenetV1 with pruning method."""

  def __init__(self, root_data_dir=None, **kwargs):
    checkpoint = tf.train.latest_checkpoint(
        os.path.join(root_data_dir, 'mobilenet_v1'))
    default_flags = {
        'model': 'mobilenet',
        'optimizer': 'mobilenet_default',
        'initial_learning_rate_per_sample': 0.00007,
        'pretrained_filepath': checkpoint,
        'pruning_begin_step': 0,
        'pruning_end_step': 100000,
        'pruning_initial_sparsity': 0.0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    }
    super(MobilenetV1KerasPruningAccuracy, self).__init__(
        root_data_dir=root_data_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark(self):
    """Runs the benchmark with MobilenetV1 pruning accuracy bounds."""
    acc_range = MODEL_OPTIMIZATION_TOP_1_ACCURACY['MOBILENET_V1_FINETUNE_PRUNING']
    super(MobilenetV1KerasPruningAccuracy, self)._run_and_report_benchmark(
        top_1_min=acc_range[0],
        top_1_max=acc_range[1])
|
|
|
|
|
class Resnet50KerasPruningAccuracy(KerasPruningAccuracyBase):
  """Benchmark accuracy tests for resnet50 with pruning method."""

  def __init__(self, root_data_dir=None, **kwargs):
    checkpoint = tf.train.latest_checkpoint(
        os.path.join(root_data_dir, 'resnet50'))
    default_flags = {
        'model': 'resnet50_v1.5',
        'optimizer': 'mobilenet_default',
        'initial_learning_rate_per_sample': 0.0000039,
        'pretrained_filepath': checkpoint,
        'pruning_begin_step': 0,
        'pruning_end_step': 50000,
        'pruning_initial_sparsity': 0.0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    }
    super(Resnet50KerasPruningAccuracy, self).__init__(
        root_data_dir=root_data_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark(self):
    """Runs the benchmark with ResNet50 pruning accuracy bounds."""
    acc_range = MODEL_OPTIMIZATION_TOP_1_ACCURACY['RESNET50_FINETUNE_PRUNING']
    super(Resnet50KerasPruningAccuracy, self)._run_and_report_benchmark(
        top_1_min=acc_range[0],
        top_1_max=acc_range[1])
|
|
|
|
|
class KerasPruningBenchmarkRealBase(Resnet50KerasBenchmarkBase):
  """Pruning method benchmarks."""

  def __init__(self, root_data_dir=None, default_flags=None, **kwargs):
    if default_flags is None:
      default_flags = {}
    # Common pruning benchmark configuration; merged into (and mutating)
    # the caller-supplied default_flags, matching the original behavior.
    pruning_defaults = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'log_steps': 10,
        'pruning_method': 'polynomial_decay',
        'pruning_begin_step': 0,
        'pruning_end_step': 50000,
        'pruning_initial_sparsity': 0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    }
    default_flags.update(pruning_defaults)
    super(KerasPruningBenchmarkRealBase, self).__init__(
        default_flags=default_flags, **kwargs)
|
|
|
|
|
class MobilenetV1KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase):
  """Pruning method benchmarks for MobilenetV1."""

  def __init__(self, **kwargs):
    model_flags = {
        'model': 'mobilenet',
        'optimizer': 'mobilenet_default',
    }
    super(MobilenetV1KerasPruningBenchmarkReal, self).__init__(
        default_flags=model_flags, **kwargs)
|
|
|
|
|
class Resnet50KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase):
  """Pruning method benchmarks for resnet50."""

  def __init__(self, **kwargs):
    model_flags = {
        'model': 'resnet50_v1.5',
        'optimizer': 'mobilenet_default',
    }
    super(Resnet50KerasPruningBenchmarkReal, self).__init__(
        default_flags=model_flags, **kwargs)
|
|
|
|
|
if __name__ == '__main__':
  # tf.test.main() discovers and runs the benchmark_* methods defined above
  # when this module is executed directly.
  tf.test.main()
|
|