from __future__ import print_function

try:
    import tensorflow as tf
    from tensorflow.python.ops import nn
    relu = nn.relu
    slim = tf.contrib.slim
    sigmoid = nn.sigmoid
    softmax = nn.softmax
except Exception:
    print("tensorflow is not installed, util.tf cannot be used.")

def is_gpu_available(cuda_only=True):
    """
    code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/test.py

    Returns whether TensorFlow can access a GPU.

    Args:
        cuda_only: limit the search to CUDA GPUs.

    Returns:
        True iff a GPU device of the requested kind is available.
    """
    from tensorflow.python.client import device_lib as _device_lib
    if cuda_only:
        return any((x.device_type == 'GPU')
                   for x in _device_lib.list_local_devices())
    else:
        return any((x.device_type == 'GPU' or x.device_type == 'SYCL')
                   for x in _device_lib.list_local_devices())

def get_available_gpus(num_gpus=None):
    """
    Modified from http://stackoverflow.com/questions/38559755/how-to-get-current-available-gpus-in-tensorflow
    The original code occupies all available GPU memory just to list the devices.
    This version takes an optional num_gpus parameter; when it is given, the function does
    nothing but return the device names. It works well for single-machine training;
    whether it behaves correctly on a cluster has not been verified.
    """
    if num_gpus is None:
        from tensorflow.python.client import device_lib as _device_lib
        local_device_protos = _device_lib.list_local_devices()
        return [x.name for x in local_device_protos if x.device_type == 'GPU']
    else:
        return ['/gpu:%d' % idx for idx in range(num_gpus)]
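
# Usage sketch (not part of the original file): how the two code paths differ. Device
# names such as '/gpu:0' are the standard TF 1.x identifiers; build_model and images are
# hypothetical user code, not defined in this module.
#
#   gpus = get_available_gpus(num_gpus=2)   # ['/gpu:0', '/gpu:1'], no device query at all
#   # gpus = get_available_gpus()           # queries device_lib and may grab GPU memory
#   with tf.device(gpus[0]):
#       logits = build_model(images)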

def get_latest_ckpt(path):
    # Similar to tf.train.latest_checkpoint, but `path` may also point directly to a
    # checkpoint file instead of a training directory.
    import util
    path = util.io.get_absolute_path(path)
    if util.io.is_dir(path):
        ckpt = tf.train.get_checkpoint_state(path)
        if ckpt is not None:
            ckpt_path = ckpt.model_checkpoint_path
        else:
            ckpt_path = None
    else:
        ckpt_path = path
    return ckpt_path
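
# Usage sketch (illustrative paths, not from the original file): both calls are expected
# to resolve to the same checkpoint prefix.
#
#   ckpt = get_latest_ckpt('/tmp/train_logs')                     # training directory
#   ckpt = get_latest_ckpt('/tmp/train_logs/model.ckpt-12345')    # explicit checkpoint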

def get_all_ckpts(path):
    ckpt = tf.train.get_checkpoint_state(path)
    all_ckpts = ckpt.all_model_checkpoint_paths
    ckpts = [str(c) for c in all_ckpts]
    return ckpts

def get_iter(ckpt):
    import util
    # Extract the global step from a checkpoint name such as 'model.ckpt-12345'.
    iter_ = int(util.str.find_all(ckpt, r'.ckpt-\d+')[0].split('-')[-1])
    return iter_
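
# Usage sketch (hypothetical checkpoint name):
#
#   get_iter('/tmp/train_logs/model.ckpt-12345')   # -> 12345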

def get_init_fn(checkpoint_path, train_dir, ignore_missing_vars=False,
                checkpoint_exclude_scopes=None, model_name=None, checkpoint_model_scope=None):
    """
    code from github/SSD-tensorflow/tf_utils.py
    Returns a function run by the chief worker to warm-start the training.
    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Args:
        checkpoint_path: the checkpoint to be restored.
        train_dir: the directory where checkpoints are saved during training.
        ignore_missing_vars: if False and there are variables in the model but not in the
            checkpoint, an error is raised.
        checkpoint_model_scope and model_name: if the root scope of the checkpoint and of
            the model in the session differ (but all sub-scopes are the same), specify
            them explicitly.
        checkpoint_exclude_scopes: comma-separated scopes of variables to be excluded when
            restoring from checkpoint_path.
    Returns:
        An init function run by the supervisor.
    """
    import util
    if util.str.is_none_or_empty(checkpoint_path):
        return None
    # Warn the user if a checkpoint exists in train_dir, then ignore checkpoint_path.
    if tf.train.latest_checkpoint(train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % train_dir)
        return None

    exclusions = []
    if checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    # Change the model scope if necessary.
    if checkpoint_model_scope is not None:
        variables_to_restore = {checkpoint_model_scope + '/' + var.op.name: var
                                for var in variables_to_restore}

    tf.logging.info('variables_to_restore: %r' % (variables_to_restore, ))
    checkpoint_path = get_latest_ckpt(checkpoint_path)
    tf.logging.info('Fine-tuning from %s. Ignoring missing vars: %s'
                    % (checkpoint_path, ignore_missing_vars))
    print('checkpoint_path', checkpoint_path)
    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=ignore_missing_vars)
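
# Usage sketch: how the returned init_fn is typically consumed with slim's training loop.
# The scope names and paths below are invented for illustration.
#
#   init_fn = get_init_fn(checkpoint_path='/tmp/vgg_16.ckpt',
#                         train_dir='/tmp/train_logs',
#                         ignore_missing_vars=True,
#                         checkpoint_exclude_scopes='vgg_16/fc8')
#   slim.learning.train(train_op, logdir='/tmp/train_logs', init_fn=init_fn)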

def get_variables_to_train(flags=None):
    """code from github/SSD-tensorflow/tf_utils.py
    Returns a list of variables to train.

    Returns:
        A list of variables to train by the optimizer.
    """
    if flags is None or flags.trainable_scopes is None:
        return tf.trainable_variables()
    else:
        scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]

    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train
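
# Usage sketch: `flags` only needs a `trainable_scopes` attribute, e.g. the tf.app.flags
# FLAGS object of a training script (an assumption about the caller, not this module).
#
#   # tf.app.flags.DEFINE_string('trainable_scopes', 'vgg_16/fc8,ssd_heads', '')
#   var_list = get_variables_to_train(tf.app.flags.FLAGS)
#   train_op = optimizer.minimize(loss, var_list=var_list)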

def Print(tensor, data, msg='', file=None, mode='w'):
    from tensorflow.python.ops import control_flow_ops
    import util

    def np_print(*args):
        # Format via the '%' placeholders in msg if present, otherwise build a format
        # string from the number of args.
        if util.str.contains(msg, '%'):
            message = msg % tuple(args)
        else:
            message = msg + ' %' * len(args) % tuple(args)
        if file is not None:
            file_path = util.io.get_absolute_path(file)
            print('writing message to file(%s):' % (file_path), message)
            with open(file_path, mode) as f:
                print(message, file=f)
        else:
            print(message)
    return control_flow_ops.with_dependencies([tf.py_func(np_print, data, [])], tensor)
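
# Usage sketch: attach a host-side print to a tensor without breaking the graph. The
# tensors and file path are invented; `data` must be a list of tensors that are fed to
# the python callback.
#
#   loss = Print(loss, [tf.reduce_min(loss), tf.reduce_max(loss)],
#                msg='loss min=%f max=%f', file='/tmp/loss_log.txt', mode='a')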

def get_variable_names_in_checkpoint(path, return_shapes=False, return_reader=False):
    """
    Args:
        path: the path to the training directory containing checkpoints,
            or the path to a single checkpoint.
    Returns:
        A list of variable names in the checkpoint.
    """
    import util
    ckpt = get_latest_ckpt(path)
    ckpt_reader = tf.train.NewCheckpointReader(ckpt)
    ckpt_vars = ckpt_reader.get_variable_to_shape_map()
    names = [var for var in ckpt_vars]
    if return_shapes:
        return names, ckpt_vars

    def get(name):
        return ckpt_reader.get_tensor(name)
    if return_reader:
        return names, get
    return names

def min_area_rect(xs, ys):
    import util
    rects = tf.py_func(util.img.min_area_rect, [xs, ys], xs.dtype)
    rects.set_shape([None, 5])
    return rects

def gpu_config(config=None, allow_growth=None, gpu_memory_fraction=None):
    if config is None:
        config = tf.ConfigProto()

    if allow_growth is not None:
        config.gpu_options.allow_growth = allow_growth

    if gpu_memory_fraction is not None:
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    return config
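
# Usage sketch: build a session config that grows GPU memory on demand instead of
# reserving it all up front (standard tf.ConfigProto options, nothing project-specific).
#
#   sess = tf.Session(config=gpu_config(allow_growth=True))
#   # or cap memory usage per process:
#   sess = tf.Session(config=gpu_config(gpu_memory_fraction=0.4))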

def wait_for_checkpoint(path):
    from tensorflow.contrib.training.python.training import evaluation
    return evaluation.checkpoints_iterator(path)
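
# Usage sketch: iterate over checkpoints as they appear during training, e.g. in a
# stand-alone evaluation process (evaluate_once is a hypothetical user function).
#
#   for ckpt in wait_for_checkpoint('/tmp/train_logs'):
#       evaluate_once(ckpt)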

def focal_loss(labels, logits, gamma=2.0, alpha=0.75, normalize=True):
    # Binarize the labels: every positive label is treated as foreground.
    labels = tf.where(labels > 0, tf.ones_like(labels), tf.zeros_like(labels))
    labels = tf.cast(labels, tf.float32)
    probs = tf.sigmoid(logits)
    CE = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

    # Per-element weight alpha_t * (1 - p_t)^gamma, as in the focal loss formulation.
    alpha_t = tf.ones_like(logits) * alpha
    alpha_t = tf.where(labels > 0, alpha_t, 1.0 - alpha_t)
    probs_t = tf.where(labels > 0, probs, 1.0 - probs)
    focal_matrix = alpha_t * tf.pow((1.0 - probs_t), gamma)

    fl = focal_matrix * CE
    fl = tf.reduce_sum(fl)
    if normalize:
        # n_pos = tf.reduce_sum(labels)
        # fl = fl / tf.cast(n_pos, tf.float32)
        total_weights = tf.stop_gradient(tf.reduce_sum(focal_matrix))
        fl = fl / total_weights
    return fl
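
# Usage sketch: `logits` are raw (pre-sigmoid) scores and `labels` may be any tensor in
# which positive entries mark foreground; both shapes must match. The tensors below are
# random placeholders for illustration only.
#
#   logits = tf.random_normal([8, 128, 128, 1])
#   labels = tf.cast(tf.random_uniform([8, 128, 128, 1]) > 0.9, tf.int32)
#   loss = focal_loss(labels, logits, gamma=2.0, alpha=0.75)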

def focal_loss_layer_initializer(sigma=0.01, pi=0.01):
    import numpy as np
    # Bias initialization from the focal loss paper: start training with the predicted
    # foreground probability close to pi, so the loss is not dominated by easy negatives.
    b0 = -np.log((1 - pi) / pi)
    return tf.random_normal_initializer(stddev=sigma), \
        tf.constant_initializer(b0)
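
# Usage sketch: plug the pair of initializers into the final prediction layer (here a
# hypothetical slim.conv2d score head, not code from this module).
#
#   w_init, b_init = focal_loss_layer_initializer(sigma=0.01, pi=0.01)
#   scores = slim.conv2d(features, 1, [3, 3],
#                        weights_initializer=w_init,
#                        biases_initializer=b_init,
#                        activation_fn=None)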

def sum_gradients(clone_grads, do_summary=False):
    averaged_grads = []
    for grad_and_vars in zip(*clone_grads):
        grads = []
        var = grad_and_vars[0][1]
        try:
            # Every clone must provide a gradient for the same variable; sum them up.
            for g, v in grad_and_vars:
                assert v == var
                grads.append(g)
            grad = tf.add_n(grads, name=v.op.name + '_summed_gradients')
        except:
            # Debugging aid kept from the original code: drop into pdb if a clone is
            # missing a gradient for this variable.
            import pdb
            pdb.set_trace()

        averaged_grads.append((grad, v))

        if do_summary:
            tf.summary.histogram("variables_and_gradients_" + grad.op.name, grad)
            tf.summary.histogram("variables_and_gradients_" + v.op.name, v)
            tf.summary.scalar("variables_and_gradients_" + grad.op.name +
                              '_mean/var_mean', tf.reduce_mean(grad) / tf.reduce_mean(var))
            tf.summary.scalar("variables_and_gradients_" + v.op.name + '_mean',
                              tf.reduce_mean(var))
    return averaged_grads
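
# Usage sketch: typical multi-clone (multi-GPU) pattern. `build_loss` and `batches` are
# hypothetical user code; each clone computes gradients for the same variables, and the
# summed gradients are applied once.
#
#   clone_grads = []
#   for i, gpu in enumerate(get_available_gpus(num_gpus=2)):
#       with tf.device(gpu):
#           loss = build_loss(batches[i])
#           clone_grads.append(optimizer.compute_gradients(loss))
#   grads_and_vars = sum_gradients(clone_grads)
#   train_op = optimizer.apply_gradients(grads_and_vars)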

def get_update_op():
    """
    Extremely important for BatchNorm: the moving-average update ops collected in
    tf.GraphKeys.UPDATE_OPS must be run alongside the train op, otherwise the moving
    mean/variance are never updated.
    """
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        return tf.group(*update_ops)
    return None
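
# Usage sketch: group the update op with the gradient step so BatchNorm statistics are
# refreshed every iteration (train_op_without_updates stands in for the user's gradient
# application op).
#
#   update_op = get_update_op()
#   if update_op is not None:
#       train_op = tf.group(train_op_without_updates, update_op)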