from __future__ import print_function

try:
    import tensorflow as tf
    from tensorflow.python.ops import nn
    relu = nn.relu
    slim = tf.contrib.slim
    sigmoid = nn.sigmoid
    softmax = nn.softmax
except Exception:
    print("TensorFlow is not installed; util.tf cannot be used.")

def is_gpu_available(cuda_only=True):
    """
    Code from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/test.py

    Returns whether TensorFlow can access a GPU.

    Args:
        cuda_only: limit the search to CUDA GPUs.

    Returns:
        True iff a GPU device of the requested kind is available.
    """
    from tensorflow.python.client import device_lib as _device_lib
    if cuda_only:
        return any((x.device_type == 'GPU')
                   for x in _device_lib.list_local_devices())
    else:
        return any((x.device_type == 'GPU' or x.device_type == 'SYCL')
                   for x in _device_lib.list_local_devices())

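# Usage sketch (added for illustration, not from the original repo): pick a
# device string depending on whether a GPU can be used.
#     device = '/gpu:0' if is_gpu_available() else '/cpu:0'
#     with tf.device(device):
#         pass  # build the graph here
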
def get_available_gpus(num_gpus=None):
    """
    Modified from http://stackoverflow.com/questions/38559755/how-to-get-current-available-gpus-in-tensorflow
    The original code occupies all available GPU memory; this version does not.
    When num_gpus is given, it does nothing but return the device handle names.
    It works well for single-machine training; whether it also works on a cluster is untested.
    """
    if num_gpus is None:
        from tensorflow.python.client import device_lib as _device_lib
        local_device_protos = _device_lib.list_local_devices()
        return [x.name for x in local_device_protos if x.device_type == 'GPU']
    else:
        return ['/gpu:%d' % idx for idx in range(num_gpus)]

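# Usage sketch (added for illustration, not from the original repo): iterate
# over the device names when building one tower per GPU.
#     for clone_idx, gpu in enumerate(get_available_gpus(num_gpus=2)):
#         with tf.device(gpu):
#             pass  # build the per-GPU tower / clone here
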
def get_latest_ckpt(path):
    # See also tf.train.latest_checkpoint
    import util
    path = util.io.get_absolute_path(path)
    if util.io.is_dir(path):
        ckpt = tf.train.get_checkpoint_state(path)
        if ckpt is not None:
            ckpt_path = ckpt.model_checkpoint_path
        else:
            ckpt_path = None
    else:
        ckpt_path = path
    return ckpt_path

def get_all_ckpts(path):
    ckpt = tf.train.get_checkpoint_state(path)
    all_ckpts = ckpt.all_model_checkpoint_paths
    ckpts = [str(c) for c in all_ckpts]
    return ckpts


def get_iter(ckpt):
    import util
    iter_ = int(util.str.find_all(ckpt, r'.ckpt-\d+')[0].split('-')[-1])
    return iter_

def get_init_fn(checkpoint_path, train_dir, ignore_missing_vars=False,
                checkpoint_exclude_scopes=None, model_name=None, checkpoint_model_scope=None):
    """
    Code from github/SSD-tensorflow/tf_utils.py
    Returns a function run by the chief worker to warm-start the training.
    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Args:
        checkpoint_path: the checkpoint to be restored.
        train_dir: the directory where checkpoints are stored during training.
        ignore_missing_vars: if False and there are variables in the model but not
            in the checkpoint, an error will be raised.
        checkpoint_model_scope and model_name: if the root scope of the checkpoint
            and that of the model in the session differ (but all sub-scopes are the
            same), specify them explicitly.
        checkpoint_exclude_scopes: variables to be excluded when restoring from
            checkpoint_path.

    Returns:
        An init function run by the supervisor.
    """
    import util
    if util.str.is_none_or_empty(checkpoint_path):
        return None
    # Warn the user if a checkpoint exists in the train_dir. Then ignore.
    if tf.train.latest_checkpoint(train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % train_dir)
        return None

    exclusions = []
    if checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    # Change model scope if necessary.
    if checkpoint_model_scope is not None:
        variables_to_restore = {checkpoint_model_scope + '/' + var.op.name: var
                                for var in variables_to_restore}

    tf.logging.info('variables_to_restore: %r' % (variables_to_restore,))
    checkpoint_path = get_latest_ckpt(checkpoint_path)
    tf.logging.info('Fine-tuning from %s. Ignoring missing vars: %s'
                    % (checkpoint_path, ignore_missing_vars))
    print('checkpoint_path', checkpoint_path)
    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=ignore_missing_vars)

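# Usage sketch (added for illustration, not from the original repo): pass the
# returned init_fn to slim.learning.train so the chief worker warm-starts from
# the checkpoint. FLAGS is a hypothetical flags object.
#     init_fn = get_init_fn(checkpoint_path=FLAGS.checkpoint_path,
#                           train_dir=FLAGS.train_dir,
#                           ignore_missing_vars=True)
#     slim.learning.train(train_op, logdir=FLAGS.train_dir, init_fn=init_fn)
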
def get_variables_to_train(flags=None):
    """
    Code from github/SSD-tensorflow/tf_utils.py
    Returns a list of variables to train.

    Returns:
        A list of variables to train by the optimizer.
    """
    if flags is None or flags.trainable_scopes is None:
        return tf.trainable_variables()
    else:
        scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]

    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train

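# Usage sketch (added for illustration; FLAGS and optimizer are hypothetical):
# flags is expected to carry a comma-separated trainable_scopes attribute,
# e.g. FLAGS.trainable_scopes = 'vgg_16/fc8,extra_layers'.
#     var_list = get_variables_to_train(FLAGS)
#     train_op = optimizer.minimize(loss, var_list=var_list)
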
def Print(tensor, data, msg='', file=None, mode='w'):
    from tensorflow.python.ops import control_flow_ops
    import util

    def np_print(*args):
        if util.str.contains(msg, '%'):
            message = msg % tuple(args)
        else:
            # No format specifiers in msg: append one '%s' per data tensor.
            message = msg + (' %s' * len(args)) % tuple(args)
        if file is not None:
            file_path = util.io.get_absolute_path(file)
            print('writing message to file(%s):' % (file_path), message)
            with open(file_path, mode) as f:
                print(message, file=f)
        else:
            print(message)
    return control_flow_ops.with_dependencies([tf.py_func(np_print, data, [])], tensor)

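# Usage sketch (added for illustration, not from the original repo): attach a
# runtime print to a tensor. The returned tensor must be used downstream so the
# py_func dependency actually runs.
#     loss = Print(loss, [loss, global_step], msg='loss = %f at step %d')
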
def get_variable_names_in_checkpoint(path, return_shapes=False, return_reader=False):
    """
    Args:
        path: the path to a training directory containing checkpoints,
            or the path to a checkpoint.

    Returns:
        A list of variable names in the checkpoint.
    """
    import util
    ckpt = get_latest_ckpt(path)
    ckpt_reader = tf.train.NewCheckpointReader(ckpt)
    ckpt_vars = ckpt_reader.get_variable_to_shape_map()
    names = [var for var in ckpt_vars]
    if return_shapes:
        return names, ckpt_vars

    def get(name):
        return ckpt_reader.get_tensor(name)
    if return_reader:
        return names, get
    return names

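# Usage sketch (added for illustration, not from the original repo): inspect a
# checkpoint before restoring from it.
#     names, shapes = get_variable_names_in_checkpoint(train_dir, return_shapes=True)
#     for name in names:
#         print(name, shapes[name])
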
def min_area_rect(xs, ys):
    import util
    rects = tf.py_func(util.img.min_area_rect, [xs, ys], xs.dtype)
    rects.set_shape([None, 5])
    return rects

def gpu_config(config=None, allow_growth=None, gpu_memory_fraction=None):
    if config is None:
        config = tf.ConfigProto()
    if allow_growth is not None:
        config.gpu_options.allow_growth = allow_growth
    if gpu_memory_fraction is not None:
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    return config

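# Usage sketch (added for illustration, not from the original repo): create a
# session that does not grab all GPU memory up front.
#     sess_config = gpu_config(allow_growth=True)
#     # or: sess_config = gpu_config(gpu_memory_fraction=0.5)
#     sess = tf.Session(config=sess_config)
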
def wait_for_checkpoint(path):
    from tensorflow.contrib.training.python.training import evaluation
    return evaluation.checkpoints_iterator(path)

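# Usage sketch (added for illustration, not from the original repo): block
# until new checkpoints appear, e.g. for continuous evaluation. `evaluate` is a
# hypothetical evaluation function.
#     for ckpt_path in wait_for_checkpoint(train_dir):
#         evaluate(ckpt_path)
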
def focal_loss(labels, logits, gamma=2.0, alpha=0.75, normalize=True):
    labels = tf.where(labels > 0, tf.ones_like(labels), tf.zeros_like(labels))
    labels = tf.cast(labels, tf.float32)
    probs = tf.sigmoid(logits)
    CE = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

    alpha_t = tf.ones_like(logits) * alpha
    alpha_t = tf.where(labels > 0, alpha_t, 1.0 - alpha_t)
    probs_t = tf.where(labels > 0, probs, 1.0 - probs)
    focal_matrix = alpha_t * tf.pow((1.0 - probs_t), gamma)

    fl = focal_matrix * CE
    fl = tf.reduce_sum(fl)
    if normalize:
        # n_pos = tf.reduce_sum(labels)
        # fl = fl / tf.cast(n_pos, tf.float32)
        total_weights = tf.stop_gradient(tf.reduce_sum(focal_matrix))
        fl = fl / total_weights
    return fl

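# Note (added for clarity, following Lin et al., "Focal Loss for Dense Object
# Detection"): with p_t = p if y > 0 else 1 - p, the per-element loss above is
#     FL = -alpha_t * (1 - p_t)^gamma * log(p_t)
# since sigmoid_cross_entropy_with_logits equals -log(p_t) and focal_matrix is
# alpha_t * (1 - p_t)^gamma. With normalize=True the summed loss is divided by
# the total focal weight rather than by the number of positives.
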
def focal_loss_layer_initializer(sigma=0.01, pi=0.01):
    import numpy as np
    b0 = - np.log((1 - pi) / pi)
    return tf.random_normal_initializer(stddev=sigma), \
           tf.constant_initializer(b0)

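# Note (added for clarity): the bias init solves sigmoid(b0) = pi, i.e.
# b0 = -log((1 - pi) / pi), so the classification layer initially predicts the
# rare positive class with probability about pi, as recommended in the focal
# loss paper. Usage sketch (assumed, not from the original repo):
#     w_init, b_init = focal_loss_layer_initializer(pi=0.01)
#     logits = slim.conv2d(features, num_classes, [3, 3], activation_fn=None,
#                          weights_initializer=w_init, biases_initializer=b_init)
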
def sum_gradients(clone_grads, do_summary=False):
    """Sum (not average, despite the variable name) the gradients of all clones,
    variable by variable."""
    averaged_grads = []
    for grad_and_vars in zip(*clone_grads):
        grads = []
        var = grad_and_vars[0][1]
        try:
            for g, v in grad_and_vars:
                assert v == var
                grads.append(g)
            grad = tf.add_n(grads, name=var.op.name + '_summed_gradients')
        except:
            # A clone is missing a gradient for this variable; drop into the debugger.
            import pdb
            pdb.set_trace()

        averaged_grads.append((grad, var))

        if do_summary:
            tf.summary.histogram("variables_and_gradients_" + grad.op.name, grad)
            tf.summary.histogram("variables_and_gradients_" + var.op.name, var)
            tf.summary.scalar("variables_and_gradients_" + grad.op.name +
                              '_mean/var_mean', tf.reduce_mean(grad) / tf.reduce_mean(var))
            tf.summary.scalar("variables_and_gradients_" + var.op.name + '_mean',
                              tf.reduce_mean(var))
    return averaged_grads

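# Usage sketch (added for illustration; optimizer and build_tower are
# hypothetical): combine per-GPU (per-clone) gradients before a single
# apply_gradients step. The gradients are summed, not averaged, so scale the
# learning rate or the per-clone loss accordingly.
#     clone_grads = []
#     for gpu in get_available_gpus(num_gpus=2):
#         with tf.device(gpu):
#             loss = build_tower()
#             clone_grads.append(optimizer.compute_gradients(loss))
#     grads_and_vars = sum_gradients(clone_grads)
#     train_op = optimizer.apply_gradients(grads_and_vars)
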
def get_update_op():
    """
    Extremely important for BatchNorm.
    """
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        return tf.group(*update_ops)
    return None

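# Usage sketch (added for illustration, not from the original repo): run the
# UPDATE_OPS (e.g. batch-norm moving-average updates) together with the
# training step.
#     update_op = get_update_op()
#     if update_op is not None:
#         with tf.control_dependencies([update_op]):
#             train_op = optimizer.apply_gradients(grads_and_vars)
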