"""A library showing off sequence recognition and generation with the simple |
|
example of names. |
|
|
|
We use recurrent neural nets to learn complex functions able to recognize and |
|
generate sequences of a given form. This can be used for natural language |
|
syntax recognition, dynamically generating maps or puzzles and of course |
|
baby name generation. |
|
|
|
Before using this module, it is recommended to read the Tensorflow tutorial on |
|
recurrent neural nets, as it explains the basic concepts of this model, and |
|
will show off another module, the PTB module on which this model bases itself. |
|
|
|
Here is an overview of the functions available in this module: |
|
|
|
* RNN Module for sequence functions based on PTB |
|
|
|
* Name recognition specifically for recognizing names, but can be adapted to |
|
recognizing sequence patterns |
|
|
|
* Name generations specifically for generating names, but can be adapted to |
|
generating arbitrary sequence patterns |
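
Typical usage, mirroring the __main__ block at the bottom of this file (the
data and checkpoint paths are placeholders for your own):

    train("data/SmallNames.txt", "model/namignizer", SmallConfig)
    namignize(["mary", "ida", "bob"],
              tf.train.latest_checkpoint("model"), SmallConfig)
    namignator(tf.train.latest_checkpoint("model"), SmallConfig)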
|
""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import time |
|
|
|
import tensorflow as tf |
|
import numpy as np |
|
|
|
from model import NamignizerModel |
|
import data_utils |
|
|
|
|
|
class SmallConfig(object): |
|
"""Small config.""" |
|
init_scale = 0.1 |
|
learning_rate = 1.0 |
|
max_grad_norm = 5 |
|
num_layers = 2 |
|
num_steps = 20 |
|
hidden_size = 200 |
|
max_epoch = 4 |
|
max_max_epoch = 13 |
|
keep_prob = 1.0 |
|
lr_decay = 0.5 |
|
batch_size = 20 |
|
vocab_size = 27 |
|
epoch_size = 100 |
|
|
|
|
|
class LargeConfig(object): |
|
"""Medium config.""" |
|
init_scale = 0.05 |
|
learning_rate = 1.0 |
|
max_grad_norm = 5 |
|
num_layers = 2 |
|
num_steps = 35 |
|
hidden_size = 650 |
|
max_epoch = 6 |
|
max_max_epoch = 39 |
|
keep_prob = 0.5 |
|
lr_decay = 0.8 |
|
batch_size = 20 |
|
vocab_size = 27 |
|
epoch_size = 100 |
|
|
|
|
|
class TestConfig(object): |
|
"""Tiny config, for testing.""" |
|
init_scale = 0.1 |
|
learning_rate = 1.0 |
|
max_grad_norm = 1 |
|
num_layers = 1 |
|
num_steps = 2 |
|
hidden_size = 2 |
|
max_epoch = 1 |
|
max_max_epoch = 1 |
|
keep_prob = 1.0 |
|
lr_decay = 0.5 |
|
batch_size = 20 |
|
vocab_size = 27 |
|
epoch_size = 100 |
|
|
|
|
|
def run_epoch(session, m, names, counts, epoch_size, eval_op, verbose=False): |
|
"""Runs the model on the given data for one epoch |
|
|
|
Args: |
|
session: the tf session holding the model graph |
|
m: an instance of the NamignizerModel |
|
names: a set of lowercase names of 26 characters |
|
counts: a list of the frequency of the above names |
|
epoch_size: the number of batches to run |
|
eval_op: whether to change the params or not, and how to do it |
|
Kwargs: |
|
verbose: whether to print out state of training during the epoch |
|
Returns: |
|
cost: the average cost during the last stage of the epoch |
|
""" |
|
start_time = time.time() |
|
costs = 0.0 |
|
iters = 0 |
|
for step, (x, y) in enumerate(data_utils.namignizer_iterator(names, counts, |
|
m.batch_size, m.num_steps, epoch_size)): |
|
|
|
cost, _ = session.run([m.cost, eval_op], |
|
{m.input_data: x, |
|
m.targets: y, |
|
m.weights: np.ones(m.batch_size * m.num_steps)}) |
|
costs += cost |
|
iters += m.num_steps |
|
|
|
if verbose and step % (epoch_size // 10) == 9: |
|
print("%.3f perplexity: %.3f speed: %.0f lps" % |
|
(step * 1.0 / epoch_size, np.exp(costs / iters), |
|
iters * m.batch_size / (time.time() - start_time))) |
|
|
|
if step >= epoch_size: |
|
break |
|
|
|
    # Perplexity is the exponentiated average per-step cross-entropy cost.
    return np.exp(costs / iters)
|
|
|
|
|
def train(data_dir, checkpoint_path, config): |
|
"""Trains the model with the given data |
|
|
|
Args: |
|
data_dir: path to the data for the model (see data_utils for data |
|
format) |
|
checkpoint_path: the path to save the trained model checkpoints |
|
config: one of the above configs that specify the model and how it |
|
should be run and trained |
|
Returns: |
|
None |
|
""" |
|
|
|
print("Reading Name data in %s" % data_dir) |
|
names, counts = data_utils.read_names(data_dir) |
|
|
|
with tf.Graph().as_default(), tf.Session() as session: |
|
initializer = tf.random_uniform_initializer(-config.init_scale, |
|
config.init_scale) |
|
with tf.variable_scope("model", reuse=None, initializer=initializer): |
|
m = NamignizerModel(is_training=True, config=config) |
|
|
|
tf.global_variables_initializer().run() |
|
|
|
for i in range(config.max_max_epoch): |
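            # Decay the learning rate by lr_decay for each epoch after max_epoch.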
|
lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) |
|
m.assign_lr(session, config.learning_rate * lr_decay) |
|
|
|
print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) |
|
train_perplexity = run_epoch(session, m, names, counts, config.epoch_size, m.train_op, |
|
verbose=True) |
|
print("Epoch: %d Train Perplexity: %.3f" % |
|
(i + 1, train_perplexity)) |
|
|
|
m.saver.save(session, checkpoint_path, global_step=i) |
|
|
|
|
|
def namignize(names, checkpoint_path, config): |
|
"""Recognizes names and prints the Perplexity of the model for each names |
|
in the list |
|
|
|
Args: |
|
names: a list of names in the model format |
|
checkpoint_path: the path to restore the trained model from, should not |
|
include the model name, just the path to |
|
config: one of the above configs that specify the model and how it |
|
should be run and trained |
|
Returns: |
|
None |
|
""" |
|
with tf.Graph().as_default(), tf.Session() as session: |
|
|
|
with tf.variable_scope("model"): |
|
m = NamignizerModel(is_training=False, config=config) |
|
|
|
m.saver.restore(session, checkpoint_path) |
|
|
|
for name in names: |
|
x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps) |
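            # The weights below mask the loss so that only the positions filled
            # by the current name count; the padded positions get zero weight.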
|
|
|
cost, loss, _ = session.run([m.cost, m.loss, tf.no_op()], |
|
{m.input_data: x, |
|
m.targets: y, |
|
m.weights: np.concatenate(( |
|
np.ones(len(name)), np.zeros(m.batch_size * m.num_steps - len(name))))}) |
|
|
|
print("Name {} gives us a perplexity of {}".format( |
|
name, np.exp(cost))) |
|
|
|
|
|
def namignator(checkpoint_path, config): |
|
"""Generates names randomly according to a given model |
|
|
|
Args: |
|
checkpoint_path: the path to restore the trained model from, should not |
|
include the model name, just the path to |
|
config: one of the above configs that specify the model and how it |
|
should be run and trained |
|
Returns: |
|
None |
|
""" |
|
|
|
    # Generation feeds one character at a time, so run the model with a single
    # step and a single example per batch.
    config.num_steps = 1
    config.batch_size = 1
|
|
|
with tf.Graph().as_default(), tf.Session() as session: |
|
|
|
with tf.variable_scope("model"): |
|
m = NamignizerModel(is_training=False, config=config) |
|
|
|
m.saver.restore(session, checkpoint_path) |
|
|
|
activations, final_state, _ = session.run([m.activations, m.final_state, tf.no_op()], |
|
{m.input_data: np.zeros((1, 1)), |
|
m.targets: np.zeros((1, 1)), |
|
m.weights: np.ones(1)}) |
|
|
|
|
|
        # Sample the first character from the softmax output, then keep sampling
        # until the end-of-name index 0 is drawn.
        next_letter = np.random.choice(27, p=activations[0])
        name = [next_letter]
        while next_letter != 0:
|
activations, final_state, _ = session.run([m.activations, m.final_state, tf.no_op()], |
|
{m.input_data: [[next_letter]], |
|
m.targets: np.zeros((1, 1)), |
|
m.initial_state: final_state, |
|
m.weights: np.ones(1)}) |
|
|
|
next_letter = np.random.choice(27, p=activations[0]) |
|
name += [next_letter] |
|
|
|
        # Indices 1-26 map to 'a'-'z'; index 0 is the end-of-name marker, so drop
        # it before printing the generated name as a string.
        print("".join(chr(letter + 96) for letter in name if letter != 0))
|
|
|
|
|
if __name__ == "__main__": |
|
train("data/SmallNames.txt", "model/namignizer", SmallConfig) |
|
|
|
namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"], |
|
tf.train.latest_checkpoint("model"), SmallConfig) |
|
|
|
namignator(tf.train.latest_checkpoint("model"), SmallConfig) |
|
|