diff --git a/configs/LRS3_V_WER19.1.ini b/configs/LRS3_V_WER19.1.ini new file mode 100644 index 0000000000000000000000000000000000000000..7c1ac9a7c0ebb812423f1a22826a9c0e7fc99902 --- /dev/null +++ b/configs/LRS3_V_WER19.1.ini @@ -0,0 +1,18 @@ +[input] +modality=video +v_fps=25 + +[model] +v_fps=25 +model_path=benchmarks/LRS3/models/LRS3_V_WER19.1/model.pth +model_conf=benchmarks/LRS3/models/LRS3_V_WER19.1/model.json +rnnlm=benchmarks/LRS3/language_models/lm_en_subword/model.pth +rnnlm_conf=benchmarks/LRS3/language_models/lm_en_subword/model.json + +[decode] +beam_size=40 +penalty=0.0 +maxlenratio=0.0 +minlenratio=0.0 +ctc_weight=0.1 +lm_weight=0.3 diff --git a/espnet/.DS_Store b/espnet/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..81c5efd96703d9ec242bcfbfeb1cc7e92c570439 Binary files /dev/null and b/espnet/.DS_Store differ diff --git a/espnet/asr/asr_utils.py b/espnet/asr/asr_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4f72ba13a677a11c58b8ac5a10235ebe79a3824e --- /dev/null +++ b/espnet/asr/asr_utils.py @@ -0,0 +1,990 @@ +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import argparse +import copy +import json +import logging +import os +import shutil +import tempfile + +import numpy as np +import torch + + +# * -------------------- training iterator related -------------------- * + + +class CompareValueTrigger(object): + """Trigger invoked when key value getting bigger or lower than before. + + Args: + key (str) : Key of value. + compare_fn ((float, float) -> bool) : Function to compare the values. + trigger (tuple(int, str)) : Trigger that decide the comparison interval. + + """ + + def __init__(self, key, compare_fn, trigger=(1, "epoch")): + from chainer import training + + self._key = key + self._best_value = None + self._interval_trigger = training.util.get_trigger(trigger) + self._init_summary() + self._compare_fn = compare_fn + + def __call__(self, trainer): + """Get value related to the key and compare with current value.""" + observation = trainer.observation + summary = self._summary + key = self._key + if key in observation: + summary.add({key: observation[key]}) + + if not self._interval_trigger(trainer): + return False + + stats = summary.compute_mean() + value = float(stats[key]) # copy to CPU + self._init_summary() + + if self._best_value is None: + # initialize best value + self._best_value = value + return False + elif self._compare_fn(self._best_value, value): + return True + else: + self._best_value = value + return False + + def _init_summary(self): + import chainer + + self._summary = chainer.reporter.DictSummary() + + +try: + from chainer.training import extension +except ImportError: + PlotAttentionReport = None +else: + + class PlotAttentionReport(extension.Extension): + """Plot attention reporter. + + Args: + att_vis_fn (espnet.nets.*_backend.e2e_asr.E2E.calculate_all_attentions): + Function of attention visualization. + data (list[tuple(str, dict[str, list[Any]])]): List json utt key items. + outdir (str): Directory to save figures. + converter (espnet.asr.*_backend.asr.CustomConverter): + Function to convert data. + device (int | torch.device): Device. + reverse (bool): If True, input and output length are reversed. + ikey (str): Key to access input + (for ASR/ST ikey="input", for MT ikey="output".) + iaxis (int): Dimension to access input + (for ASR/ST iaxis=0, for MT iaxis=1.) 
+ okey (str): Key to access output + (for ASR/ST okey="input", MT okay="output".) + oaxis (int): Dimension to access output + (for ASR/ST oaxis=0, for MT oaxis=0.) + subsampling_factor (int): subsampling factor in encoder + + """ + + def __init__( + self, + att_vis_fn, + data, + outdir, + converter, + transform, + device, + reverse=False, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, + subsampling_factor=1, + ): + self.att_vis_fn = att_vis_fn + self.data = copy.deepcopy(data) + self.data_dict = {k: v for k, v in copy.deepcopy(data)} + # key is utterance ID + self.outdir = outdir + self.converter = converter + self.transform = transform + self.device = device + self.reverse = reverse + self.ikey = ikey + self.iaxis = iaxis + self.okey = okey + self.oaxis = oaxis + self.factor = subsampling_factor + if not os.path.exists(self.outdir): + os.makedirs(self.outdir) + + def __call__(self, trainer): + """Plot and save image file of att_ws matrix.""" + att_ws, uttid_list = self.get_attention_weights() + if isinstance(att_ws, list): # multi-encoder case + num_encs = len(att_ws) - 1 + # atts + for i in range(num_encs): + for idx, att_w in enumerate(att_ws[i]): + filename = "%s/%s.ep.{.updater.epoch}.att%d.png" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.att%d.npy" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention(att_w, filename.format(trainer)) + # han + for idx, att_w in enumerate(att_ws[num_encs]): + filename = "%s/%s.ep.{.updater.epoch}.han.png" % ( + self.outdir, + uttid_list[idx], + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.han.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention( + att_w, filename.format(trainer), han_mode=True + ) + else: + for idx, att_w in enumerate(att_ws): + filename = "%s/%s.ep.{.updater.epoch}.png" % ( + self.outdir, + uttid_list[idx], + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention(att_w, filename.format(trainer)) + + def log_attentions(self, logger, step): + """Add image files of att_ws matrix to the tensorboard.""" + att_ws, uttid_list = self.get_attention_weights() + if isinstance(att_ws, list): # multi-encoder case + num_encs = len(att_ws) - 1 + # atts + for i in range(num_encs): + for idx, att_w in enumerate(att_ws[i]): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_attention_plot(att_w) + logger.add_figure( + "%s_att%d" % (uttid_list[idx], i + 1), + plot.gcf(), + step, + ) + # han + for idx, att_w in enumerate(att_ws[num_encs]): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_han_plot(att_w) + logger.add_figure( + "%s_han" % (uttid_list[idx]), + plot.gcf(), + step, + ) + else: + for idx, att_w in enumerate(att_ws): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_attention_plot(att_w) + logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step) + + def get_attention_weights(self): + """Return attention weights. + + Returns: + numpy.ndarray: attention weights. float. Its shape would be + differ from backend. 
+ * pytorch-> 1) multi-head case => (B, H, Lmax, Tmax), 2) + other case => (B, Lmax, Tmax). + * chainer-> (B, Lmax, Tmax) + + """ + return_batch, uttid_list = self.transform(self.data, return_uttid=True) + batch = self.converter([return_batch], self.device) + if isinstance(batch, tuple): + att_ws = self.att_vis_fn(*batch) + else: + att_ws = self.att_vis_fn(**batch) + return att_ws, uttid_list + + def trim_attention_weight(self, uttid, att_w): + """Transform attention matrix with regard to self.reverse.""" + if self.reverse: + enc_key, enc_axis = self.okey, self.oaxis + dec_key, dec_axis = self.ikey, self.iaxis + else: + enc_key, enc_axis = self.ikey, self.iaxis + dec_key, dec_axis = self.okey, self.oaxis + dec_len = int(self.data_dict[uttid][dec_key][dec_axis]["shape"][0]) + enc_len = int(self.data_dict[uttid][enc_key][enc_axis]["shape"][0]) + if self.factor > 1: + enc_len //= self.factor + if len(att_w.shape) == 3: + att_w = att_w[:, :dec_len, :enc_len] + else: + att_w = att_w[:dec_len, :enc_len] + return att_w + + def draw_attention_plot(self, att_w): + """Plot the att_w matrix. + + Returns: + matplotlib.pyplot: pyplot object with attention matrix image. + + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + plt.clf() + att_w = att_w.astype(np.float32) + if len(att_w.shape) == 3: + for h, aw in enumerate(att_w, 1): + plt.subplot(1, len(att_w), h) + plt.imshow(aw, aspect="auto") + plt.xlabel("Encoder Index") + plt.ylabel("Decoder Index") + else: + plt.imshow(att_w, aspect="auto") + plt.xlabel("Encoder Index") + plt.ylabel("Decoder Index") + plt.tight_layout() + return plt + + def draw_han_plot(self, att_w): + """Plot the att_w matrix for hierarchical attention. + + Returns: + matplotlib.pyplot: pyplot object with attention matrix image. + + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + plt.clf() + if len(att_w.shape) == 3: + for h, aw in enumerate(att_w, 1): + legends = [] + plt.subplot(1, len(att_w), h) + for i in range(aw.shape[1]): + plt.plot(aw[:, i]) + legends.append("Att{}".format(i)) + plt.ylim([0, 1.0]) + plt.xlim([0, aw.shape[0]]) + plt.grid(True) + plt.ylabel("Attention Weight") + plt.xlabel("Decoder Index") + plt.legend(legends) + else: + legends = [] + for i in range(att_w.shape[1]): + plt.plot(att_w[:, i]) + legends.append("Att{}".format(i)) + plt.ylim([0, 1.0]) + plt.xlim([0, att_w.shape[0]]) + plt.grid(True) + plt.ylabel("Attention Weight") + plt.xlabel("Decoder Index") + plt.legend(legends) + plt.tight_layout() + return plt + + def _plot_and_save_attention(self, att_w, filename, han_mode=False): + if han_mode: + plt = self.draw_han_plot(att_w) + else: + plt = self.draw_attention_plot(att_w) + plt.savefig(filename) + plt.close() + + +try: + from chainer.training import extension +except ImportError: + PlotCTCReport = None +else: + + class PlotCTCReport(extension.Extension): + """Plot CTC reporter. + + Args: + ctc_vis_fn (espnet.nets.*_backend.e2e_asr.E2E.calculate_all_ctc_probs): + Function of CTC visualization. + data (list[tuple(str, dict[str, list[Any]])]): List json utt key items. + outdir (str): Directory to save figures. + converter (espnet.asr.*_backend.asr.CustomConverter): + Function to convert data. + device (int | torch.device): Device. + reverse (bool): If True, input and output length are reversed. + ikey (str): Key to access input + (for ASR/ST ikey="input", for MT ikey="output".) + iaxis (int): Dimension to access input + (for ASR/ST iaxis=0, for MT iaxis=1.) 
+ okey (str): Key to access output + (for ASR/ST okey="input", MT okay="output".) + oaxis (int): Dimension to access output + (for ASR/ST oaxis=0, for MT oaxis=0.) + subsampling_factor (int): subsampling factor in encoder + + """ + + def __init__( + self, + ctc_vis_fn, + data, + outdir, + converter, + transform, + device, + reverse=False, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, + subsampling_factor=1, + ): + self.ctc_vis_fn = ctc_vis_fn + self.data = copy.deepcopy(data) + self.data_dict = {k: v for k, v in copy.deepcopy(data)} + # key is utterance ID + self.outdir = outdir + self.converter = converter + self.transform = transform + self.device = device + self.reverse = reverse + self.ikey = ikey + self.iaxis = iaxis + self.okey = okey + self.oaxis = oaxis + self.factor = subsampling_factor + if not os.path.exists(self.outdir): + os.makedirs(self.outdir) + + def __call__(self, trainer): + """Plot and save image file of ctc prob.""" + ctc_probs, uttid_list = self.get_ctc_probs() + if isinstance(ctc_probs, list): # multi-encoder case + num_encs = len(ctc_probs) - 1 + for i in range(num_encs): + for idx, ctc_prob in enumerate(ctc_probs[i]): + filename = "%s/%s.ep.{.updater.epoch}.ctc%d.png" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + np_filename = "%s/%s.ep.{.updater.epoch}.ctc%d.npy" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + np.save(np_filename.format(trainer), ctc_prob) + self._plot_and_save_ctc(ctc_prob, filename.format(trainer)) + else: + for idx, ctc_prob in enumerate(ctc_probs): + filename = "%s/%s.ep.{.updater.epoch}.png" % ( + self.outdir, + uttid_list[idx], + ) + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + np_filename = "%s/%s.ep.{.updater.epoch}.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), ctc_prob) + self._plot_and_save_ctc(ctc_prob, filename.format(trainer)) + + def log_ctc_probs(self, logger, step): + """Add image files of ctc probs to the tensorboard.""" + ctc_probs, uttid_list = self.get_ctc_probs() + if isinstance(ctc_probs, list): # multi-encoder case + num_encs = len(ctc_probs) - 1 + for i in range(num_encs): + for idx, ctc_prob in enumerate(ctc_probs[i]): + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + plot = self.draw_ctc_plot(ctc_prob) + logger.add_figure( + "%s_ctc%d" % (uttid_list[idx], i + 1), + plot.gcf(), + step, + ) + else: + for idx, ctc_prob in enumerate(ctc_probs): + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + plot = self.draw_ctc_plot(ctc_prob) + logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step) + + def get_ctc_probs(self): + """Return CTC probs. + + Returns: + numpy.ndarray: CTC probs. float. Its shape would be + differ from backend. (B, Tmax, vocab). + + """ + return_batch, uttid_list = self.transform(self.data, return_uttid=True) + batch = self.converter([return_batch], self.device) + if isinstance(batch, tuple): + probs = self.ctc_vis_fn(*batch) + else: + probs = self.ctc_vis_fn(**batch) + return probs, uttid_list + + def trim_ctc_prob(self, uttid, prob): + """Trim CTC posteriors accoding to input lengths.""" + enc_len = int(self.data_dict[uttid][self.ikey][self.iaxis]["shape"][0]) + if self.factor > 1: + enc_len //= self.factor + prob = prob[:enc_len] + return prob + + def draw_ctc_plot(self, ctc_prob): + """Plot the ctc_prob matrix. + + Returns: + matplotlib.pyplot: pyplot object with CTC prob matrix image. 
+ + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + ctc_prob = ctc_prob.astype(np.float32) + + plt.clf() + topk_ids = np.argsort(ctc_prob, axis=1) + n_frames, vocab = ctc_prob.shape + times_probs = np.arange(n_frames) + + plt.figure(figsize=(20, 8)) + + # NOTE: index 0 is reserved for blank + for idx in set(topk_ids.reshape(-1).tolist()): + if idx == 0: + plt.plot( + times_probs, ctc_prob[:, 0], ":", label="", color="grey" + ) + else: + plt.plot(times_probs, ctc_prob[:, idx]) + plt.xlabel("Input [frame]", fontsize=12) + plt.ylabel("Posteriors", fontsize=12) + plt.xticks(list(range(0, int(n_frames) + 1, 10))) + plt.yticks(list(range(0, 2, 1))) + plt.tight_layout() + return plt + + def _plot_and_save_ctc(self, ctc_prob, filename): + plt = self.draw_ctc_plot(ctc_prob) + plt.savefig(filename) + plt.close() + + +def restore_snapshot(model, snapshot, load_fn=None): + """Extension to restore snapshot. + + Returns: + An extension function. + + """ + import chainer + from chainer import training + + if load_fn is None: + load_fn = chainer.serializers.load_npz + + @training.make_extension(trigger=(1, "epoch")) + def restore_snapshot(trainer): + _restore_snapshot(model, snapshot, load_fn) + + return restore_snapshot + + +def _restore_snapshot(model, snapshot, load_fn=None): + if load_fn is None: + import chainer + + load_fn = chainer.serializers.load_npz + + load_fn(snapshot, model) + logging.info("restored from " + str(snapshot)) + + +def adadelta_eps_decay(eps_decay): + """Extension to perform adadelta eps decay. + + Args: + eps_decay (float): Decay rate of eps. + + Returns: + An extension function. + + """ + from chainer import training + + @training.make_extension(trigger=(1, "epoch")) + def adadelta_eps_decay(trainer): + _adadelta_eps_decay(trainer, eps_decay) + + return adadelta_eps_decay + + +def _adadelta_eps_decay(trainer, eps_decay): + optimizer = trainer.updater.get_optimizer("main") + # for chainer + if hasattr(optimizer, "eps"): + current_eps = optimizer.eps + setattr(optimizer, "eps", current_eps * eps_decay) + logging.info("adadelta eps decayed to " + str(optimizer.eps)) + # pytorch + else: + for p in optimizer.param_groups: + p["eps"] *= eps_decay + logging.info("adadelta eps decayed to " + str(p["eps"])) + + +def adam_lr_decay(eps_decay): + """Extension to perform adam lr decay. + + Args: + eps_decay (float): Decay rate of lr. + + Returns: + An extension function. + + """ + from chainer import training + + @training.make_extension(trigger=(1, "epoch")) + def adam_lr_decay(trainer): + _adam_lr_decay(trainer, eps_decay) + + return adam_lr_decay + + +def _adam_lr_decay(trainer, eps_decay): + optimizer = trainer.updater.get_optimizer("main") + # for chainer + if hasattr(optimizer, "lr"): + current_lr = optimizer.lr + setattr(optimizer, "lr", current_lr * eps_decay) + logging.info("adam lr decayed to " + str(optimizer.lr)) + # pytorch + else: + for p in optimizer.param_groups: + p["lr"] *= eps_decay + logging.info("adam lr decayed to " + str(p["lr"])) + + +def torch_snapshot(savefun=torch.save, filename="snapshot.ep.{.updater.epoch}"): + """Extension to take snapshot of the trainer for pytorch. + + Returns: + An extension function. 
+ + """ + from chainer.training import extension + + @extension.make_extension(trigger=(1, "epoch"), priority=-100) + def torch_snapshot(trainer): + _torch_snapshot_object(trainer, trainer, filename.format(trainer), savefun) + + return torch_snapshot + + +def _torch_snapshot_object(trainer, target, filename, savefun): + from chainer.serializers import DictionarySerializer + + # make snapshot_dict dictionary + s = DictionarySerializer() + s.save(trainer) + if hasattr(trainer.updater.model, "model"): + # (for TTS) + if hasattr(trainer.updater.model.model, "module"): + model_state_dict = trainer.updater.model.model.module.state_dict() + else: + model_state_dict = trainer.updater.model.model.state_dict() + else: + # (for ASR) + if hasattr(trainer.updater.model, "module"): + model_state_dict = trainer.updater.model.module.state_dict() + else: + model_state_dict = trainer.updater.model.state_dict() + snapshot_dict = { + "trainer": s.target, + "model": model_state_dict, + "optimizer": trainer.updater.get_optimizer("main").state_dict(), + } + + # save snapshot dictionary + fn = filename.format(trainer) + prefix = "tmp" + fn + tmpdir = tempfile.mkdtemp(prefix=prefix, dir=trainer.out) + tmppath = os.path.join(tmpdir, fn) + try: + savefun(snapshot_dict, tmppath) + shutil.move(tmppath, os.path.join(trainer.out, fn)) + finally: + shutil.rmtree(tmpdir) + + +def add_gradient_noise(model, iteration, duration=100, eta=1.0, scale_factor=0.55): + """Adds noise from a standard normal distribution to the gradients. + + The standard deviation (`sigma`) is controlled by the three hyper-parameters below. + `sigma` goes to zero (no noise) with more iterations. + + Args: + model (torch.nn.model): Model. + iteration (int): Number of iterations. + duration (int) {100, 1000}: + Number of durations to control the interval of the `sigma` change. + eta (float) {0.01, 0.3, 1.0}: The magnitude of `sigma`. + scale_factor (float) {0.55}: The scale of `sigma`. + """ + interval = (iteration // duration) + 1 + sigma = eta / interval**scale_factor + for param in model.parameters(): + if param.grad is not None: + _shape = param.grad.size() + noise = sigma * torch.randn(_shape).to(param.device) + param.grad += noise + + +# * -------------------- general -------------------- * +def get_model_conf(model_path, conf_path=None): + """Get model config information by reading a model config file (model.json). + + Args: + model_path (str): Model path. + conf_path (str): Optional model config path. + + Returns: + list[int, int, dict[str, Any]]: Config information loaded from json file. + + """ + if conf_path is None: + model_conf = os.path.dirname(model_path) + "/model.json" + else: + model_conf = conf_path + with open(model_conf, "rb") as f: + logging.info("reading a config file from " + model_conf) + confs = json.load(f) + if isinstance(confs, dict): + # for lm + args = confs + return argparse.Namespace(**args) + else: + # for asr, tts, mt + idim, odim, args = confs + return idim, odim, argparse.Namespace(**args) + + +def chainer_load(path, model): + """Load chainer model parameters. + + Args: + path (str): Model path or snapshot file path to be loaded. + model (chainer.Chain): Chainer model. + + """ + import chainer + + if "snapshot" in os.path.basename(path): + chainer.serializers.load_npz(path, model, path="updater/model:main/") + else: + chainer.serializers.load_npz(path, model) + + +def torch_save(path, model): + """Save torch model states. + + Args: + path (str): Model path to be saved. + model (torch.nn.Module): Torch model. 
+ + """ + if hasattr(model, "module"): + torch.save(model.module.state_dict(), path) + else: + torch.save(model.state_dict(), path) + + +def snapshot_object(target, filename): + """Returns a trainer extension to take snapshots of a given object. + + Args: + target (model): Object to serialize. + filename (str): Name of the file into which the object is serialized.It can + be a format string, where the trainer object is passed to + the :meth: `str.format` method. For example, + ``'snapshot_{.updater.iteration}'`` is converted to + ``'snapshot_10000'`` at the 10,000th iteration. + + Returns: + An extension function. + + """ + from chainer.training import extension + + @extension.make_extension(trigger=(1, "epoch"), priority=-100) + def snapshot_object(trainer): + torch_save(os.path.join(trainer.out, filename.format(trainer)), target) + + return snapshot_object + + +def torch_load(path, model): + """Load torch model states. + + Args: + path (str): Model path or snapshot file path to be loaded. + model (torch.nn.Module): Torch model. + + """ + if "snapshot" in os.path.basename(path): + model_state_dict = torch.load(path, map_location=lambda storage, loc: storage)[ + "model" + ] + else: + model_state_dict = torch.load(path, map_location=lambda storage, loc: storage) + + if hasattr(model, "module"): + model.module.load_state_dict(model_state_dict) + else: + model.load_state_dict(model_state_dict) + + del model_state_dict + + +def torch_resume(snapshot_path, trainer): + """Resume from snapshot for pytorch. + + Args: + snapshot_path (str): Snapshot file path. + trainer (chainer.training.Trainer): Chainer's trainer instance. + + """ + from chainer.serializers import NpzDeserializer + + # load snapshot + snapshot_dict = torch.load(snapshot_path, map_location=lambda storage, loc: storage) + + # restore trainer states + d = NpzDeserializer(snapshot_dict["trainer"]) + d.load(trainer) + + # restore model states + if hasattr(trainer.updater.model, "model"): + # (for TTS model) + if hasattr(trainer.updater.model.model, "module"): + trainer.updater.model.model.module.load_state_dict(snapshot_dict["model"]) + else: + trainer.updater.model.model.load_state_dict(snapshot_dict["model"]) + else: + # (for ASR model) + if hasattr(trainer.updater.model, "module"): + trainer.updater.model.module.load_state_dict(snapshot_dict["model"]) + else: + trainer.updater.model.load_state_dict(snapshot_dict["model"]) + + # retore optimizer states + trainer.updater.get_optimizer("main").load_state_dict(snapshot_dict["optimizer"]) + + # delete opened snapshot + del snapshot_dict + + +# * ------------------ recognition related ------------------ * +def parse_hypothesis(hyp, char_list): + """Parse hypothesis. + + Args: + hyp (list[dict[str, Any]]): Recognition hypothesis. + char_list (list[str]): List of characters. + + Returns: + tuple(str, str, str, float) + + """ + # remove sos and get results + tokenid_as_list = list(map(int, hyp["yseq"][1:])) + token_as_list = [char_list[idx] for idx in tokenid_as_list] + score = float(hyp["score"]) + + # convert to string + tokenid = " ".join([str(idx) for idx in tokenid_as_list]) + token = " ".join(token_as_list) + text = "".join(token_as_list).replace("", " ") + + return text, token, tokenid, score + + +def add_results_to_json(nbest_hyps, char_list): + """Add N-best results to json. + Args: + js (dict[str, Any]): Groundtruth utterance dict. + nbest_hyps_sd (list[dict[str, Any]]): + List of hypothesis for multi_speakers: nutts x nspkrs. + char_list (list[str]): List of characters. 
+ Returns: + str: 1-best result + """ + assert len(nbest_hyps) == 1, "only 1-best result is supported." + # parse hypothesis + rec_text, rec_token, rec_tokenid, score = parse_hypothesis(nbest_hyps[0], char_list) + return rec_text + + +def plot_spectrogram( + plt, + spec, + mode="db", + fs=None, + frame_shift=None, + bottom=True, + left=True, + right=True, + top=False, + labelbottom=True, + labelleft=True, + labelright=True, + labeltop=False, + cmap="inferno", +): + """Plot spectrogram using matplotlib. + + Args: + plt (matplotlib.pyplot): pyplot object. + spec (numpy.ndarray): Input stft (Freq, Time) + mode (str): db or linear. + fs (int): Sample frequency. To convert y-axis to kHz unit. + frame_shift (int): The frame shift of stft. To convert x-axis to second unit. + bottom (bool):Whether to draw the respective ticks. + left (bool): + right (bool): + top (bool): + labelbottom (bool):Whether to draw the respective tick labels. + labelleft (bool): + labelright (bool): + labeltop (bool): + cmap (str): Colormap defined in matplotlib. + + """ + spec = np.abs(spec) + if mode == "db": + x = 20 * np.log10(spec + np.finfo(spec.dtype).eps) + elif mode == "linear": + x = spec + else: + raise ValueError(mode) + + if fs is not None: + ytop = fs / 2000 + ylabel = "kHz" + else: + ytop = x.shape[0] + ylabel = "bin" + + if frame_shift is not None and fs is not None: + xtop = x.shape[1] * frame_shift / fs + xlabel = "s" + else: + xtop = x.shape[1] + xlabel = "frame" + + extent = (0, xtop, 0, ytop) + plt.imshow(x[::-1], cmap=cmap, extent=extent) + + if labelbottom: + plt.xlabel("time [{}]".format(xlabel)) + if labelleft: + plt.ylabel("freq [{}]".format(ylabel)) + plt.colorbar().set_label("{}".format(mode)) + + plt.tick_params( + bottom=bottom, + left=left, + right=right, + top=top, + labelbottom=labelbottom, + labelleft=labelleft, + labelright=labelright, + labeltop=labeltop, + ) + plt.axis("auto") + + +# * ------------------ recognition related ------------------ * +def format_mulenc_args(args): + """Format args for multi-encoder setup. + + It deals with following situations: (when args.num_encs=2): + 1. args.elayers = None -> args.elayers = [4, 4]; + 2. args.elayers = 4 -> args.elayers = [4, 4]; + 3. args.elayers = [4, 4, 4] -> args.elayers = [4, 4]. + + """ + # default values when None is assigned. 
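+    # Each key below maps to its single-encoder default: a value of None is
+    # replaced by that default, a scalar is duplicated args.num_encs times, and
+    # an over-long list is truncated to the first args.num_encs entries.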
+ default_dict = { + "etype": "blstmp", + "elayers": 4, + "eunits": 300, + "subsample": "1", + "dropout_rate": 0.0, + "atype": "dot", + "adim": 320, + "awin": 5, + "aheads": 4, + "aconv_chans": -1, + "aconv_filts": 100, + } + for k in default_dict.keys(): + if isinstance(vars(args)[k], list): + if len(vars(args)[k]) != args.num_encs: + logging.warning( + "Length mismatch {}: Convert {} to {}.".format( + k, vars(args)[k], vars(args)[k][: args.num_encs] + ) + ) + vars(args)[k] = vars(args)[k][: args.num_encs] + else: + if not vars(args)[k]: + # assign default value if it is None + vars(args)[k] = default_dict[k] + logging.warning( + "{} is not specified, use default value {}.".format( + k, default_dict[k] + ) + ) + # duplicate + logging.warning( + "Type mismatch {}: Convert {} to {}.".format( + k, vars(args)[k], [vars(args)[k] for _ in range(args.num_encs)] + ) + ) + vars(args)[k] = [vars(args)[k] for _ in range(args.num_encs)] + return args diff --git a/espnet/nets/.DS_Store b/espnet/nets/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..18468621f0c4966193dd622b3b05091ca6164b8c Binary files /dev/null and b/espnet/nets/.DS_Store differ diff --git a/espnet/nets/batch_beam_search.py b/espnet/nets/batch_beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce5a10ca061d38f1904c5ec5f49fbbf32acd554 --- /dev/null +++ b/espnet/nets/batch_beam_search.py @@ -0,0 +1,349 @@ +"""Parallel beam search module.""" + +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import NamedTuple +from typing import Tuple + +import torch +from torch.nn.utils.rnn import pad_sequence + +from espnet.nets.beam_search import BeamSearch +from espnet.nets.beam_search import Hypothesis + + +class BatchHypothesis(NamedTuple): + """Batchfied/Vectorized hypothesis data type.""" + + yseq: torch.Tensor = torch.tensor([]) # (batch, maxlen) + score: torch.Tensor = torch.tensor([]) # (batch,) + length: torch.Tensor = torch.tensor([]) # (batch,) + scores: Dict[str, torch.Tensor] = dict() # values: (batch,) + states: Dict[str, Dict] = dict() + + def __len__(self) -> int: + """Return a batch size.""" + return len(self.length) + + +class BatchBeamSearch(BeamSearch): + """Batch beam search implementation.""" + + def batchfy(self, hyps: List[Hypothesis]) -> BatchHypothesis: + """Convert list to batch.""" + if len(hyps) == 0: + return BatchHypothesis() + yseq=pad_sequence( + [h.yseq for h in hyps], batch_first=True, padding_value=self.eos + ) + return BatchHypothesis( + yseq=yseq, + length=torch.tensor([len(h.yseq) for h in hyps], dtype=torch.int64, device=yseq.device), + score=torch.tensor([h.score for h in hyps]).to(yseq.device), + scores={k: torch.tensor([h.scores[k] for h in hyps], device=yseq.device) for k in self.scorers}, + states={k: [h.states[k] for h in hyps] for k in self.scorers}, + ) + + def _batch_select(self, hyps: BatchHypothesis, ids: List[int]) -> BatchHypothesis: + return BatchHypothesis( + yseq=hyps.yseq[ids], + score=hyps.score[ids], + length=hyps.length[ids], + scores={k: v[ids] for k, v in hyps.scores.items()}, + states={ + k: [self.scorers[k].select_state(v, i) for i in ids] + for k, v in hyps.states.items() + }, + ) + + def _select(self, hyps: BatchHypothesis, i: int) -> Hypothesis: + return Hypothesis( + yseq=hyps.yseq[i, : hyps.length[i]], + score=hyps.score[i], + scores={k: v[i] for k, v in hyps.scores.items()}, + states={ + k: self.scorers[k].select_state(v, i) for k, v in hyps.states.items() + }, + ) + + 
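+    # NOTE: batchfy() pads a List[Hypothesis] into a single BatchHypothesis
+    # (padding yseq with eos and recording the true lengths), unbatchfy() below
+    # converts back, and _batch_select()/_select() pick a subset (or a single
+    # item) of a BatchHypothesis via each scorer's select_state().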
def unbatchfy(self, batch_hyps: BatchHypothesis) -> List[Hypothesis]: + """Revert batch to list.""" + return [ + Hypothesis( + yseq=batch_hyps.yseq[i][: batch_hyps.length[i]], + score=batch_hyps.score[i], + scores={k: batch_hyps.scores[k][i] for k in self.scorers}, + states={ + k: v.select_state(batch_hyps.states[k], i) + for k, v in self.scorers.items() + }, + ) + for i in range(len(batch_hyps.length)) + ] + + def batch_beam( + self, weighted_scores: torch.Tensor, ids: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Batch-compute topk full token ids and partial token ids. + + Args: + weighted_scores (torch.Tensor): The weighted sum scores for each tokens. + Its shape is `(n_beam, self.vocab_size)`. + ids (torch.Tensor): The partial token ids to compute topk. + Its shape is `(n_beam, self.pre_beam_size)`. + + Returns: + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + The topk full (prev_hyp, new_token) ids + and partial (prev_hyp, new_token) ids. + Their shapes are all `(self.beam_size,)` + + """ + top_ids = weighted_scores.view(-1).topk(self.beam_size)[1] + # Because of the flatten above, `top_ids` is organized as: + # [hyp1 * V + token1, hyp2 * V + token2, ..., hypK * V + tokenK], + # where V is `self.n_vocab` and K is `self.beam_size` + prev_hyp_ids = torch.div(top_ids, self.n_vocab, rounding_mode='trunc') + new_token_ids = top_ids % self.n_vocab + return prev_hyp_ids, new_token_ids, prev_hyp_ids, new_token_ids + + def init_hyp(self, x: torch.Tensor) -> BatchHypothesis: + """Get an initial hypothesis data. + + Args: + x (torch.Tensor): The encoder output feature + + Returns: + Hypothesis: The initial hypothesis. + + """ + init_states = dict() + init_scores = dict() + for k, d in self.scorers.items(): + init_states[k] = d.batch_init_state(x) + init_scores[k] = 0.0 + return self.batchfy( + [ + Hypothesis( + score=0.0, + scores=init_scores, + states=init_states, + yseq=torch.tensor([self.sos], device=x.device), + ) + ] + ) + + def score_full( + self, hyp: BatchHypothesis, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.full_scorers.items(): + scores[k], states[k] = d.batch_score(hyp.yseq, hyp.states[k], x) + return scores, states + + def score_partial( + self, hyp: BatchHypothesis, ids: torch.Tensor, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. 
+ + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + ids (torch.Tensor): 2D tensor of new partial tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.part_scorers.items(): + scores[k], states[k] = d.batch_score_partial( + hyp.yseq, ids, hyp.states[k], x + ) + return scores, states + + def merge_states(self, states: Any, part_states: Any, part_idx: int) -> Any: + """Merge states for new hypothesis. + + Args: + states: states of `self.full_scorers` + part_states: states of `self.part_scorers` + part_idx (int): The new token id for `part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. + Its values are states of the scorers. + + """ + new_states = dict() + for k, v in states.items(): + new_states[k] = v + for k, v in part_states.items(): + new_states[k] = v + return new_states + + def search(self, running_hyps: BatchHypothesis, x: torch.Tensor) -> BatchHypothesis: + """Search new tokens for running hypotheses and encoded speech x. + + Args: + running_hyps (BatchHypothesis): Running hypotheses on beam + x (torch.Tensor): Encoded speech feature (T, D) + + Returns: + BatchHypothesis: Best sorted hypotheses + + """ + n_batch = len(running_hyps) + part_ids = None # no pre-beam + # batch scoring + weighted_scores = torch.zeros( + n_batch, self.n_vocab, dtype=x.dtype, device=x.device + ) + scores, states = self.score_full(running_hyps, x.expand(n_batch, *x.shape)) + for k in self.full_scorers: + weighted_scores += self.weights[k] * scores[k] + # partial scoring + if self.do_pre_beam: + pre_beam_scores = ( + weighted_scores + if self.pre_beam_score_key == "full" + else scores[self.pre_beam_score_key] + ) + part_ids = torch.topk(pre_beam_scores, self.pre_beam_size, dim=-1)[1] + # NOTE(takaaki-hori): Unlike BeamSearch, we assume that score_partial returns + # full-size score matrices, which has non-zero scores for part_ids and zeros + # for others. + part_scores, part_states = self.score_partial(running_hyps, part_ids, x) + for k in self.part_scorers: + weighted_scores += self.weights[k] * part_scores[k] + # add previous hyp scores + weighted_scores += running_hyps.score.to( + dtype=x.dtype, device=x.device + ).unsqueeze(1) + + # TODO(karita): do not use list. 
use batch instead + # see also https://github.com/espnet/espnet/pull/1402#discussion_r354561029 + # update hyps + best_hyps = [] + prev_hyps = self.unbatchfy(running_hyps) + for ( + full_prev_hyp_id, + full_new_token_id, + part_prev_hyp_id, + part_new_token_id, + ) in zip(*self.batch_beam(weighted_scores, part_ids)): + prev_hyp = prev_hyps[full_prev_hyp_id] + best_hyps.append( + Hypothesis( + score=weighted_scores[full_prev_hyp_id, full_new_token_id], + yseq=self.append_token(prev_hyp.yseq, full_new_token_id), + scores=self.merge_scores( + prev_hyp.scores, + {k: v[full_prev_hyp_id] for k, v in scores.items()}, + full_new_token_id, + {k: v[part_prev_hyp_id] for k, v in part_scores.items()}, + part_new_token_id, + ), + states=self.merge_states( + { + k: self.full_scorers[k].select_state(v, full_prev_hyp_id) + for k, v in states.items() + }, + { + k: self.part_scorers[k].select_state( + v, part_prev_hyp_id, part_new_token_id + ) + for k, v in part_states.items() + }, + part_new_token_id, + ), + ) + ) + return self.batchfy(best_hyps) + + def post_process( + self, + i: int, + maxlen: int, + maxlenratio: float, + running_hyps: BatchHypothesis, + ended_hyps: List[Hypothesis], + ) -> BatchHypothesis: + """Perform post-processing of beam search iterations. + + Args: + i (int): The length of hypothesis tokens. + maxlen (int): The maximum length of tokens in beam search. + maxlenratio (int): The maximum length ratio in beam search. + running_hyps (BatchHypothesis): The running hypotheses in beam search. + ended_hyps (List[Hypothesis]): The ended hypotheses in beam search. + + Returns: + BatchHypothesis: The new running hypotheses. + + """ + n_batch = running_hyps.yseq.shape[0] + logging.debug(f"the number of running hypothes: {n_batch}") + if self.token_list is not None: + logging.debug( + "best hypo: " + + "".join( + [ + self.token_list[x] + for x in running_hyps.yseq[0, 1 : running_hyps.length[0]] + ] + ) + ) + # add eos in the final loop to avoid that there are no ended hyps + if i == maxlen - 1: + logging.info("adding in the last position in the loop") + yseq_eos = torch.cat( + ( + running_hyps.yseq, + torch.full( + (n_batch, 1), + self.eos, + device=running_hyps.yseq.device, + dtype=torch.int64, + ), + ), + 1, + ) + running_hyps.yseq.resize_as_(yseq_eos) + running_hyps.yseq[:] = yseq_eos + running_hyps.length[:] = yseq_eos.shape[1] + + # add ended hypotheses to a final list, and removed them from current hypotheses + # (this will be a probmlem, number of hyps < beam) + is_eos = ( + running_hyps.yseq[torch.arange(n_batch), running_hyps.length - 1] + == self.eos + ) + for b in torch.nonzero(is_eos, as_tuple=False).view(-1): + hyp = self._select(running_hyps, b) + ended_hyps.append(hyp) + remained_ids = torch.nonzero(is_eos == 0, as_tuple=False).view(-1) + return self._batch_select(running_hyps, remained_ids) diff --git a/espnet/nets/beam_search.py b/espnet/nets/beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..0f33d8c63bf667c7eed598a10ce9e5cb53be121c --- /dev/null +++ b/espnet/nets/beam_search.py @@ -0,0 +1,516 @@ +"""Beam search module.""" + +from itertools import chain +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import NamedTuple +from typing import Tuple +from typing import Union + +import torch + +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.scorer_interface import PartialScorerInterface +from espnet.nets.scorer_interface import ScorerInterface + + +class Hypothesis(NamedTuple): 
+ """Hypothesis data type.""" + + yseq: torch.Tensor + score: Union[float, torch.Tensor] = 0 + scores: Dict[str, Union[float, torch.Tensor]] = dict() + states: Dict[str, Any] = dict() + + def asdict(self) -> dict: + """Convert data to JSON-friendly dict.""" + return self._replace( + yseq=self.yseq.tolist(), + score=float(self.score), + scores={k: float(v) for k, v in self.scores.items()}, + )._asdict() + + +class BeamSearch(torch.nn.Module): + """Beam search implementation.""" + + def __init__( + self, + scorers: Dict[str, ScorerInterface], + weights: Dict[str, float], + beam_size: int, + vocab_size: int, + sos: int, + eos: int, + token_list: List[str] = None, + pre_beam_ratio: float = 1.5, + pre_beam_score_key: str = None, + ): + """Initialize beam search. + + Args: + scorers (dict[str, ScorerInterface]): Dict of decoder modules + e.g., Decoder, CTCPrefixScorer, LM + The scorer will be ignored if it is `None` + weights (dict[str, float]): Dict of weights for each scorers + The scorer will be ignored if its weight is 0 + beam_size (int): The number of hypotheses kept during search + vocab_size (int): The number of vocabulary + sos (int): Start of sequence id + eos (int): End of sequence id + token_list (list[str]): List of tokens for debug log + pre_beam_score_key (str): key of scores to perform pre-beam search + pre_beam_ratio (float): beam size in the pre-beam search + will be `int(pre_beam_ratio * beam_size)` + + """ + super().__init__() + # set scorers + self.weights = weights + self.scorers = dict() + self.full_scorers = dict() + self.part_scorers = dict() + # this module dict is required for recursive cast + # `self.to(device, dtype)` in `recog.py` + self.nn_dict = torch.nn.ModuleDict() + for k, v in scorers.items(): + w = weights.get(k, 0) + if w == 0 or v is None: + continue + assert isinstance( + v, ScorerInterface + ), f"{k} ({type(v)}) does not implement ScorerInterface" + self.scorers[k] = v + if isinstance(v, PartialScorerInterface): + self.part_scorers[k] = v + else: + self.full_scorers[k] = v + if isinstance(v, torch.nn.Module): + self.nn_dict[k] = v + + # set configurations + self.sos = sos + self.eos = eos + self.token_list = token_list + self.pre_beam_size = int(pre_beam_ratio * beam_size) + self.beam_size = beam_size + self.n_vocab = vocab_size + if ( + pre_beam_score_key is not None + and pre_beam_score_key != "full" + and pre_beam_score_key not in self.full_scorers + ): + raise KeyError(f"{pre_beam_score_key} is not found in {self.full_scorers}") + self.pre_beam_score_key = pre_beam_score_key + self.do_pre_beam = ( + self.pre_beam_score_key is not None + and self.pre_beam_size < self.n_vocab + and len(self.part_scorers) > 0 + ) + + def init_hyp(self, x: torch.Tensor) -> List[Hypothesis]: + """Get an initial hypothesis data. + + Args: + x (torch.Tensor): The encoder output feature + + Returns: + Hypothesis: The initial hypothesis. + + """ + init_states = dict() + init_scores = dict() + for k, d in self.scorers.items(): + init_states[k] = d.init_state(x) + init_scores[k] = 0.0 + return [ + Hypothesis( + score=0.0, + scores=init_scores, + states=init_states, + yseq=torch.tensor([self.sos], device=x.device), + ) + ] + + @staticmethod + def append_token(xs: torch.Tensor, x: int) -> torch.Tensor: + """Append new token to prefix tokens. 
+ + Args: + xs (torch.Tensor): The prefix token + x (int): The new token to append + + Returns: + torch.Tensor: New tensor contains: xs + [x] with xs.dtype and xs.device + + """ + x = torch.tensor([x], dtype=xs.dtype, device=xs.device) + return torch.cat((xs, x)) + + def score_full( + self, hyp: Hypothesis, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.full_scorers.items(): + scores[k], states[k] = d.score(hyp.yseq, hyp.states[k], x) + return scores, states + + def score_partial( + self, hyp: Hypothesis, ids: torch.Tensor, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.part_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + ids (torch.Tensor): 1D tensor of new partial tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.part_scorers` + and tensor score values of shape: `(len(ids),)`, + and state dict that has string keys + and state values of `self.part_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.part_scorers.items(): + scores[k], states[k] = d.score_partial(hyp.yseq, ids, hyp.states[k], x) + return scores, states + + def beam( + self, weighted_scores: torch.Tensor, ids: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute topk full token ids and partial token ids. + + Args: + weighted_scores (torch.Tensor): The weighted sum scores for each tokens. + Its shape is `(self.n_vocab,)`. + ids (torch.Tensor): The partial token ids to compute topk + + Returns: + Tuple[torch.Tensor, torch.Tensor]: + The topk full token ids and partial token ids. + Their shapes are `(self.beam_size,)` + + """ + # no pre beam performed + if weighted_scores.size(0) == ids.size(0): + top_ids = weighted_scores.topk(self.beam_size)[1] + return top_ids, top_ids + + # mask pruned in pre-beam not to select in topk + tmp = weighted_scores[ids] + weighted_scores[:] = -float("inf") + weighted_scores[ids] = tmp + top_ids = weighted_scores.topk(self.beam_size)[1] + local_ids = weighted_scores[ids].topk(self.beam_size)[1] + return top_ids, local_ids + + @staticmethod + def merge_scores( + prev_scores: Dict[str, float], + next_full_scores: Dict[str, torch.Tensor], + full_idx: int, + next_part_scores: Dict[str, torch.Tensor], + part_idx: int, + ) -> Dict[str, torch.Tensor]: + """Merge scores for new hypothesis. + + Args: + prev_scores (Dict[str, float]): + The previous hypothesis scores by `self.scorers` + next_full_scores (Dict[str, torch.Tensor]): scores by `self.full_scorers` + full_idx (int): The next token id for `next_full_scores` + next_part_scores (Dict[str, torch.Tensor]): + scores of partial tokens by `self.part_scorers` + part_idx (int): The new token id for `next_part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. 
+ Its values are scalar tensors by the scorers. + + """ + new_scores = dict() + for k, v in next_full_scores.items(): + new_scores[k] = prev_scores[k] + v[full_idx] + for k, v in next_part_scores.items(): + new_scores[k] = prev_scores[k] + v[part_idx] + return new_scores + + def merge_states(self, states: Any, part_states: Any, part_idx: int) -> Any: + """Merge states for new hypothesis. + + Args: + states: states of `self.full_scorers` + part_states: states of `self.part_scorers` + part_idx (int): The new token id for `part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. + Its values are states of the scorers. + + """ + new_states = dict() + for k, v in states.items(): + new_states[k] = v + for k, d in self.part_scorers.items(): + new_states[k] = d.select_state(part_states[k], part_idx) + return new_states + + def search( + self, running_hyps: List[Hypothesis], x: torch.Tensor + ) -> List[Hypothesis]: + """Search new tokens for running hypotheses and encoded speech x. + + Args: + running_hyps (List[Hypothesis]): Running hypotheses on beam + x (torch.Tensor): Encoded speech feature (T, D) + + Returns: + List[Hypotheses]: Best sorted hypotheses + + """ + best_hyps = [] + part_ids = torch.arange(self.n_vocab, device=x.device) # no pre-beam + for hyp in running_hyps: + # scoring + weighted_scores = torch.zeros(self.n_vocab, dtype=x.dtype, device=x.device) + scores, states = self.score_full(hyp, x) + for k in self.full_scorers: + weighted_scores += self.weights[k] * scores[k] + # partial scoring + if self.do_pre_beam: + pre_beam_scores = ( + weighted_scores + if self.pre_beam_score_key == "full" + else scores[self.pre_beam_score_key] + ) + part_ids = torch.topk(pre_beam_scores, self.pre_beam_size)[1] + part_scores, part_states = self.score_partial(hyp, part_ids, x) + for k in self.part_scorers: + weighted_scores[part_ids] += self.weights[k] * part_scores[k] + # add previous hyp score + weighted_scores += hyp.score + + # update hyps + for j, part_j in zip(*self.beam(weighted_scores, part_ids)): + # will be (2 x beam at most) + best_hyps.append( + Hypothesis( + score=weighted_scores[j], + yseq=self.append_token(hyp.yseq, j), + scores=self.merge_scores( + hyp.scores, scores, j, part_scores, part_j + ), + states=self.merge_states(states, part_states, part_j), + ) + ) + + # sort and prune 2 x beam -> beam + best_hyps = sorted(best_hyps, key=lambda x: x.score, reverse=True)[ + : min(len(best_hyps), self.beam_size) + ] + return best_hyps + + def forward( + self, x: torch.Tensor, maxlenratio: float = 0.0, minlenratio: float = 0.0 + ) -> List[Hypothesis]: + """Perform beam search. + + Args: + x (torch.Tensor): Encoded speech feature (T, D) + maxlenratio (float): Input length ratio to obtain max output length. + If maxlenratio=0.0 (default), it uses a end-detect function + to automatically find maximum hypothesis lengths + If maxlenratio<0.0, its absolute value is interpreted + as a constant max output length. + minlenratio (float): Input length ratio to obtain min output length. 
+ + Returns: + list[Hypothesis]: N-best decoding results + + """ + # set length bounds + if maxlenratio == 0: + maxlen = x.shape[0] + elif maxlenratio < 0: + maxlen = -1 * int(maxlenratio) + else: + maxlen = max(1, int(maxlenratio * x.size(0))) + minlen = int(minlenratio * x.size(0)) + logging.info("decoder input length: " + str(x.shape[0])) + logging.info("max output length: " + str(maxlen)) + logging.info("min output length: " + str(minlen)) + + # main loop of prefix search + running_hyps = self.init_hyp(x) + ended_hyps = [] + for i in range(maxlen): + logging.debug("position " + str(i)) + best = self.search(running_hyps, x) + # post process of one iteration + running_hyps = self.post_process(i, maxlen, maxlenratio, best, ended_hyps) + # end detection + if maxlenratio == 0.0 and end_detect([h.asdict() for h in ended_hyps], i): + logging.info(f"end detected at {i}") + break + if len(running_hyps) == 0: + logging.info("no hypothesis. Finish decoding.") + break + else: + logging.debug(f"remained hypotheses: {len(running_hyps)}") + + nbest_hyps = sorted(ended_hyps, key=lambda x: x.score, reverse=True) + # check the number of hypotheses reaching to eos + if len(nbest_hyps) == 0: + logging.warning( + "there is no N-best results, perform recognition " + "again with smaller minlenratio." + ) + return ( + [] + if minlenratio < 0.1 + else self.forward(x, maxlenratio, max(0.0, minlenratio - 0.1)) + ) + + # report the best result + best = nbest_hyps[0] + for k, v in best.scores.items(): + logging.info( + f"{v:6.2f} * {self.weights[k]:3} = {v * self.weights[k]:6.2f} for {k}" + ) + logging.info(f"total log probability: {best.score:.2f}") + logging.info(f"normalized log probability: {best.score / len(best.yseq):.2f}") + logging.info(f"total number of ended hypotheses: {len(nbest_hyps)}") + if self.token_list is not None: + logging.info( + "best hypo: " + + "".join([self.token_list[x] for x in best.yseq[1:-1]]) + + "\n" + ) + return nbest_hyps + + def post_process( + self, + i: int, + maxlen: int, + maxlenratio: float, + running_hyps: List[Hypothesis], + ended_hyps: List[Hypothesis], + ) -> List[Hypothesis]: + """Perform post-processing of beam search iterations. + + Args: + i (int): The length of hypothesis tokens. + maxlen (int): The maximum length of tokens in beam search. + maxlenratio (int): The maximum length ratio in beam search. + running_hyps (List[Hypothesis]): The running hypotheses in beam search. + ended_hyps (List[Hypothesis]): The ended hypotheses in beam search. + + Returns: + List[Hypothesis]: The new running hypotheses. 
+ + """ + logging.debug(f"the number of running hypotheses: {len(running_hyps)}") + if self.token_list is not None: + logging.debug( + "best hypo: " + + "".join([self.token_list[x] for x in running_hyps[0].yseq[1:]]) + ) + # add eos in the final loop to avoid that there are no ended hyps + if i == maxlen - 1: + logging.info("adding in the last position in the loop") + running_hyps = [ + h._replace(yseq=self.append_token(h.yseq, self.eos)) + for h in running_hyps + ] + + # add ended hypotheses to a final list, and removed them from current hypotheses + # (this will be a problem, number of hyps < beam) + remained_hyps = [] + for hyp in running_hyps: + if hyp.yseq[-1] == self.eos: + # e.g., Word LM needs to add final score + for k, d in chain(self.full_scorers.items(), self.part_scorers.items()): + s = d.final_score(hyp.states[k]) + hyp.scores[k] += s + hyp = hyp._replace(score=hyp.score + self.weights[k] * s) + ended_hyps.append(hyp) + else: + remained_hyps.append(hyp) + return remained_hyps + + +def beam_search( + x: torch.Tensor, + sos: int, + eos: int, + beam_size: int, + vocab_size: int, + scorers: Dict[str, ScorerInterface], + weights: Dict[str, float], + token_list: List[str] = None, + maxlenratio: float = 0.0, + minlenratio: float = 0.0, + pre_beam_ratio: float = 1.5, + pre_beam_score_key: str = "full", +) -> list: + """Perform beam search with scorers. + + Args: + x (torch.Tensor): Encoded speech feature (T, D) + sos (int): Start of sequence id + eos (int): End of sequence id + beam_size (int): The number of hypotheses kept during search + vocab_size (int): The number of vocabulary + scorers (dict[str, ScorerInterface]): Dict of decoder modules + e.g., Decoder, CTCPrefixScorer, LM + The scorer will be ignored if it is `None` + weights (dict[str, float]): Dict of weights for each scorers + The scorer will be ignored if its weight is 0 + token_list (list[str]): List of tokens for debug log + maxlenratio (float): Input length ratio to obtain max output length. + If maxlenratio=0.0 (default), it uses a end-detect function + to automatically find maximum hypothesis lengths + minlenratio (float): Input length ratio to obtain min output length. + pre_beam_score_key (str): key of scores to perform pre-beam search + pre_beam_ratio (float): beam size in the pre-beam search + will be `int(pre_beam_ratio * beam_size)` + + Returns: + list: N-best decoding results + + """ + ret = BeamSearch( + scorers, + weights, + beam_size=beam_size, + vocab_size=vocab_size, + pre_beam_ratio=pre_beam_ratio, + pre_beam_score_key=pre_beam_score_key, + sos=sos, + eos=eos, + token_list=token_list, + ).forward(x=x, maxlenratio=maxlenratio, minlenratio=minlenratio) + return [h.asdict() for h in ret] diff --git a/espnet/nets/ctc_prefix_score.py b/espnet/nets/ctc_prefix_score.py new file mode 100644 index 0000000000000000000000000000000000000000..0c67ecd096de46ad00972cf3a8ba812852f38c97 --- /dev/null +++ b/espnet/nets/ctc_prefix_score.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch + +import numpy as np +import six + + +class CTCPrefixScoreTH(object): + """Batch processing of CTCPrefixScore + + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the label probablities for multiple + hypotheses simultaneously + See also Seki et al. 
"Vectorized Beam Search for CTC-Attention-Based + Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019. + """ + + def __init__(self, x, xlens, blank, eos, margin=0): + """Construct CTC prefix scorer + + :param torch.Tensor x: input label posterior sequences (B, T, O) + :param torch.Tensor xlens: input lengths (B,) + :param int blank: blank label id + :param int eos: end-of-sequence id + :param int margin: margin parameter for windowing (0 means no windowing) + """ + # In the comment lines, + # we assume T: input_length, B: batch size, W: beam width, O: output dim. + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.batch = x.size(0) + self.input_length = x.size(1) + self.odim = x.size(2) + self.dtype = x.dtype + self.device = ( + torch.device("cuda:%d" % x.get_device()) + if x.is_cuda + else torch.device("cpu") + ) + # Pad the rest of posteriors in the batch + # TODO(takaaki-hori): need a better way without for-loops + for i, l in enumerate(xlens): + if l < self.input_length: + x[i, l:, :] = self.logzero + x[i, l:, blank] = 0 + # Reshape input x + xn = x.transpose(0, 1) # (B, T, O) -> (T, B, O) + xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) + self.x = torch.stack([xn, xb]) # (2, T, B, O) + self.end_frames = torch.as_tensor(xlens) - 1 + + # Setup CTC windowing + self.margin = margin + if margin > 0: + self.frame_ids = torch.arange( + self.input_length, dtype=self.dtype, device=self.device + ) + # Base indices for index conversion + self.idx_bh = None + self.idx_b = torch.arange(self.batch, device=self.device) + self.idx_bo = (self.idx_b * self.odim).unsqueeze(1) + + def __call__(self, y, state, scoring_ids=None, att_w=None): + """Compute CTC prefix scores for next labels + + :param list y: prefix label sequences + :param tuple state: previous CTC state + :param torch.Tensor pre_scores: scores for pre-selection of hypotheses (BW, O) + :param torch.Tensor att_w: attention weights to decide CTC window + :return new_state, ctc_local_scores (BW, O) + """ + output_length = len(y[0]) - 1 # ignore sos + last_ids = [yi[-1] for yi in y] # last output label ids + n_bh = len(last_ids) # batch * hyps + n_hyps = n_bh // self.batch # assuming each utterance has the same # of hyps + self.scoring_num = scoring_ids.size(-1) if scoring_ids is not None else 0 + # prepare state info + if state is None: + r_prev = torch.full( + (self.input_length, 2, self.batch, n_hyps), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + r_prev[:, 1] = torch.cumsum(self.x[0, :, :, self.blank], 0).unsqueeze(2) + r_prev = r_prev.view(-1, 2, n_bh) + s_prev = 0.0 + f_min_prev = 0 + f_max_prev = 1 + else: + r_prev, s_prev, f_min_prev, f_max_prev = state + + # select input dimensions for scoring + if self.scoring_num > 0: + scoring_idmap = torch.full( + (n_bh, self.odim), -1, dtype=torch.long, device=self.device + ) + snum = self.scoring_num + if self.idx_bh is None or n_bh > len(self.idx_bh): + self.idx_bh = torch.arange(n_bh, device=self.device).view(-1, 1) + scoring_idmap[self.idx_bh[:n_bh], scoring_ids] = torch.arange( + snum, device=self.device + ) + scoring_idx = ( + scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1, 1) + ).view(-1) + x_ = torch.index_select( + self.x.view(2, -1, self.batch * self.odim), 2, scoring_idx + ).view(2, -1, n_bh, snum) + else: + scoring_ids = None + scoring_idmap = None + snum = self.odim + x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1, n_bh, snum) + + # new CTC forward probs are prepared as a (T x 2 x BW x S) tensor + # 
that corresponds to r_t^n(h) and r_t^b(h) in a batch. + r = torch.full( + (self.input_length, 2, n_bh, snum), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + if output_length == 0: + r[0, 0] = x_[0, 0] + + r_sum = torch.logsumexp(r_prev, 1) + log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum) + if scoring_ids is not None: + for idx in range(n_bh): + pos = scoring_idmap[idx, last_ids[idx]] + if pos >= 0: + log_phi[:, idx, pos] = r_prev[:, 1, idx] + else: + for idx in range(n_bh): + log_phi[:, idx, last_ids[idx]] = r_prev[:, 1, idx] + + # decide start and end frames based on attention weights + if att_w is not None and self.margin > 0: + f_arg = torch.matmul(att_w, self.frame_ids) + f_min = max(int(f_arg.min().cpu()), f_min_prev) + f_max = max(int(f_arg.max().cpu()), f_max_prev) + start = min(f_max_prev, max(f_min - self.margin, output_length, 1)) + end = min(f_max + self.margin, self.input_length) + else: + f_min = f_max = 0 + start = max(output_length, 1) + end = self.input_length + + # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h)) + for t in range(start, end): + rp = r[t - 1] + rr = torch.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view( + 2, 2, n_bh, snum + ) + r[t] = torch.logsumexp(rr, 1) + x_[:, t] + + # compute log prefix probabilities log(psi) + log_phi_x = torch.cat((log_phi[0].unsqueeze(0), log_phi[:-1]), dim=0) + x_[0] + if scoring_ids is not None: + log_psi = torch.full( + (n_bh, self.odim), self.logzero, dtype=self.dtype, device=self.device + ) + log_psi_ = torch.logsumexp( + torch.cat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), dim=0), + dim=0, + ) + for si in range(n_bh): + log_psi[si, scoring_ids[si]] = log_psi_[si] + else: + log_psi = torch.logsumexp( + torch.cat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), dim=0), + dim=0, + ) + + for si in range(n_bh): + log_psi[si, self.eos] = r_sum[self.end_frames[si // n_hyps], si] + + # exclude blank probs + log_psi[:, self.blank] = self.logzero + + return (log_psi - s_prev), (r, log_psi, f_min, f_max, scoring_idmap) + + def index_select_state(self, state, best_ids): + """Select CTC states according to best ids + + :param state : CTC state + :param best_ids : index numbers selected by beam pruning (B, W) + :return selected_state + """ + r, s, f_min, f_max, scoring_idmap = state + # convert ids to BHO space + n_bh = len(s) + n_hyps = n_bh // self.batch + vidx = (best_ids + (self.idx_b * (n_hyps * self.odim)).view(-1, 1)).view(-1) + # select hypothesis scores + s_new = torch.index_select(s.view(-1), 0, vidx) + s_new = s_new.view(-1, 1).repeat(1, self.odim).view(n_bh, self.odim) + # convert ids to BHS space (S: scoring_num) + if scoring_idmap is not None: + snum = self.scoring_num + hyp_idx = (best_ids // self.odim + (self.idx_b * n_hyps).view(-1, 1)).view( + -1 + ) + label_ids = torch.fmod(best_ids, self.odim).view(-1) + score_idx = scoring_idmap[hyp_idx, label_ids] + score_idx[score_idx == -1] = 0 + vidx = score_idx + hyp_idx * snum + else: + snum = self.odim + # select forward probabilities + r_new = torch.index_select(r.view(-1, 2, n_bh * snum), 2, vidx).view( + -1, 2, n_bh + ) + return r_new, s_new, f_min, f_max + + def extend_prob(self, x): + """Extend CTC prob. 
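+
+        Note: this is intended to be used together with ``extend_state`` when
+        longer posterior sequences become available (e.g. segment-wise or
+        streaming decoding). An illustrative call pattern, assuming ``scorer``
+        is an instance of this scorer class:
+
+            scorer.extend_prob(new_x)           # new_x: (B, T_new, O), T_new >= previous T
+            state = scorer.extend_state(state)  # pad per-hypothesis forward probabilities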
+ + :param torch.Tensor x: input label posterior sequences (B, T, O) + """ + + if self.x.shape[1] < x.shape[1]: # self.x (2,T,B,O); x (B,T,O) + # Pad the rest of posteriors in the batch + # TODO(takaaki-hori): need a better way without for-loops + xlens = [x.size(1)] + for i, l in enumerate(xlens): + if l < self.input_length: + x[i, l:, :] = self.logzero + x[i, l:, self.blank] = 0 + tmp_x = self.x + xn = x.transpose(0, 1) # (B, T, O) -> (T, B, O) + xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) + self.x = torch.stack([xn, xb]) # (2, T, B, O) + self.x[:, : tmp_x.shape[1], :, :] = tmp_x + self.input_length = x.size(1) + self.end_frames = torch.as_tensor(xlens) - 1 + + def extend_state(self, state): + """Compute CTC prefix state. + + + :param state : CTC state + :return ctc_state + """ + + if state is None: + # nothing to do + return state + else: + r_prev, s_prev, f_min_prev, f_max_prev = state + + r_prev_new = torch.full( + (self.input_length, 2), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + start = max(r_prev.shape[0], 1) + r_prev_new[0:start] = r_prev + for t in six.moves.range(start, self.input_length): + r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :, self.blank] + + return (r_prev_new, s_prev, f_min_prev, f_max_prev) + + +class CTCPrefixScore(object): + """Compute CTC label sequence scores + + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the probablities of multiple labels + simultaneously + """ + + def __init__(self, x, blank, eos, xp): + self.xp = xp + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.input_length = len(x) + self.x = x + + def initial_state(self): + """Obtain an initial CTC state + + :return: CTC state + """ + # initial CTC state is made of a frame x 2 tensor that corresponds to + # r_t^n() and r_t^b(), where 0 and 1 of axis=1 represent + # superscripts n and b (non-blank and blank), respectively. + r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32) + r[0, 1] = self.x[0, self.blank] + for i in six.moves.range(1, self.input_length): + r[i, 1] = r[i - 1, 1] + self.x[i, self.blank] + return r + + def __call__(self, y, cs, r_prev): + """Compute CTC prefix scores for next labels + + :param y : prefix label sequence + :param cs : array of next labels + :param r_prev: previous CTC state + :return ctc_scores, ctc_states + """ + # initialize CTC states + output_length = len(y) - 1 # ignore sos + # new CTC states are prepared as a frame x (n or b) x n_labels tensor + # that corresponds to r_t^n(h) and r_t^b(h). 
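+        # The loop below implements the prefix-score recursion in the log domain
+        # (xp.logaddexp = log of a sum, "+" = log of a product):
+        #   r_t^n(h) = logaddexp(r_{t-1}^n(h), phi_{t-1}) + log p_t(c)
+        #   r_t^b(h) = logaddexp(r_{t-1}^n(h), r_{t-1}^b(h)) + log p_t(blank)
+        #   log(psi) accumulates logaddexp over t of (phi_{t-1} + log p_t(c))
+        # where phi_t equals r_t^b(g) if c is the last label of the prefix g,
+        # and log(r_t^n(g) + r_t^b(g)) otherwise.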
+ r = self.xp.ndarray((self.input_length, 2, len(cs)), dtype=np.float32) + xs = self.x[:, cs] + if output_length == 0: + r[0, 0] = xs[0] + r[0, 1] = self.logzero + else: + r[output_length - 1] = self.logzero + + # prepare forward probabilities for the last label + r_sum = self.xp.logaddexp( + r_prev[:, 0], r_prev[:, 1] + ) # log(r_t^n(g) + r_t^b(g)) + last = y[-1] + if output_length > 0 and last in cs: + log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32) + for i in six.moves.range(len(cs)): + log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1] + else: + log_phi = r_sum + + # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), + # and log prefix probabilities log(psi) + start = max(output_length, 1) + log_psi = r[start - 1, 0] + for t in six.moves.range(start, self.input_length): + r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] + r[t, 1] = ( + self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank] + ) + log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t]) + + # get P(...eos|X) that ends with the prefix itself + eos_pos = self.xp.where(cs == self.eos)[0] + if len(eos_pos) > 0: + log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) + + # exclude blank probs + blank_pos = self.xp.where(cs == self.blank)[0] + if len(blank_pos) > 0: + log_psi[blank_pos] = self.logzero + + # return the log prefix probability and CTC states, where the label axis + # of the CTC states is moved to the first axis to slice it easily + return log_psi, self.xp.rollaxis(r, 2) diff --git a/espnet/nets/e2e_asr_common.py b/espnet/nets/e2e_asr_common.py new file mode 100644 index 0000000000000000000000000000000000000000..92f90796a3a230b3bfc47ebe8d9292fae37a1b9c --- /dev/null +++ b/espnet/nets/e2e_asr_common.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Common functions for ASR.""" + +import json +import logging +import sys + +from itertools import groupby +import numpy as np +import six + + +def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))): + """End detection. + + described in Eq. (50) of S. Watanabe et al + "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition" + + :param ended_hyps: + :param i: + :param M: + :param D_end: + :return: + """ + if len(ended_hyps) == 0: + return False + count = 0 + best_hyp = sorted(ended_hyps, key=lambda x: x["score"], reverse=True)[0] + for m in six.moves.range(M): + # get ended_hyps with their length is i - m + hyp_length = i - m + hyps_same_length = [x for x in ended_hyps if len(x["yseq"]) == hyp_length] + if len(hyps_same_length) > 0: + best_hyp_same_length = sorted( + hyps_same_length, key=lambda x: x["score"], reverse=True + )[0] + if best_hyp_same_length["score"] - best_hyp["score"] < D_end: + count += 1 + + if count == M: + return True + else: + return False + + +# TODO(takaaki-hori): add different smoothing methods +def label_smoothing_dist(odim, lsm_type, transcript=None, blank=0): + """Obtain label distribution for loss smoothing. 
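+
+    Example (illustrative; the transcript path is hypothetical):
+
+        labeldist = label_smoothing_dist(odim, "unigram", transcript="dump/train/data.json")
+        # labeldist: numpy array of shape (odim,) summing to 1, with the blank
+        # entry forced to zero.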
+ + :param odim: + :param lsm_type: + :param blank: + :param transcript: + :return: + """ + if transcript is not None: + with open(transcript, "rb") as f: + trans_json = json.load(f)["utts"] + + if lsm_type == "unigram": + assert transcript is not None, ( + "transcript is required for %s label smoothing" % lsm_type + ) + labelcount = np.zeros(odim) + for k, v in trans_json.items(): + ids = np.array([int(n) for n in v["output"][0]["tokenid"].split()]) + # to avoid an error when there is no text in an uttrance + if len(ids) > 0: + labelcount[ids] += 1 + labelcount[odim - 1] = len(transcript) # count + labelcount[labelcount == 0] = 1 # flooring + labelcount[blank] = 0 # remove counts for blank + labeldist = labelcount.astype(np.float32) / np.sum(labelcount) + else: + logging.error("Error: unexpected label smoothing type: %s" % lsm_type) + sys.exit() + + return labeldist + + +def get_vgg2l_odim(idim, in_channel=3, out_channel=128): + """Return the output size of the VGG frontend. + + :param in_channel: input channel size + :param out_channel: output channel size + :return: output size + :rtype int + """ + idim = idim / in_channel + idim = np.ceil(np.array(idim, dtype=np.float32) / 2) # 1st max pooling + idim = np.ceil(np.array(idim, dtype=np.float32) / 2) # 2nd max pooling + return int(idim) * out_channel # numer of channels + + +class ErrorCalculator(object): + """Calculate CER and WER for E2E_ASR and CTC models during training. + + :param y_hats: numpy array with predicted text + :param y_pads: numpy array with true (target) text + :param char_list: + :param sym_space: + :param sym_blank: + :return: + """ + + def __init__( + self, char_list, sym_space, sym_blank, report_cer=False, report_wer=False + ): + """Construct an ErrorCalculator object.""" + super(ErrorCalculator, self).__init__() + + self.report_cer = report_cer + self.report_wer = report_wer + + self.char_list = char_list + self.space = sym_space + self.blank = sym_blank + self.idx_blank = self.char_list.index(self.blank) + if self.space in self.char_list: + self.idx_space = self.char_list.index(self.space) + else: + self.idx_space = None + + def __call__(self, ys_hat, ys_pad, is_ctc=False): + """Calculate sentence-level WER/CER score. + + :param torch.Tensor ys_hat: prediction (batch, seqlen) + :param torch.Tensor ys_pad: reference (batch, seqlen) + :param bool is_ctc: calculate CER score for CTC + :return: sentence-level WER score + :rtype float + :return: sentence-level CER score + :rtype float + """ + cer, wer = None, None + if is_ctc: + return self.calculate_cer_ctc(ys_hat, ys_pad) + elif not self.report_cer and not self.report_wer: + return cer, wer + + seqs_hat, seqs_true = self.convert_to_char(ys_hat, ys_pad) + if self.report_cer: + cer = self.calculate_cer(seqs_hat, seqs_true) + + if self.report_wer: + wer = self.calculate_wer(seqs_hat, seqs_true) + return cer, wer + + def calculate_cer_ctc(self, ys_hat, ys_pad): + """Calculate sentence-level CER score for CTC. 
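+
+        Example (illustrative sketch; the character list is hypothetical):
+
+            ec = ErrorCalculator(char_list=["<blank>", "a", "b", "<space>"],
+                                 sym_space="<space>", sym_blank="<blank>")
+            cer = ec(ys_hat, ys_pad, is_ctc=True)  # dispatches to calculate_cer_ctc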
+ + :param torch.Tensor ys_hat: prediction (batch, seqlen) + :param torch.Tensor ys_pad: reference (batch, seqlen) + :return: average sentence-level CER score + :rtype float + """ + import editdistance + + cers, char_ref_lens = [], [] + for i, y in enumerate(ys_hat): + y_hat = [x[0] for x in groupby(y)] + y_true = ys_pad[i] + seq_hat, seq_true = [], [] + for idx in y_hat: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_hat.append(self.char_list[int(idx)]) + + for idx in y_true: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_true.append(self.char_list[int(idx)]) + + hyp_chars = "".join(seq_hat) + ref_chars = "".join(seq_true) + if len(ref_chars) > 0: + cers.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + + cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None + return cer_ctc + + def convert_to_char(self, ys_hat, ys_pad): + """Convert index to character. + + :param torch.Tensor seqs_hat: prediction (batch, seqlen) + :param torch.Tensor seqs_true: reference (batch, seqlen) + :return: token list of prediction + :rtype list + :return: token list of reference + :rtype list + """ + seqs_hat, seqs_true = [], [] + for i, y_hat in enumerate(ys_hat): + y_true = ys_pad[i] + eos_true = np.where(y_true == -1)[0] + ymax = eos_true[0] if len(eos_true) > 0 else len(y_true) + # NOTE: padding index (-1) in y_true is used to pad y_hat + seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]] + seq_true = [self.char_list[int(idx)] for idx in y_true if int(idx) != -1] + seq_hat_text = "".join(seq_hat).replace(self.space, " ") + seq_hat_text = seq_hat_text.replace(self.blank, "") + seq_true_text = "".join(seq_true).replace(self.space, " ") + seqs_hat.append(seq_hat_text) + seqs_true.append(seq_true_text) + return seqs_hat, seqs_true + + def calculate_cer(self, seqs_hat, seqs_true): + """Calculate sentence-level CER score. + + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level CER score + :rtype float + """ + import editdistance + + char_eds, char_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_chars = seq_hat_text.replace(" ", "") + ref_chars = seq_true_text.replace(" ", "") + char_eds.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + return float(sum(char_eds)) / sum(char_ref_lens) + + def calculate_wer(self, seqs_hat, seqs_true): + """Calculate sentence-level WER score. 
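+
+        Example (illustrative): for seqs_hat=["a b c"] and seqs_true=["a b d"],
+        the word edit distance is 1 against a reference of 3 words, so the
+        returned WER is 1/3.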
+ + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level WER score + :rtype float + """ + import editdistance + + word_eds, word_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_words = seq_hat_text.split() + ref_words = seq_true_text.split() + word_eds.append(editdistance.eval(hyp_words, ref_words)) + word_ref_lens.append(len(ref_words)) + return float(sum(word_eds)) / sum(word_ref_lens) diff --git a/espnet/nets/lm_interface.py b/espnet/nets/lm_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1e751c4d8c945c8bae3fc4356a4d380fc1e023 --- /dev/null +++ b/espnet/nets/lm_interface.py @@ -0,0 +1,86 @@ +"""Language model interface.""" + +import argparse + +from espnet.nets.scorer_interface import ScorerInterface +from espnet.utils.dynamic_import import dynamic_import +from espnet.utils.fill_missing_args import fill_missing_args + + +class LMInterface(ScorerInterface): + """LM Interface for ESPnet model implementation.""" + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + return parser + + @classmethod + def build(cls, n_vocab: int, **kwargs): + """Initialize this class with python-level args. + + Args: + idim (int): The number of vocabulary. + + Returns: + LMinterface: A new instance of LMInterface. + + """ + # local import to avoid cyclic import in lm_train + from espnet.bin.lm_train import get_parser + + def wrap(parser): + return get_parser(parser, required=False) + + args = argparse.Namespace(**kwargs) + args = fill_missing_args(args, wrap) + args = fill_missing_args(args, cls.add_arguments) + return cls(n_vocab, args) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + raise NotImplementedError("forward method is not implemented") + + +predefined_lms = { + "pytorch": { + "default": "espnet.nets.pytorch_backend.lm.default:DefaultRNNLM", + "seq_rnn": "espnet.nets.pytorch_backend.lm.seq_rnn:SequentialRNNLM", + "transformer": "espnet.nets.pytorch_backend.lm.transformer:TransformerLM", + }, + "chainer": {"default": "espnet.lm.chainer_backend.lm:DefaultRNNLM"}, +} + + +def dynamic_import_lm(module, backend): + """Import LM class dynamically. + + Args: + module (str): module_name:class_name or alias in `predefined_lms` + backend (str): NN backend. e.g., pytorch, chainer + + Returns: + type: LM class + + """ + model_class = dynamic_import(module, predefined_lms.get(backend, dict())) + assert issubclass( + model_class, LMInterface + ), f"{module} does not implement LMInterface" + return model_class diff --git a/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py b/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py new file mode 100755 index 0000000000000000000000000000000000000000..7f456e0a6fbe832fa440bae07973475cd3170679 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py @@ -0,0 +1,25 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +import torch +from espnet.nets.pytorch_backend.backbones.modules.resnet1d import ResNet1D, BasicBlock1D + +class Conv1dResNet(torch.nn.Module): + def __init__(self, relu_type="swish", a_upsample_ratio=1): + super().__init__() + self.a_upsample_ratio = a_upsample_ratio + self.trunk = ResNet1D(BasicBlock1D, [2, 2, 2, 2], relu_type=relu_type, a_upsample_ratio=a_upsample_ratio) + + + def forward(self, xs_pad): + """forward. + + :param xs_pad: torch.Tensor, batch of padded input sequences (B, Tmax, idim) + """ + B, T, C = xs_pad.size() + xs_pad = xs_pad[:, :T // 640 * 640, :] + xs_pad = xs_pad.transpose(1, 2) + xs_pad = self.trunk(xs_pad) + return xs_pad.transpose(1, 2) diff --git a/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py b/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py new file mode 100755 index 0000000000000000000000000000000000000000..302bdfb643fcf9f99cbecc3603465e92e26fd0b9 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py @@ -0,0 +1,47 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torch.nn as nn +from espnet.nets.pytorch_backend.backbones.modules.resnet import ResNet, BasicBlock +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def threeD_to_2D_tensor(x): + n_batch, n_channels, s_time, sx, sy = x.shape + x = x.transpose(1, 2) + return x.reshape(n_batch * s_time, n_channels, sx, sy) + + + +class Conv3dResNet(torch.nn.Module): + """Conv3dResNet module + """ + + def __init__(self, backbone_type="resnet", relu_type="swish"): + """__init__. + + :param backbone_type: str, the type of a visual front-end. + :param relu_type: str, activation function used in an audio front-end. + """ + super(Conv3dResNet, self).__init__() + self.frontend_nout = 64 + self.trunk = ResNet(BasicBlock, [2, 2, 2, 2], relu_type=relu_type) + self.frontend3D = nn.Sequential( + nn.Conv3d(1, self.frontend_nout, (5, 7, 7), (1, 2, 2), (2, 3, 3), bias=False), + nn.BatchNorm3d(self.frontend_nout), + Swish(), + nn.MaxPool3d((1, 3, 3), (1, 2, 2), (0, 1, 1)) + ) + + + def forward(self, xs_pad): + B, C, T, H, W = xs_pad.size() + xs_pad = self.frontend3D(xs_pad) + Tnew = xs_pad.shape[2] + xs_pad = threeD_to_2D_tensor(xs_pad) + xs_pad = self.trunk(xs_pad) + return xs_pad.view(B, Tnew, xs_pad.size(1)) diff --git a/espnet/nets/pytorch_backend/backbones/modules/resnet.py b/espnet/nets/pytorch_backend/backbones/modules/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..18b14a59c3049ecb2bd2e3680c9bfdce91f309f7 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/resnet.py @@ -0,0 +1,178 @@ +import math +import torch.nn as nn +import pdb + +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def conv3x3(in_planes, out_planes, stride=1): + """conv3x3. + + :param in_planes: int, number of channels in the input sequence. + :param out_planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + + +def downsample_basic_block(inplanes, outplanes, stride): + """downsample_basic_block. 
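+
+    This is the 1x1-convolution + BatchNorm projection shortcut that
+    ``_make_layer`` attaches to a block whenever the stride is not 1 or the
+    number of channels changes.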
+ + :param inplanes: int, number of channels in the input sequence. + :param outplanes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Sequential( + nn.Conv2d( + inplanes, + outplanes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(outplanes), + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + relu_type="swish", + ): + """__init__. + + :param inplanes: int, number of channels in the input sequence. + :param planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + :param downsample: boolean, if True, the temporal resolution is downsampled. + :param relu_type: str, type of activation function. + """ + super(BasicBlock, self).__init__() + + assert relu_type in ["relu", "prelu", "swish"] + + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + + if relu_type == "relu": + self.relu1 = nn.ReLU(inplace=True) + self.relu2 = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu1 = nn.PReLU(num_parameters=planes) + self.relu2 = nn.PReLU(num_parameters=planes) + elif relu_type == "swish": + self.relu1 = Swish() + self.relu2 = Swish() + else: + raise NotImplementedError + # -------- + + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T, H, W). + """ + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu2(out) + + return out + + +class ResNet(nn.Module): + + def __init__( + self, + block, + layers, + relu_type="swish", + ): + super(ResNet, self).__init__() + self.inplanes = 64 + self.relu_type = relu_type + self.downsample_block = downsample_basic_block + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d(1) + + + def _make_layer(self, block, planes, blocks, stride=1): + """_make_layer. + + :param block: torch.nn.Module, class of blocks. + :param planes: int, number of channels produced by the convolution. + :param blocks: int, number of layers in a block. + :param stride: int, size of the convolving kernel. + """ + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = self.downsample_block( + inplanes=self.inplanes, + outplanes=planes*block.expansion, + stride=stride, + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride, + downsample, + relu_type=self.relu_type, + ) + ) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + relu_type=self.relu_type, + ) + ) + + return nn.Sequential(*layers) + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T, H, W). 
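+
+        Note: in this codebase the 2D trunk is applied frame-wise; the video
+        tensor is reshaped by ``threeD_to_2D_tensor`` before the call, so the
+        tensor arriving here is effectively of size (B*T, C, H, W).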
+ """ + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + return x diff --git a/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py b/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py new file mode 100644 index 0000000000000000000000000000000000000000..adfec4a010871e7ddfe657e3474b08b5527720c6 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py @@ -0,0 +1,213 @@ +import math +import torch.nn as nn +import pdb + +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def conv3x3(in_planes, out_planes, stride=1): + """conv3x3. + + :param in_planes: int, number of channels in the input sequence. + :param out_planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Conv1d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + + +def downsample_basic_block(inplanes, outplanes, stride): + """downsample_basic_block. + + :param inplanes: int, number of channels in the input sequence. + :param outplanes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Sequential( + nn.Conv1d( + inplanes, + outplanes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm1d(outplanes), + ) + + +class BasicBlock1D(nn.Module): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + relu_type="relu", + ): + """__init__. + + :param inplanes: int, number of channels in the input sequence. + :param planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + :param downsample: boolean, if True, the temporal resolution is downsampled. + :param relu_type: str, type of activation function. + """ + super(BasicBlock1D, self).__init__() + + assert relu_type in ["relu","prelu", "swish"] + + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm1d(planes) + + # type of ReLU is an input option + if relu_type == "relu": + self.relu1 = nn.ReLU(inplace=True) + self.relu2 = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu1 = nn.PReLU(num_parameters=planes) + self.relu2 = nn.PReLU(num_parameters=planes) + elif relu_type == "swish": + self.relu1 = Swish() + self.relu2 = Swish() + else: + raise NotImplementedError + # -------- + + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm1d(planes) + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T) + """ + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu2(out) + + return out + + +class ResNet1D(nn.Module): + + def __init__(self, + block, + layers, + relu_type="swish", + a_upsample_ratio=1, + ): + """__init__. + + :param block: torch.nn.Module, class of blocks. + :param layers: List, customised layers in each block. + :param relu_type: str, type of activation function. + :param a_upsample_ratio: int, The ratio related to the \ + temporal resolution of output features of the frontend. \ + a_upsample_ratio=1 produce features with a fps of 25. 
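+
+        Note (assuming 16 kHz raw-waveform input): conv1 (stride 4) plus the
+        three stride-2 stages reduce time by a factor of 32, and the final
+        AvgPool1d with kernel/stride 20 // a_upsample_ratio brings the total
+        hop to 640 // a_upsample_ratio samples per output frame, i.e.
+        16000 / 640 = 25 fps when a_upsample_ratio=1.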
+ """ + super(ResNet1D, self).__init__() + self.inplanes = 64 + self.relu_type = relu_type + self.downsample_block = downsample_basic_block + self.a_upsample_ratio = a_upsample_ratio + + self.conv1 = nn.Conv1d( + in_channels=1, + out_channels=self.inplanes, + kernel_size=80, + stride=4, + padding=38, + bias=False, + ) + self.bn1 = nn.BatchNorm1d(self.inplanes) + + if relu_type == "relu": + self.relu = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu = nn.PReLU(num_parameters=self.inplanes) + elif relu_type == "swish": + self.relu = Swish() + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool1d( + kernel_size=20//self.a_upsample_ratio, + stride=20//self.a_upsample_ratio, + ) + + + def _make_layer(self, block, planes, blocks, stride=1): + """_make_layer. + + :param block: torch.nn.Module, class of blocks. + :param planes: int, number of channels produced by the convolution. + :param blocks: int, number of layers in a block. + :param stride: int, size of the convolving kernel. + """ + + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = self.downsample_block( + inplanes=self.inplanes, + outplanes=planes*block.expansion, + stride=stride, + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride, + downsample, + relu_type=self.relu_type, + ) + ) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + relu_type=self.relu_type, + ) + ) + + return nn.Sequential(*layers) + + def forward(self, x): + """forward. 
+ + :param x: torch.Tensor, input tensor with input size (B, C, T) + """ + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + return x diff --git a/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py b/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..53db7a8163ddb1d74d88e7d0a4d8824646918a6c --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from collections import OrderedDict +from torch.nn import init +import math + +import pdb + +def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + +def channel_shuffle(x, groups): + batchsize, num_channels, height, width = x.data.size() + + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, benchmodel): + super(InvertedResidual, self).__init__() + self.benchmodel = benchmodel + self.stride = stride + assert stride in [1, 2] + + oup_inc = oup//2 + + if self.benchmodel == 1: + #assert inp == oup_inc + self.banch2 = nn.Sequential( + # pw + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False), + nn.BatchNorm2d(oup_inc), + # pw-linear + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + else: + self.banch1 = nn.Sequential( + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + + self.banch2 = nn.Sequential( + # pw + nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False), + nn.BatchNorm2d(oup_inc), + # pw-linear + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + + @staticmethod + def _concat(x, out): + # concatenate along channel axis + return torch.cat((x, out), 1) + + def forward(self, x): + if 1==self.benchmodel: + x1 = x[:, :(x.shape[1]//2), :, :] + x2 = x[:, (x.shape[1]//2):, :, :] + out = self._concat(x1, self.banch2(x2)) + elif 2==self.benchmodel: + out = self._concat(self.banch1(x), self.banch2(x)) + + return channel_shuffle(out, 2) + + +class ShuffleNetV2(nn.Module): + def __init__(self, n_class=1000, input_size=224, width_mult=2.): + super(ShuffleNetV2, self).__init__() + + assert input_size % 32 == 0, "Input size needs to be divisible by 32" + + self.stage_repeats = [4, 8, 4] + # index 0 is invalid and should never be called. + # only used for indexing convenience. 
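+        # Illustrative usage (hypothetical sizes; input_size must be divisible by 32):
+        #   net = ShuffleNetV2(n_class=1000, input_size=96, width_mult=1.0)
+        #   logits = net(torch.randn(2, 3, 96, 96))  # -> (2, 1000)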
+ if width_mult == 0.5: + self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif width_mult == 1.0: + self.stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif width_mult == 1.5: + self.stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif width_mult == 2.0: + self.stage_out_channels = [-1, 24, 244, 488, 976, 2048] + else: + raise ValueError( + """Width multiplier should be in [0.5, 1.0, 1.5, 2.0]. Current value: {}""".format(width_mult)) + + # building first layer + input_channel = self.stage_out_channels[1] + self.conv1 = conv_bn(3, input_channel, 2) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.features = [] + # building inverted residual blocks + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage+2] + for i in range(numrepeat): + if i == 0: + #inp, oup, stride, benchmodel): + self.features.append(InvertedResidual(input_channel, output_channel, 2, 2)) + else: + self.features.append(InvertedResidual(input_channel, output_channel, 1, 1)) + input_channel = output_channel + + + # make it nn.Sequential + self.features = nn.Sequential(*self.features) + + # building last several layers + self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1]) + self.globalpool = nn.Sequential(nn.AvgPool2d(int(input_size/32))) + + # building classifier + self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class)) + + def forward(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.features(x) + x = self.conv_last(x) + x = self.globalpool(x) + x = x.view(-1, self.stage_out_channels[-1]) + x = self.classifier(x) + return x diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..60e176df9268c83fd6251b64f823c6b1c512641d --- /dev/null +++ b/espnet/nets/pytorch_backend/ctc.py @@ -0,0 +1,283 @@ +from distutils.version import LooseVersion +import logging + +import numpy as np +import six +import torch +import torch.nn.functional as F + +from espnet.nets.pytorch_backend.nets_utils import to_device + + +class CTC(torch.nn.Module): + """CTC module + + :param int odim: dimension of outputs + :param int eprojs: number of encoder projection units + :param float dropout_rate: dropout rate (0.0 ~ 1.0) + :param str ctc_type: builtin or warpctc + :param bool reduce: reduce the CTC loss into a scalar + """ + + def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): + super().__init__() + self.dropout_rate = dropout_rate + self.loss = None + self.ctc_lo = torch.nn.Linear(eprojs, odim) + self.dropout = torch.nn.Dropout(dropout_rate) + self.probs = None # for visualization + + # In case of Pytorch >= 1.7.0, CTC will be always builtin + self.ctc_type = ( + ctc_type + if LooseVersion(torch.__version__) < LooseVersion("1.7.0") + else "builtin" + ) + + if self.ctc_type == "builtin": + reduction_type = "sum" if reduce else "none" + self.ctc_loss = torch.nn.CTCLoss( + reduction=reduction_type, zero_infinity=True + ) + elif self.ctc_type == "cudnnctc": + reduction_type = "sum" if reduce else "none" + self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) + elif self.ctc_type == "warpctc": + import warpctc_pytorch as warp_ctc + + self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce) + elif self.ctc_type == "gtnctc": + from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction + + self.ctc_loss = 
GTNCTCLossFunction.apply + else: + raise ValueError( + 'ctc_type must be "builtin" or "warpctc": {}'.format(self.ctc_type) + ) + + self.ignore_id = -1 + self.reduce = reduce + + def loss_fn(self, th_pred, th_target, th_ilen, th_olen): + if self.ctc_type in ["builtin", "cudnnctc"]: + th_pred = th_pred.log_softmax(2) + # Use the deterministic CuDNN implementation of CTC loss to avoid + # [issue#17798](https://github.com/pytorch/pytorch/issues/17798) + with torch.backends.cudnn.flags(deterministic=True): + loss = self.ctc_loss(th_pred, th_target, th_ilen, th_olen) + # Batch-size average + loss = loss / th_pred.size(1) + return loss + elif self.ctc_type == "warpctc": + return self.ctc_loss(th_pred, th_target, th_ilen, th_olen) + elif self.ctc_type == "gtnctc": + targets = [t.tolist() for t in th_target] + log_probs = torch.nn.functional.log_softmax(th_pred, dim=2) + return self.ctc_loss(log_probs, targets, th_ilen, 0, "none") + else: + raise NotImplementedError + + def forward(self, hs_pad, hlens, ys_pad): + """CTC forward + + :param torch.Tensor hs_pad: batch of padded hidden state sequences (B, Tmax, D) + :param torch.Tensor hlens: batch of lengths of hidden state sequences (B) + :param torch.Tensor ys_pad: + batch of padded character id sequence tensor (B, Lmax) + :return: ctc loss value + :rtype: torch.Tensor + """ + # TODO(kan-bayashi): need to make more smart way + ys = [y[y != self.ignore_id] for y in ys_pad] # parse padded ys + + # zero padding for hs + ys_hat = self.ctc_lo(self.dropout(hs_pad)) + if self.ctc_type != "gtnctc": + ys_hat = ys_hat.transpose(0, 1) + + if self.ctc_type == "builtin": + olens = to_device(ys_hat, torch.LongTensor([len(s) for s in ys])) + hlens = hlens.long() + ys_pad = torch.cat(ys) # without this the code breaks for asr_mix + self.loss = self.loss_fn(ys_hat, ys_pad, hlens, olens) + else: + self.loss = None + hlens = torch.from_numpy(np.fromiter(hlens, dtype=np.int32)) + olens = torch.from_numpy( + np.fromiter((x.size(0) for x in ys), dtype=np.int32) + ) + # zero padding for ys + ys_true = torch.cat(ys).cpu().int() # batch x olen + # get ctc loss + # expected shape of seqLength x batchSize x alphabet_size + dtype = ys_hat.dtype + if self.ctc_type == "warpctc" or dtype == torch.float16: + # warpctc only supports float32 + # torch.ctc does not support float16 (#1751) + ys_hat = ys_hat.to(dtype=torch.float32) + if self.ctc_type == "cudnnctc": + # use GPU when using the cuDNN implementation + ys_true = to_device(hs_pad, ys_true) + if self.ctc_type == "gtnctc": + # keep as list for gtn + ys_true = ys + self.loss = to_device( + hs_pad, self.loss_fn(ys_hat, ys_true, hlens, olens) + ).to(dtype=dtype) + + # get length info + logging.info( + self.__class__.__name__ + + " input lengths: " + + "".join(str(hlens).split("\n")) + ) + logging.info( + self.__class__.__name__ + + " output lengths: " + + "".join(str(olens).split("\n")) + ) + + if self.reduce: + # NOTE: sum() is needed to keep consistency + # since warpctc return as tensor w/ shape (1,) + # but builtin return as tensor w/o shape (scalar). 
+ self.loss = self.loss.sum() + logging.info("ctc loss:" + str(float(self.loss))) + + return self.loss + + def softmax(self, hs_pad): + """softmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: log softmax applied 3d tensor (B, Tmax, odim) + :rtype: torch.Tensor + """ + self.probs = F.softmax(self.ctc_lo(hs_pad), dim=2) + return self.probs + + def log_softmax(self, hs_pad): + """log_softmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: log softmax applied 3d tensor (B, Tmax, odim) + :rtype: torch.Tensor + """ + return F.log_softmax(self.ctc_lo(hs_pad), dim=2) + + def argmax(self, hs_pad): + """argmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: argmax applied 2d tensor (B, Tmax) + :rtype: torch.Tensor + """ + return torch.argmax(self.ctc_lo(hs_pad), dim=2) + + def forced_align(self, h, y, blank_id=0): + """forced alignment. + + :param torch.Tensor h: hidden state sequence, 2d tensor (T, D) + :param torch.Tensor y: id sequence tensor 1d tensor (L) + :param int y: blank symbol index + :return: best alignment results + :rtype: list + """ + + def interpolate_blank(label, blank_id=0): + """Insert blank token between every two label token.""" + label = np.expand_dims(label, 1) + blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id + label = np.concatenate([blanks, label], axis=1) + label = label.reshape(-1) + label = np.append(label, label[0]) + return label + + lpz = self.log_softmax(h) + lpz = lpz.squeeze(0) + + y_int = interpolate_blank(y, blank_id) + + logdelta = np.zeros((lpz.size(0), len(y_int))) - 100000000000.0 # log of zero + state_path = ( + np.zeros((lpz.size(0), len(y_int)), dtype=np.int16) - 1 + ) # state path + + logdelta[0, 0] = lpz[0][y_int[0]] + logdelta[0, 1] = lpz[0][y_int[1]] + + for t in six.moves.range(1, lpz.size(0)): + for s in six.moves.range(len(y_int)): + if y_int[s] == blank_id or s < 2 or y_int[s] == y_int[s - 2]: + candidates = np.array([logdelta[t - 1, s], logdelta[t - 1, s - 1]]) + prev_state = [s, s - 1] + else: + candidates = np.array( + [ + logdelta[t - 1, s], + logdelta[t - 1, s - 1], + logdelta[t - 1, s - 2], + ] + ) + prev_state = [s, s - 1, s - 2] + logdelta[t, s] = np.max(candidates) + lpz[t][y_int[s]] + state_path[t, s] = prev_state[np.argmax(candidates)] + + state_seq = -1 * np.ones((lpz.size(0), 1), dtype=np.int16) + + candidates = np.array( + [logdelta[-1, len(y_int) - 1], logdelta[-1, len(y_int) - 2]] + ) + prev_state = [len(y_int) - 1, len(y_int) - 2] + state_seq[-1] = prev_state[np.argmax(candidates)] + for t in six.moves.range(lpz.size(0) - 2, -1, -1): + state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] + + output_state_seq = [] + for t in six.moves.range(0, lpz.size(0)): + output_state_seq.append(y_int[state_seq[t, 0]]) + + return output_state_seq + + +def ctc_for(args, odim, reduce=True): + """Returns the CTC module for the given args and output dimension + + :param Namespace args: the program args + :param int odim : The output dimension + :param bool reduce : return the CTC loss in a scalar + :return: the corresponding CTC module + """ + num_encs = getattr(args, "num_encs", 1) # use getattr to keep compatibility + if num_encs == 1: + # compatible with single encoder asr mode + return CTC( + odim, args.eprojs, args.dropout_rate, ctc_type=args.ctc_type, reduce=reduce + ) + elif num_encs >= 1: + ctcs_list = torch.nn.ModuleList() + if args.share_ctc: + # use dropout_rate of the first encoder + 
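+            # a single CTC instance is appended once and shared by every encoder stream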
ctc = CTC( + odim, + args.eprojs, + args.dropout_rate[0], + ctc_type=args.ctc_type, + reduce=reduce, + ) + ctcs_list.append(ctc) + else: + for idx in range(num_encs): + ctc = CTC( + odim, + args.eprojs, + args.dropout_rate[idx], + ctc_type=args.ctc_type, + reduce=reduce, + ) + ctcs_list.append(ctc) + return ctcs_list + else: + raise ValueError( + "Number of encoders needs to be more than one. {}".format(num_encs) + ) diff --git a/espnet/nets/pytorch_backend/e2e_asr_transformer.py b/espnet/nets/pytorch_backend/e2e_asr_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ac89e0a9e3b80e3dd8afdabf10fe07eedd4b469b --- /dev/null +++ b/espnet/nets/pytorch_backend/e2e_asr_transformer.py @@ -0,0 +1,320 @@ +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Transformer speech recognition model (pytorch).""" + +from argparse import Namespace +from distutils.util import strtobool +import logging +import math + +import numpy +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.e2e_asr_common import ErrorCalculator +from espnet.nets.pytorch_backend.ctc import CTC +from espnet.nets.pytorch_backend.nets_utils import get_subsample +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask +from espnet.nets.pytorch_backend.nets_utils import th_accuracy +from espnet.nets.pytorch_backend.transformer.add_sos_eos import add_sos_eos +from espnet.nets.pytorch_backend.transformer.attention import ( + MultiHeadedAttention, # noqa: H301 + RelPositionMultiHeadedAttention, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.decoder import Decoder +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import ( + LabelSmoothingLoss, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.mask import target_mask +from espnet.nets.scorers.ctc import CTCPrefixScorer + + +class E2E(torch.nn.Module): + """E2E module. + + :param int idim: dimension of inputs + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments.""" + group = parser.add_argument_group("transformer model setting") + + group.add_argument( + "--transformer-init", + type=str, + default="pytorch", + choices=[ + "pytorch", + "xavier_uniform", + "xavier_normal", + "kaiming_uniform", + "kaiming_normal", + ], + help="how to initialize transformer parameters", + ) + group.add_argument( + "--transformer-input-layer", + type=str, + default="conv2d", + choices=["conv3d", "conv2d", "conv1d", "linear", "embed"], + help="transformer input layer type", + ) + group.add_argument( + "--transformer-encoder-attn-layer-type", + type=str, + default="mha", + choices=["mha", "rel_mha", "legacy_rel_mha"], + help="transformer encoder attention layer type", + ) + group.add_argument( + "--transformer-attn-dropout-rate", + default=None, + type=float, + help="dropout in transformer attention. 
use --dropout-rate if None is set", + ) + group.add_argument( + "--transformer-lr", + default=10.0, + type=float, + help="Initial value of learning rate", + ) + group.add_argument( + "--transformer-warmup-steps", + default=25000, + type=int, + help="optimizer warmup steps", + ) + group.add_argument( + "--transformer-length-normalized-loss", + default=True, + type=strtobool, + help="normalize loss by length", + ) + group.add_argument( + "--dropout-rate", + default=0.0, + type=float, + help="Dropout rate for the encoder", + ) + group.add_argument( + "--macaron-style", + default=False, + type=strtobool, + help="Whether to use macaron style for positionwise layer", + ) + # -- input + group.add_argument( + "--a-upsample-ratio", + default=1, + type=int, + help="Upsample rate for audio", + ) + group.add_argument( + "--relu-type", + default="swish", + type=str, + help="the type of activation layer", + ) + # Encoder + group.add_argument( + "--elayers", + default=4, + type=int, + help="Number of encoder layers (for shared recognition part " + "in multi-speaker asr mode)", + ) + group.add_argument( + "--eunits", + "-u", + default=300, + type=int, + help="Number of encoder hidden units", + ) + group.add_argument( + "--use-cnn-module", + default=False, + type=strtobool, + help="Use convolution module or not", + ) + group.add_argument( + "--cnn-module-kernel", + default=31, + type=int, + help="Kernel size of convolution module.", + ) + # Attention + group.add_argument( + "--adim", + default=320, + type=int, + help="Number of attention transformation dimensions", + ) + group.add_argument( + "--aheads", + default=4, + type=int, + help="Number of heads for multi head attention", + ) + group.add_argument( + "--zero-triu", + default=False, + type=strtobool, + help="If true, zero the uppper triangular part of attention matrix.", + ) + # Relative positional encoding + group.add_argument( + "--rel-pos-type", + type=str, + default="legacy", + choices=["legacy", "latest"], + help="Whether to use the latest relative positional encoding or the legacy one." + "The legacy relative positional encoding will be deprecated in the future." + "More Details can be found in https://github.com/espnet/espnet/pull/2816.", + ) + # Decoder + group.add_argument( + "--dlayers", default=1, type=int, help="Number of decoder layers" + ) + group.add_argument( + "--dunits", default=320, type=int, help="Number of decoder hidden units" + ) + # -- pretrain + group.add_argument("--pretrain-dataset", + default="", + type=str, + help='pre-trained dataset for encoder' + ) + # -- custom name + group.add_argument("--custom-pretrain-name", + default="", + type=str, + help='pre-trained model for encoder' + ) + return parser + + @property + def attention_plot_class(self): + """Return PlotAttentionReport.""" + return PlotAttentionReport + + def __init__(self, odim, args, ignore_id=-1): + """Construct an E2E object. + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + """ + torch.nn.Module.__init__(self) + if args.transformer_attn_dropout_rate is None: + args.transformer_attn_dropout_rate = args.dropout_rate + # Check the relative positional encoding type + self.rel_pos_type = getattr(args, "rel_pos_type", None) + if self.rel_pos_type is None and args.transformer_encoder_attn_layer_type == "rel_mha": + args.transformer_encoder_attn_layer_type = "legacy_rel_mha" + logging.warning( + "Using legacy_rel_pos and it will be deprecated in the future." 
+ ) + + idim = 80 + + self.encoder = Encoder( + idim=idim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.eunits, + num_blocks=args.elayers, + input_layer=args.transformer_input_layer, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + attention_dropout_rate=args.transformer_attn_dropout_rate, + encoder_attn_layer_type=args.transformer_encoder_attn_layer_type, + macaron_style=args.macaron_style, + use_cnn_module=args.use_cnn_module, + cnn_module_kernel=args.cnn_module_kernel, + zero_triu=getattr(args, "zero_triu", False), + a_upsample_ratio=args.a_upsample_ratio, + relu_type=getattr(args, "relu_type", "swish"), + ) + + self.transformer_input_layer = args.transformer_input_layer + self.a_upsample_ratio = args.a_upsample_ratio + + if args.mtlalpha < 1: + self.decoder = Decoder( + odim=odim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.dunits, + num_blocks=args.dlayers, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + self_attention_dropout_rate=args.transformer_attn_dropout_rate, + src_attention_dropout_rate=args.transformer_attn_dropout_rate, + ) + else: + self.decoder = None + self.blank = 0 + self.sos = odim - 1 + self.eos = odim - 1 + self.odim = odim + self.ignore_id = ignore_id + self.subsample = get_subsample(args, mode="asr", arch="transformer") + + # self.lsm_weight = a + self.criterion = LabelSmoothingLoss( + self.odim, + self.ignore_id, + args.lsm_weight, + args.transformer_length_normalized_loss, + ) + + self.adim = args.adim + self.mtlalpha = args.mtlalpha + if args.mtlalpha > 0.0: + self.ctc = CTC( + odim, args.adim, args.dropout_rate, ctc_type=args.ctc_type, reduce=True + ) + else: + self.ctc = None + + if args.report_cer or args.report_wer: + self.error_calculator = ErrorCalculator( + args.char_list, + args.sym_space, + args.sym_blank, + args.report_cer, + args.report_wer, + ) + else: + self.error_calculator = None + self.rnnlm = None + + def scorers(self): + """Scorers.""" + return dict(decoder=self.decoder, ctc=CTCPrefixScorer(self.ctc, self.eos)) + + def encode(self, x, extract_resnet_feats=False): + """Encode acoustic features. 
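+
+        Illustrative usage (output shapes depend on the configured front-end):
+
+            enc = model.encode(x)                                # (T', adim) encoder output
+            feats = model.encode(x, extract_resnet_feats=True)   # front-end (ResNet) features only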
+ + :param ndarray x: source acoustic feature (T, D) + :return: encoder outputs + :rtype: torch.Tensor + """ + self.eval() + x = torch.as_tensor(x).unsqueeze(0) + if extract_resnet_feats: + resnet_feats = self.encoder( + x, + None, + extract_resnet_feats=extract_resnet_feats, + ) + return resnet_feats.squeeze(0) + else: + enc_output, _ = self.encoder(x, None) + return enc_output.squeeze(0) diff --git a/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py b/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py new file mode 100644 index 0000000000000000000000000000000000000000..9f4cb81b933da330933807bda6c32735d0df0f25 --- /dev/null +++ b/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py @@ -0,0 +1,352 @@ +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Transformer speech recognition model (pytorch).""" + +from argparse import Namespace +from distutils.util import strtobool +import logging +import math + +import numpy +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.e2e_asr_common import ErrorCalculator +from espnet.nets.pytorch_backend.ctc import CTC +from espnet.nets.pytorch_backend.nets_utils import get_subsample +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask +from espnet.nets.pytorch_backend.nets_utils import th_accuracy +from espnet.nets.pytorch_backend.transformer.add_sos_eos import add_sos_eos +from espnet.nets.pytorch_backend.transformer.attention import ( + MultiHeadedAttention, # noqa: H301 + RelPositionMultiHeadedAttention, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.decoder import Decoder +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import ( + LabelSmoothingLoss, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.mask import target_mask +from espnet.nets.scorers.ctc import CTCPrefixScorer +from espnet.nets.pytorch_backend.nets_utils import MLPHead + + +class E2E(torch.nn.Module): + """E2E module. + + :param int idim: dimension of inputs + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments.""" + group = parser.add_argument_group("transformer model setting") + + group.add_argument( + "--transformer-init", + type=str, + default="pytorch", + choices=[ + "pytorch", + "xavier_uniform", + "xavier_normal", + "kaiming_uniform", + "kaiming_normal", + ], + help="how to initialize transformer parameters", + ) + group.add_argument( + "--transformer-input-layer", + type=str, + default="conv2d", + choices=["conv3d", "conv2d", "conv1d", "linear", "embed"], + help="transformer input layer type", + ) + group.add_argument( + "--transformer-encoder-attn-layer-type", + type=str, + default="mha", + choices=["mha", "rel_mha", "legacy_rel_mha"], + help="transformer encoder attention layer type", + ) + group.add_argument( + "--transformer-attn-dropout-rate", + default=None, + type=float, + help="dropout in transformer attention. 
use --dropout-rate if None is set", + ) + group.add_argument( + "--transformer-lr", + default=10.0, + type=float, + help="Initial value of learning rate", + ) + group.add_argument( + "--transformer-warmup-steps", + default=25000, + type=int, + help="optimizer warmup steps", + ) + group.add_argument( + "--transformer-length-normalized-loss", + default=True, + type=strtobool, + help="normalize loss by length", + ) + group.add_argument( + "--dropout-rate", + default=0.0, + type=float, + help="Dropout rate for the encoder", + ) + group.add_argument( + "--macaron-style", + default=False, + type=strtobool, + help="Whether to use macaron style for positionwise layer", + ) + # -- input + group.add_argument( + "--a-upsample-ratio", + default=1, + type=int, + help="Upsample rate for audio", + ) + group.add_argument( + "--relu-type", + default="swish", + type=str, + help="the type of activation layer", + ) + # Encoder + group.add_argument( + "--elayers", + default=4, + type=int, + help="Number of encoder layers (for shared recognition part " + "in multi-speaker asr mode)", + ) + group.add_argument( + "--eunits", + "-u", + default=300, + type=int, + help="Number of encoder hidden units", + ) + group.add_argument( + "--use-cnn-module", + default=False, + type=strtobool, + help="Use convolution module or not", + ) + group.add_argument( + "--cnn-module-kernel", + default=31, + type=int, + help="Kernel size of convolution module.", + ) + # Attention + group.add_argument( + "--adim", + default=320, + type=int, + help="Number of attention transformation dimensions", + ) + group.add_argument( + "--aheads", + default=4, + type=int, + help="Number of heads for multi head attention", + ) + group.add_argument( + "--zero-triu", + default=False, + type=strtobool, + help="If true, zero the uppper triangular part of attention matrix.", + ) + # Relative positional encoding + group.add_argument( + "--rel-pos-type", + type=str, + default="legacy", + choices=["legacy", "latest"], + help="Whether to use the latest relative positional encoding or the legacy one." + "The legacy relative positional encoding will be deprecated in the future." + "More Details can be found in https://github.com/espnet/espnet/pull/2816.", + ) + # Decoder + group.add_argument( + "--dlayers", default=1, type=int, help="Number of decoder layers" + ) + group.add_argument( + "--dunits", default=320, type=int, help="Number of decoder hidden units" + ) + # -- pretrain + group.add_argument("--pretrain-dataset", + default="", + type=str, + help='pre-trained dataset for encoder' + ) + # -- custom name + group.add_argument("--custom-pretrain-name", + default="", + type=str, + help='pre-trained model for encoder' + ) + return parser + + @property + def attention_plot_class(self): + """Return PlotAttentionReport.""" + return PlotAttentionReport + + def __init__(self, odim, args, ignore_id=-1): + """Construct an E2E object. + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + """ + torch.nn.Module.__init__(self) + if args.transformer_attn_dropout_rate is None: + args.transformer_attn_dropout_rate = args.dropout_rate + # Check the relative positional encoding type + self.rel_pos_type = getattr(args, "rel_pos_type", None) + if self.rel_pos_type is None and args.transformer_encoder_attn_layer_type == "rel_mha": + args.transformer_encoder_attn_layer_type = "legacy_rel_mha" + logging.warning( + "Using legacy_rel_pos and it will be deprecated in the future." 
+ ) + + idim = 80 + + self.encoder = Encoder( + idim=idim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.eunits, + num_blocks=args.elayers, + input_layer=args.transformer_input_layer, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + attention_dropout_rate=args.transformer_attn_dropout_rate, + encoder_attn_layer_type=args.transformer_encoder_attn_layer_type, + macaron_style=args.macaron_style, + use_cnn_module=args.use_cnn_module, + cnn_module_kernel=args.cnn_module_kernel, + zero_triu=getattr(args, "zero_triu", False), + a_upsample_ratio=args.a_upsample_ratio, + relu_type=getattr(args, "relu_type", "swish"), + ) + + self.transformer_input_layer = args.transformer_input_layer + self.a_upsample_ratio = args.a_upsample_ratio + + self.aux_encoder = Encoder( + idim=idim, + attention_dim=args.aux_adim, + attention_heads=args.aux_aheads, + linear_units=args.aux_eunits, + num_blocks=args.aux_elayers, + input_layer=args.aux_transformer_input_layer, + dropout_rate=args.aux_dropout_rate, + positional_dropout_rate=args.aux_dropout_rate, + attention_dropout_rate=args.aux_transformer_attn_dropout_rate, + encoder_attn_layer_type=args.aux_transformer_encoder_attn_layer_type, + macaron_style=args.aux_macaron_style, + use_cnn_module=args.aux_use_cnn_module, + cnn_module_kernel=args.aux_cnn_module_kernel, + zero_triu=getattr(args, "aux_zero_triu", False), + a_upsample_ratio=args.aux_a_upsample_ratio, + relu_type=getattr(args, "aux_relu_type", "swish"), + ) + self.aux_transformer_input_layer = args.aux_transformer_input_layer + + self.fusion = MLPHead( + idim=args.adim + args.aux_adim, + hdim=args.fusion_hdim, + odim=args.adim, + norm=args.fusion_norm, + ) + + if args.mtlalpha < 1: + self.decoder = Decoder( + odim=odim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.dunits, + num_blocks=args.dlayers, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + self_attention_dropout_rate=args.transformer_attn_dropout_rate, + src_attention_dropout_rate=args.transformer_attn_dropout_rate, + ) + else: + self.decoder = None + self.blank = 0 + self.sos = odim - 1 + self.eos = odim - 1 + self.odim = odim + self.ignore_id = ignore_id + self.subsample = get_subsample(args, mode="asr", arch="transformer") + + # self.lsm_weight = a + self.criterion = LabelSmoothingLoss( + self.odim, + self.ignore_id, + args.lsm_weight, + args.transformer_length_normalized_loss, + ) + + self.adim = args.adim + self.mtlalpha = args.mtlalpha + if args.mtlalpha > 0.0: + self.ctc = CTC( + odim, args.adim, args.dropout_rate, ctc_type=args.ctc_type, reduce=True + ) + else: + self.ctc = None + + if args.report_cer or args.report_wer: + self.error_calculator = ErrorCalculator( + args.char_list, + args.sym_space, + args.sym_blank, + args.report_cer, + args.report_wer, + ) + else: + self.error_calculator = None + self.rnnlm = None + + def scorers(self): + """Scorers.""" + return dict(decoder=self.decoder, ctc=CTCPrefixScorer(self.ctc, self.eos)) + + def encode(self, x, aux_x, extract_resnet_feats=False): + """Encode acoustic features. 
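
        Editorial note (not part of the original patch): `x` is the primary
        (e.g. video) feature stream and `aux_x` the auxiliary (e.g. audio)
        stream. Unless `extract_resnet_feats` is set, each stream is passed
        through its own encoder, the two outputs are concatenated along the
        feature axis, and the MLPHead fusion block projects the result back
        to `adim`, so the returned tensor has shape (T', adim).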
+ + :param ndarray x: source acoustic feature (T, D) + :return: encoder outputs + :rtype: torch.Tensor + """ + self.eval() + if extract_resnet_feats: + x = torch.as_tensor(x).unsqueeze(0) + resnet_feats = self.encoder( + x, + None, + extract_resnet_feats=extract_resnet_feats, + ) + return resnet_feats.squeeze(0) + else: + x = torch.as_tensor(x).unsqueeze(0) + aux_x = torch.as_tensor(aux_x).unsqueeze(0) + feat, _ = self.encoder(x, None) + aux_feat, _ = self.aux_encoder(aux_x, None) + fus_output = self.fusion(torch.cat((feat, aux_feat), dim=-1)) + return fus_output.squeeze(0) diff --git a/espnet/nets/pytorch_backend/lm/__init__.py b/espnet/nets/pytorch_backend/lm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/pytorch_backend/lm/default.py b/espnet/nets/pytorch_backend/lm/default.py new file mode 100644 index 0000000000000000000000000000000000000000..01bb26ea4a071e1672952ee0cfb754d16ad6d8e6 --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/default.py @@ -0,0 +1,431 @@ +"""Default Recurrent Neural Network Languge Model in `lm_train.py`.""" + +from typing import Any +from typing import List +from typing import Tuple + +import logging +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface +from espnet.nets.pytorch_backend.e2e_asr import to_device +from espnet.nets.scorer_interface import BatchScorerInterface +from espnet.utils.cli_utils import strtobool + + +class DefaultRNNLM(BatchScorerInterface, LMInterface, nn.Module): + """Default RNNLM for `LMInterface` Implementation. + + Note: + PyTorch seems to have memory leak when one GPU compute this after data parallel. + If parallel GPUs compute this, it seems to be fine. + See also https://github.com/espnet/espnet/issues/1075 + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--type", + type=str, + default="lstm", + nargs="?", + choices=["lstm", "gru"], + help="Which type of RNN to use", + ) + parser.add_argument( + "--layer", "-l", type=int, default=2, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", "-u", type=int, default=650, help="Number of hidden units" + ) + parser.add_argument( + "--embed-unit", + default=None, + type=int, + help="Number of hidden units in embedding layer, " + "if it is not specified, it keeps the same number with hidden units.", + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument( + "--emb-dropout-rate", + type=float, + default=0.0, + help="emb dropout probability", + ) + parser.add_argument( + "--tie-weights", + type=strtobool, + default=False, + help="Tie input and output embeddings", + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. 
see py:method:`add_arguments` + + """ + nn.Module.__init__(self) + # NOTE: for a compatibility with less than 0.5.0 version models + dropout_rate = getattr(args, "dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.6.1 version models + embed_unit = getattr(args, "embed_unit", None) + # NOTE: for a compatibility with less than 0.9.7 version models + emb_dropout_rate = getattr(args, "emb_dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.9.7 version models + tie_weights = getattr(args, "tie_weights", False) + + self.model = ClassifierWithState( + RNNLM( + n_vocab, + args.layer, + args.unit, + embed_unit, + args.type, + dropout_rate, + emb_dropout_rate, + tie_weights, + ) + ) + + def state_dict(self): + """Dump state dict.""" + return self.model.state_dict() + + def load_state_dict(self, d): + """Load state dict.""" + self.model.load_state_dict(d) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + loss = 0 + logp = 0 + count = torch.tensor(0).long() + state = None + batch_size, sequence_length = x.shape + for i in range(sequence_length): + # Compute the loss at this time step and accumulate it + state, loss_batch = self.model(state, x[:, i], t[:, i]) + non_zeros = torch.sum(x[:, i] != 0, dtype=loss_batch.dtype) + loss += loss_batch.mean() * non_zeros + logp += torch.sum(loss_batch * non_zeros) + count += int(non_zeros) + return loss / batch_size, loss, count.to(loss.device) + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + new_state, scores = self.model.predict(state, y[-1].unsqueeze(0)) + return scores.squeeze(0), new_state + + def final_score(self, state): + """Score eos. + + Args: + state: Scorer state for prefix tokens + + Returns: + float: final score + + """ + return self.model.final(state) + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch. + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. 
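
            Example (editorial sketch, not part of the original patch; the
            small sizes below are placeholders):

            >>> import argparse, torch
            >>> args = argparse.Namespace(type="lstm", layer=2, unit=16)
            >>> lm = DefaultRNNLM(n_vocab=50, args=args)
            >>> ys = torch.tensor([[1, 2], [3, 4]])      # (n_batch, ylen)
            >>> logp, states = lm.batch_score(ys, [None, None], None)
            >>> logp.shape
            torch.Size([2, 50])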
+ + """ + # merge states + n_batch = len(ys) + n_layers = self.model.predictor.n_layers + if self.model.predictor.typ == "lstm": + keys = ("c", "h") + else: + keys = ("h",) + + if states[0] is None: + states = None + else: + # transpose state of [batch, key, layer] into [key, layer, batch] + states = { + k: [ + torch.stack([states[b][k][i] for b in range(n_batch)]) + for i in range(n_layers) + ] + for k in keys + } + states, logp = self.model.predict(states, ys[:, -1]) + + # transpose state of [key, layer, batch] into [batch, key, layer] + return ( + logp, + [ + {k: [states[k][i][b] for i in range(n_layers)] for k in keys} + for b in range(n_batch) + ], + ) + + +class ClassifierWithState(nn.Module): + """A wrapper for pytorch RNNLM.""" + + def __init__( + self, predictor, lossfun=nn.CrossEntropyLoss(reduction="none"), label_key=-1 + ): + """Initialize class. + + :param torch.nn.Module predictor : The RNNLM + :param function lossfun : The loss function to use + :param int/str label_key : + + """ + if not (isinstance(label_key, (int, str))): + raise TypeError("label_key must be int or str, but is %s" % type(label_key)) + super(ClassifierWithState, self).__init__() + self.lossfun = lossfun + self.y = None + self.loss = None + self.label_key = label_key + self.predictor = predictor + + def forward(self, state, *args, **kwargs): + """Compute the loss value for an input and label pair. + + Notes: + It also computes accuracy and stores it to the attribute. + When ``label_key`` is ``int``, the corresponding element in ``args`` + is treated as ground truth labels. And when it is ``str``, the + element in ``kwargs`` is used. + The all elements of ``args`` and ``kwargs`` except the groundtruth + labels are features. + It feeds features to the predictor and compare the result + with ground truth labels. + + :param torch.Tensor state : the LM state + :param list[torch.Tensor] args : Input minibatch + :param dict[torch.Tensor] kwargs : Input minibatch + :return loss value + :rtype torch.Tensor + + """ + if isinstance(self.label_key, int): + if not (-len(args) <= self.label_key < len(args)): + msg = "Label key %d is out of bounds" % self.label_key + raise ValueError(msg) + t = args[self.label_key] + if self.label_key == -1: + args = args[:-1] + else: + args = args[: self.label_key] + args[self.label_key + 1 :] + elif isinstance(self.label_key, str): + if self.label_key not in kwargs: + msg = 'Label key "%s" is not found' % self.label_key + raise ValueError(msg) + t = kwargs[self.label_key] + del kwargs[self.label_key] + + self.y = None + self.loss = None + state, self.y = self.predictor(state, *args, **kwargs) + self.loss = self.lossfun(self.y, t) + return state, self.loss + + def predict(self, state, x): + """Predict log probabilities for given state and input x using the predictor. 
+ + :param torch.Tensor state : The current state + :param torch.Tensor x : The input + :return a tuple (new state, log prob vector) + :rtype (torch.Tensor, torch.Tensor) + """ + if hasattr(self.predictor, "normalized") and self.predictor.normalized: + return self.predictor(state, x) + else: + state, z = self.predictor(state, x) + return state, F.log_softmax(z, dim=1) + + def buff_predict(self, state, x, n): + """Predict new tokens from buffered inputs.""" + if self.predictor.__class__.__name__ == "RNNLM": + return self.predict(state, x) + + new_state = [] + new_log_y = [] + for i in range(n): + state_i = None if state is None else state[i] + state_i, log_y = self.predict(state_i, x[i].unsqueeze(0)) + new_state.append(state_i) + new_log_y.append(log_y) + + return new_state, torch.cat(new_log_y) + + def final(self, state, index=None): + """Predict final log probabilities for given state using the predictor. + + :param state: The state + :return The final log probabilities + :rtype torch.Tensor + """ + if hasattr(self.predictor, "final"): + if index is not None: + return self.predictor.final(state[index]) + else: + return self.predictor.final(state) + else: + return 0.0 + + +# Definition of a recurrent net for language modeling +class RNNLM(nn.Module): + """A pytorch RNNLM.""" + + def __init__( + self, + n_vocab, + n_layers, + n_units, + n_embed=None, + typ="lstm", + dropout_rate=0.5, + emb_dropout_rate=0.0, + tie_weights=False, + ): + """Initialize class. + + :param int n_vocab: The size of the vocabulary + :param int n_layers: The number of layers to create + :param int n_units: The number of units per layer + :param str typ: The RNN type + """ + super(RNNLM, self).__init__() + if n_embed is None: + n_embed = n_units + + self.embed = nn.Embedding(n_vocab, n_embed) + + if emb_dropout_rate == 0.0: + self.embed_drop = None + else: + self.embed_drop = nn.Dropout(emb_dropout_rate) + + if typ == "lstm": + self.rnn = nn.ModuleList( + [nn.LSTMCell(n_embed, n_units)] + + [nn.LSTMCell(n_units, n_units) for _ in range(n_layers - 1)] + ) + else: + self.rnn = nn.ModuleList( + [nn.GRUCell(n_embed, n_units)] + + [nn.GRUCell(n_units, n_units) for _ in range(n_layers - 1)] + ) + + self.dropout = nn.ModuleList( + [nn.Dropout(dropout_rate) for _ in range(n_layers + 1)] + ) + self.lo = nn.Linear(n_units, n_vocab) + self.n_layers = n_layers + self.n_units = n_units + self.typ = typ + + logging.info("Tie weights set to {}".format(tie_weights)) + logging.info("Dropout set to {}".format(dropout_rate)) + logging.info("Emb Dropout set to {}".format(emb_dropout_rate)) + + if tie_weights: + assert ( + n_embed == n_units + ), "Tie Weights: True need embedding and final dimensions to match" + self.lo.weight = self.embed.weight + + # initialize parameters from uniform distribution + for param in self.parameters(): + param.data.uniform_(-0.1, 0.1) + + def zero_state(self, batchsize): + """Initialize state.""" + p = next(self.parameters()) + return torch.zeros(batchsize, self.n_units).to(device=p.device, dtype=p.dtype) + + def forward(self, state, x): + """Forward neural networks.""" + if state is None: + h = [to_device(x, self.zero_state(x.size(0))) for n in range(self.n_layers)] + state = {"h": h} + if self.typ == "lstm": + c = [ + to_device(x, self.zero_state(x.size(0))) + for n in range(self.n_layers) + ] + state = {"c": c, "h": h} + + h = [None] * self.n_layers + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(x)) + else: + emb = self.embed(x) + if self.typ == "lstm": + c = [None] * self.n_layers + 
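            # Editorial annotation (not in the original patch): layer 0 of the
            # stacked LSTMCells consumes the (dropout-regularised) embedding,
            # and every higher layer consumes the hidden state of the layer
            # below; the new hidden/cell states are gathered into `state` for
            # the next time step.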
h[0], c[0] = self.rnn[0]( + self.dropout[0](emb), (state["h"][0], state["c"][0]) + ) + for n in range(1, self.n_layers): + h[n], c[n] = self.rnn[n]( + self.dropout[n](h[n - 1]), (state["h"][n], state["c"][n]) + ) + state = {"c": c, "h": h} + else: + h[0] = self.rnn[0](self.dropout[0](emb), state["h"][0]) + for n in range(1, self.n_layers): + h[n] = self.rnn[n](self.dropout[n](h[n - 1]), state["h"][n]) + state = {"h": h} + y = self.lo(self.dropout[-1](h[-1])) + return state, y diff --git a/espnet/nets/pytorch_backend/lm/seq_rnn.py b/espnet/nets/pytorch_backend/lm/seq_rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..ee5f026e3811c790f283dc9298e1221d783c0e4f --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/seq_rnn.py @@ -0,0 +1,178 @@ +"""Sequential implementation of Recurrent Neural Network Language Model.""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface + + +class SequentialRNNLM(LMInterface, torch.nn.Module): + """Sequential RNNLM. + + See also: + https://github.com/pytorch/examples/blob/4581968193699de14b56527296262dd76ab43557/word_language_model/model.py + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--type", + type=str, + default="lstm", + nargs="?", + choices=["lstm", "gru"], + help="Which type of RNN to use", + ) + parser.add_argument( + "--layer", "-l", type=int, default=2, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", "-u", type=int, default=650, help="Number of hidden units" + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. see py:method:`add_arguments` + + """ + torch.nn.Module.__init__(self) + self._setup( + rnn_type=args.type.upper(), + ntoken=n_vocab, + ninp=args.unit, + nhid=args.unit, + nlayers=args.layer, + dropout=args.dropout_rate, + ) + + def _setup( + self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False + ): + self.drop = nn.Dropout(dropout) + self.encoder = nn.Embedding(ntoken, ninp) + if rnn_type in ["LSTM", "GRU"]: + self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) + else: + try: + nonlinearity = {"RNN_TANH": "tanh", "RNN_RELU": "relu"}[rnn_type] + except KeyError: + raise ValueError( + "An invalid option for `--model` was supplied, " + "options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']" + ) + self.rnn = nn.RNN( + ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout + ) + self.decoder = nn.Linear(nhid, ntoken) + + # Optionally tie weights as in: + # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) + # https://arxiv.org/abs/1608.05859 + # and + # "Tying Word Vectors and Word Classifiers: + # A Loss Framework for Language Modeling" (Inan et al. 
2016) + # https://arxiv.org/abs/1611.01462 + if tie_weights: + if nhid != ninp: + raise ValueError( + "When using the tied flag, nhid must be equal to emsize" + ) + self.decoder.weight = self.encoder.weight + + self._init_weights() + + self.rnn_type = rnn_type + self.nhid = nhid + self.nlayers = nlayers + + def _init_weights(self): + # NOTE: original init in pytorch/examples + # initrange = 0.1 + # self.encoder.weight.data.uniform_(-initrange, initrange) + # self.decoder.bias.data.zero_() + # self.decoder.weight.data.uniform_(-initrange, initrange) + # NOTE: our default.py:RNNLM init + for param in self.parameters(): + param.data.uniform_(-0.1, 0.1) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + y = self._before_loss(x, None)[0] + mask = (x != 0).to(y.dtype) + loss = F.cross_entropy(y.view(-1, y.shape[-1]), t.view(-1), reduction="none") + logp = loss * mask.view(-1) + logp = logp.sum() + count = mask.sum() + return logp / count, logp, count + + def _before_loss(self, input, hidden): + emb = self.drop(self.encoder(input)) + output, hidden = self.rnn(emb, hidden) + output = self.drop(output) + decoded = self.decoder( + output.view(output.size(0) * output.size(1), output.size(2)) + ) + return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden + + def init_state(self, x): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + bsz = 1 + weight = next(self.parameters()) + if self.rnn_type == "LSTM": + return ( + weight.new_zeros(self.nlayers, bsz, self.nhid), + weight.new_zeros(self.nlayers, bsz, self.nhid), + ) + else: + return weight.new_zeros(self.nlayers, bsz, self.nhid) + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. 
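
            Example (editorial sketch, not part of the original patch):

            >>> import argparse, torch
            >>> args = argparse.Namespace(type="lstm", layer=2, unit=16,
            ...                           dropout_rate=0.0)
            >>> lm = SequentialRNNLM(50, args)
            >>> state = lm.init_state(None)
            >>> logp, state = lm.score(torch.tensor([1, 5, 7]), state, None)
            >>> logp.shape
            torch.Size([50])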
+ + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + y, new_state = self._before_loss(y[-1].view(1, 1), state) + logp = y.log_softmax(dim=-1).view(-1) + return logp, new_state diff --git a/espnet/nets/pytorch_backend/lm/transformer.py b/espnet/nets/pytorch_backend/lm/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..42c2f86d461b5d6125f4b5455b7b31cd6944f75d --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/transformer.py @@ -0,0 +1,252 @@ +"""Transformer language model.""" + +from typing import Any +from typing import List +from typing import Tuple + +import logging +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.scorer_interface import BatchScorerInterface +from espnet.utils.cli_utils import strtobool + + +class TransformerLM(nn.Module, LMInterface, BatchScorerInterface): + """Transformer language model.""" + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--layer", type=int, default=4, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", + type=int, + default=1024, + help="Number of hidden units in feedforward layer", + ) + parser.add_argument( + "--att-unit", + type=int, + default=256, + help="Number of hidden units in attention layer", + ) + parser.add_argument( + "--embed-unit", + type=int, + default=128, + help="Number of hidden units in embedding layer", + ) + parser.add_argument( + "--head", type=int, default=2, help="Number of multi head attention" + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument( + "--att-dropout-rate", + type=float, + default=0.0, + help="att dropout probability", + ) + parser.add_argument( + "--emb-dropout-rate", + type=float, + default=0.0, + help="emb dropout probability", + ) + parser.add_argument( + "--tie-weights", + type=strtobool, + default=False, + help="Tie input and output embeddings", + ) + parser.add_argument( + "--pos-enc", + default="sinusoidal", + choices=["sinusoidal", "none"], + help="positional encoding", + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. 
see py:method:`add_arguments` + + """ + nn.Module.__init__(self) + + # NOTE: for a compatibility with less than 0.9.7 version models + emb_dropout_rate = getattr(args, "emb_dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.9.7 version models + tie_weights = getattr(args, "tie_weights", False) + # NOTE: for a compatibility with less than 0.9.7 version models + att_dropout_rate = getattr(args, "att_dropout_rate", 0.0) + + if args.pos_enc == "sinusoidal": + pos_enc_class = PositionalEncoding + elif args.pos_enc == "none": + + def pos_enc_class(*args, **kwargs): + return nn.Sequential() # indentity + + else: + raise ValueError(f"unknown pos-enc option: {args.pos_enc}") + + self.embed = nn.Embedding(n_vocab, args.embed_unit) + + if emb_dropout_rate == 0.0: + self.embed_drop = None + else: + self.embed_drop = nn.Dropout(emb_dropout_rate) + + self.encoder = Encoder( + idim=args.embed_unit, + attention_dim=args.att_unit, + attention_heads=args.head, + linear_units=args.unit, + num_blocks=args.layer, + dropout_rate=args.dropout_rate, + attention_dropout_rate=att_dropout_rate, + input_layer="linear", + pos_enc_class=pos_enc_class, + ) + self.decoder = nn.Linear(args.att_unit, n_vocab) + + logging.info("Tie weights set to {}".format(tie_weights)) + logging.info("Dropout set to {}".format(args.dropout_rate)) + logging.info("Emb Dropout set to {}".format(emb_dropout_rate)) + logging.info("Att Dropout set to {}".format(att_dropout_rate)) + + if tie_weights: + assert ( + args.att_unit == args.embed_unit + ), "Tie Weights: True need embedding and final dimensions to match" + self.decoder.weight = self.embed.weight + + def _target_mask(self, ys_in_pad): + ys_mask = ys_in_pad != 0 + m = subsequent_mask(ys_mask.size(-1), device=ys_mask.device).unsqueeze(0) + return ys_mask.unsqueeze(-2) & m + + def forward( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + xm = x != 0 + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(x)) + else: + emb = self.embed(x) + + h, _ = self.encoder(emb, self._target_mask(x)) + y = self.decoder(h) + loss = F.cross_entropy(y.view(-1, y.shape[-1]), t.view(-1), reduction="none") + mask = xm.to(dtype=loss.dtype) + logp = loss * mask.view(-1) + logp = logp.sum() + count = mask.sum() + return logp / count, logp, count + + def score( + self, y: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): encoder feature that generates ys. 
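
            Example (editorial sketch, not part of the original patch):

            >>> import argparse, torch
            >>> args = argparse.Namespace(layer=2, unit=256, att_unit=128,
            ...                           embed_unit=64, head=2,
            ...                           dropout_rate=0.0, pos_enc="sinusoidal")
            >>> lm = TransformerLM(50, args)
            >>> logp, state = lm.score(torch.tensor([1, 5, 7]), None, None)
            >>> logp.shape
            torch.Size([50])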
+ + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + y = y.unsqueeze(0) + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(y)) + else: + emb = self.embed(y) + + h, _, cache = self.encoder.forward_one_step( + emb, self._target_mask(y), cache=state + ) + h = self.decoder(h[:, -1]) + logp = h.log_softmax(dim=-1).squeeze(0) + return logp, cache + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + # merge states + n_batch = len(ys) + n_layers = len(self.encoder.encoders) + if states[0] is None: + batch_state = None + else: + # transpose state of [batch, layer] into [layer, batch] + batch_state = [ + torch.stack([states[b][i] for b in range(n_batch)]) + for i in range(n_layers) + ] + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(ys)) + else: + emb = self.embed(ys) + + # batch decoding + h, _, states = self.encoder.forward_one_step( + emb, self._target_mask(ys), cache=batch_state + ) + h = self.decoder(h[:, -1]) + logp = h.log_softmax(dim=-1) + + # transpose state of [layer, batch] into [batch, layer] + state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)] + return logp, state_list diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..02dcbfa445a3212fac7316e6af666df328b6fcf3 --- /dev/null +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- + +"""Network related utility tools.""" + +import logging +from typing import Dict + +import numpy as np +import torch + + +def to_device(m, x): + """Send tensor into the device of the module. + + Args: + m (torch.nn.Module): Torch module. + x (Tensor): Torch tensor. + + Returns: + Tensor: Torch tensor located in the same place as torch module. + + """ + if isinstance(m, torch.nn.Module): + device = next(m.parameters()).device + elif isinstance(m, torch.Tensor): + device = m.device + else: + raise TypeError( + "Expected torch.nn.Module or torch.tensor, " f"bot got: {type(m)}" + ) + return x.to(device) + + +def pad_list(xs, pad_value): + """Perform padding for the list of tensors. + + Args: + xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. + pad_value (float): Value for padding. + + Returns: + Tensor: Padded tensor (B, Tmax, `*`). + + Examples: + >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] + >>> x + [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] + >>> pad_list(x, 0) + tensor([[1., 1., 1., 1.], + [1., 1., 0., 0.], + [1., 0., 0., 0.]]) + + """ + n_batch = len(xs) + max_len = max(x.size(0) for x in xs) + pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) + + for i in range(n_batch): + pad[i, : xs[i].size(0)] = xs[i] + + return pad + + +def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): + """Make mask tensor containing indices of padded part. 
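
    Editorial note (not part of the original docstring): the optional
    `maxlen` argument fixes the time dimension of the returned mask; it can
    only be used when `xs` is None and must satisfy `maxlen >= max(lengths)`.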
+ + Args: + lengths (LongTensor or List): Batch of lengths (B,). + xs (Tensor, optional): The reference tensor. + If set, masks will be the same shape as this tensor. + length_dim (int, optional): Dimension indicator of the above tensor. + See the example. + + Returns: + Tensor: Mask tensor containing indices of padded part. + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (including 1.2) + + Examples: + With only lengths. + + >>> lengths = [5, 3, 2] + >>> make_pad_mask(lengths) + masks = [[0, 0, 0, 0 ,0], + [0, 0, 0, 1, 1], + [0, 0, 1, 1, 1]] + + With the reference tensor. + + >>> xs = torch.zeros((3, 2, 4)) + >>> make_pad_mask(lengths, xs) + tensor([[[0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 0, 0, 1], + [0, 0, 0, 1]], + [[0, 0, 1, 1], + [0, 0, 1, 1]]], dtype=torch.uint8) + >>> xs = torch.zeros((3, 2, 6)) + >>> make_pad_mask(lengths, xs) + tensor([[[0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1]], + [[0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1]], + [[0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1]]], dtype=torch.uint8) + + With the reference tensor and dimension indicator. + + >>> xs = torch.zeros((3, 6, 6)) + >>> make_pad_mask(lengths, xs, 1) + tensor([[[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1]], + [[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1]], + [[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1]]], dtype=torch.uint8) + >>> make_pad_mask(lengths, xs, 2) + tensor([[[0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1]], + [[0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1]], + [[0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1]]], dtype=torch.uint8) + + """ + if length_dim == 0: + raise ValueError("length_dim cannot be 0: {}".format(length_dim)) + + if not isinstance(lengths, list): + lengths = lengths.tolist() + bs = int(len(lengths)) + if maxlen is None: + if xs is None: + maxlen = int(max(lengths)) + else: + maxlen = xs.size(length_dim) + else: + assert xs is None + assert maxlen >= int(max(lengths)) + + seq_range = torch.arange(0, maxlen, dtype=torch.int64) + seq_range_expand = seq_range.unsqueeze(0).expand(bs, maxlen) + seq_length_expand = seq_range_expand.new(lengths).unsqueeze(-1) + mask = seq_range_expand >= seq_length_expand + + if xs is not None: + assert xs.size(0) == bs, (xs.size(0), bs) + + if length_dim < 0: + length_dim = xs.dim() + length_dim + # ind = (:, None, ..., None, :, , None, ..., None) + ind = tuple( + slice(None) if i in (0, length_dim) else None for i in range(xs.dim()) + ) + mask = mask[ind].expand_as(xs).to(xs.device) + return mask + + +def make_non_pad_mask(lengths, xs=None, length_dim=-1): + """Make mask tensor containing indices of non-padded part. + + Args: + lengths (LongTensor or List): Batch of lengths (B,). + xs (Tensor, optional): The reference tensor. + If set, masks will be the same shape as this tensor. + length_dim (int, optional): Dimension indicator of the above tensor. + See the example. + + Returns: + ByteTensor: mask tensor containing indices of padded part. 
+ dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (including 1.2) + + Examples: + With only lengths. + + >>> lengths = [5, 3, 2] + >>> make_non_pad_mask(lengths) + masks = [[1, 1, 1, 1 ,1], + [1, 1, 1, 0, 0], + [1, 1, 0, 0, 0]] + + With the reference tensor. + + >>> xs = torch.zeros((3, 2, 4)) + >>> make_non_pad_mask(lengths, xs) + tensor([[[1, 1, 1, 1], + [1, 1, 1, 1]], + [[1, 1, 1, 0], + [1, 1, 1, 0]], + [[1, 1, 0, 0], + [1, 1, 0, 0]]], dtype=torch.uint8) + >>> xs = torch.zeros((3, 2, 6)) + >>> make_non_pad_mask(lengths, xs) + tensor([[[1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0]], + [[1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0]], + [[1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0]]], dtype=torch.uint8) + + With the reference tensor and dimension indicator. + + >>> xs = torch.zeros((3, 6, 6)) + >>> make_non_pad_mask(lengths, xs, 1) + tensor([[[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]], + [[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]], + [[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]], dtype=torch.uint8) + >>> make_non_pad_mask(lengths, xs, 2) + tensor([[[1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0]], + [[1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0]], + [[1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0]]], dtype=torch.uint8) + + """ + return ~make_pad_mask(lengths, xs, length_dim) + + +def mask_by_length(xs, lengths, fill=0): + """Mask tensor according to length. + + Args: + xs (Tensor): Batch of input tensor (B, `*`). + lengths (LongTensor or List): Batch of lengths (B,). + fill (int or float): Value to fill masked part. + + Returns: + Tensor: Batch of masked input tensor (B, `*`). + + Examples: + >>> x = torch.arange(5).repeat(3, 1) + 1 + >>> x + tensor([[1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5]]) + >>> lengths = [5, 3, 2] + >>> mask_by_length(x, lengths) + tensor([[1, 2, 3, 4, 5], + [1, 2, 3, 0, 0], + [1, 2, 0, 0, 0]]) + + """ + assert xs.size(0) == len(lengths) + ret = xs.data.new(*xs.size()).fill_(fill) + for i, l in enumerate(lengths): + ret[i, :l] = xs[i, :l] + return ret + + +def th_accuracy(pad_outputs, pad_targets, ignore_label): + """Calculate accuracy. + + Args: + pad_outputs (Tensor): Prediction tensors (B * Lmax, D). + pad_targets (LongTensor): Target label tensors (B, Lmax, D). + ignore_label (int): Ignore label id. + + Returns: + float: Accuracy value (0.0 - 1.0). + + """ + pad_pred = pad_outputs.view( + pad_targets.size(0), pad_targets.size(1), pad_outputs.size(1) + ).argmax(2) + mask = pad_targets != ignore_label + numerator = torch.sum( + pad_pred.masked_select(mask) == pad_targets.masked_select(mask) + ) + denominator = torch.sum(mask) + return float(numerator) / float(denominator) + + +def to_torch_tensor(x): + """Change to torch.Tensor or ComplexTensor from numpy.ndarray. + + Args: + x: Inputs. It should be one of numpy.ndarray, Tensor, ComplexTensor, and dict. + + Returns: + Tensor or ComplexTensor: Type converted inputs. 
+ + Examples: + >>> xs = np.ones(3, dtype=np.float32) + >>> xs = to_torch_tensor(xs) + tensor([1., 1., 1.]) + >>> xs = torch.ones(3, 4, 5) + >>> assert to_torch_tensor(xs) is xs + >>> xs = {'real': xs, 'imag': xs} + >>> to_torch_tensor(xs) + ComplexTensor( + Real: + tensor([1., 1., 1.]) + Imag; + tensor([1., 1., 1.]) + ) + + """ + # If numpy, change to torch tensor + if isinstance(x, np.ndarray): + if x.dtype.kind == "c": + # Dynamically importing because torch_complex requires python3 + from torch_complex.tensor import ComplexTensor + + return ComplexTensor(x) + else: + return torch.from_numpy(x) + + # If {'real': ..., 'imag': ...}, convert to ComplexTensor + elif isinstance(x, dict): + # Dynamically importing because torch_complex requires python3 + from torch_complex.tensor import ComplexTensor + + if "real" not in x or "imag" not in x: + raise ValueError("has 'real' and 'imag' keys: {}".format(list(x))) + # Relative importing because of using python3 syntax + return ComplexTensor(x["real"], x["imag"]) + + # If torch.Tensor, as it is + elif isinstance(x, torch.Tensor): + return x + + else: + error = ( + "x must be numpy.ndarray, torch.Tensor or a dict like " + "{{'real': torch.Tensor, 'imag': torch.Tensor}}, " + "but got {}".format(type(x)) + ) + try: + from torch_complex.tensor import ComplexTensor + except Exception: + # If PY2 + raise ValueError(error) + else: + # If PY3 + if isinstance(x, ComplexTensor): + return x + else: + raise ValueError(error) + + +def get_subsample(train_args, mode, arch): + """Parse the subsampling factors from the args for the specified `mode` and `arch`. + + Args: + train_args: argument Namespace containing options. + mode: one of ('asr', 'mt', 'st') + arch: one of ('rnn', 'rnn-t', 'rnn_mix', 'rnn_mulenc', 'transformer') + + Returns: + np.ndarray / List[np.ndarray]: subsampling factors. + """ + if arch == "transformer": + return np.array([1]) + + elif mode == "mt" and arch == "rnn": + # +1 means input (+1) and layers outputs (train_args.elayer) + subsample = np.ones(train_args.elayers + 1, dtype=np.int) + logging.warning("Subsampling is not performed for machine translation.") + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif ( + (mode == "asr" and arch in ("rnn", "rnn-t")) + or (mode == "mt" and arch == "rnn") + or (mode == "st" and arch == "rnn") + ): + subsample = np.ones(train_args.elayers + 1, dtype=np.int) + if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): + ss = train_args.subsample.split("_") + for j in range(min(train_args.elayers + 1, len(ss))): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN." + ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif mode == "asr" and arch == "rnn_mix": + subsample = np.ones( + train_args.elayers_sd + train_args.elayers + 1, dtype=np.int + ) + if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): + ss = train_args.subsample.split("_") + for j in range( + min(train_args.elayers_sd + train_args.elayers + 1, len(ss)) + ): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN." 
+ ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif mode == "asr" and arch == "rnn_mulenc": + subsample_list = [] + for idx in range(train_args.num_encs): + subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int) + if train_args.etype[idx].endswith("p") and not train_args.etype[ + idx + ].startswith("vgg"): + ss = train_args.subsample[idx].split("_") + for j in range(min(train_args.elayers[idx] + 1, len(ss))): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Encoder %d: Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN.", + idx + 1, + ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + subsample_list.append(subsample) + return subsample_list + + else: + raise ValueError("Invalid options: mode={}, arch={}".format(mode, arch)) + + +def rename_state_dict( + old_prefix: str, new_prefix: str, state_dict: Dict[str, torch.Tensor] +): + """Replace keys of old prefix with new prefix in state dict.""" + # need this list not to break the dict iterator + old_keys = [k for k in state_dict if k.startswith(old_prefix)] + if len(old_keys) > 0: + logging.warning(f"Rename: {old_prefix} -> {new_prefix}") + for k in old_keys: + v = state_dict.pop(k) + new_k = k.replace(old_prefix, new_prefix) + state_dict[new_k] = v + + +def get_activation(act): + """Return activation function.""" + # Lazy load to avoid unused import + from espnet.nets.pytorch_backend.conformer.swish import Swish + + activation_funcs = { + "hardtanh": torch.nn.Hardtanh, + "tanh": torch.nn.Tanh, + "relu": torch.nn.ReLU, + "selu": torch.nn.SELU, + "swish": Swish, + } + + return activation_funcs[act]() + + +class MLPHead(torch.nn.Module): + def __init__(self, idim, hdim, odim, norm="batchnorm"): + super(MLPHead, self).__init__() + self.norm = norm + + self.fc1 = torch.nn.Linear(idim, hdim) + if norm == "batchnorm": + self.bn1 = torch.nn.BatchNorm1d(hdim) + elif norm == "layernorm": + self.norm1 = torch.nn.LayerNorm(hdim) + self.nonlin1 = torch.nn.ReLU(inplace=True) + self.fc2 = torch.nn.Linear( hdim, odim) + + def forward(self, x): + x = self.fc1(x) + if self.norm == "batchnorm": + x = self.bn1(x.transpose(1,2)).transpose(1,2) + elif self.norm == "layernorm": + x = self.norm1(x) + x = self.nonlin1(x) + x = self.fc2(x) + return x diff --git a/espnet/nets/pytorch_backend/transformer/__init__.py b/espnet/nets/pytorch_backend/transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/pytorch_backend/transformer/add_sos_eos.py b/espnet/nets/pytorch_backend/transformer/add_sos_eos.py new file mode 100644 index 0000000000000000000000000000000000000000..c550c5e58bc4525d7890b63b2b723e9495329016 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/add_sos_eos.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Unility funcitons for Transformer.""" + +import torch + + +def add_sos_eos(ys_pad, sos, eos, ignore_id): + """Add and labels. 
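
    Example (editorial addition, not part of the original patch; the summary
    above refers to the <sos> and <eos> labels):

    >>> import torch
    >>> ys_pad = torch.tensor([[1, 2, 3], [4, 5, -1]])
    >>> ys_in, ys_out = add_sos_eos(ys_pad, sos=10, eos=11, ignore_id=-1)
    >>> ys_in.tolist()
    [[10, 1, 2, 3], [10, 4, 5, 11]]
    >>> ys_out.tolist()
    [[1, 2, 3, 11], [4, 5, 11, -1]]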
+ + :param torch.Tensor ys_pad: batch of padded target sequences (B, Lmax) + :param int sos: index of + :param int eos: index of + :param int ignore_id: index of padding + :return: padded tensor (B, Lmax) + :rtype: torch.Tensor + :return: padded tensor (B, Lmax) + :rtype: torch.Tensor + """ + from espnet.nets.pytorch_backend.nets_utils import pad_list + + _sos = ys_pad.new([sos]) + _eos = ys_pad.new([eos]) + ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys + ys_in = [torch.cat([_sos, y], dim=0) for y in ys] + ys_out = [torch.cat([y, _eos], dim=0) for y in ys] + return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) diff --git a/espnet/nets/pytorch_backend/transformer/attention.py b/espnet/nets/pytorch_backend/transformer/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..888ce2bff2aa669af5f1f1ac9fc314d41a79d4e0 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/attention.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Multi-Head Attention layer definition.""" + +import math + +import numpy +import torch +from torch import nn + + +class MultiHeadedAttention(nn.Module): + """Multi-Head Attention layer. + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + """ + + def __init__(self, n_head, n_feat, dropout_rate): + """Construct an MultiHeadedAttention object.""" + super(MultiHeadedAttention, self).__init__() + assert n_feat % n_head == 0 + # We assume d_v always equals d_k + self.d_k = n_feat // n_head + self.h = n_head + self.linear_q = nn.Linear(n_feat, n_feat) + self.linear_k = nn.Linear(n_feat, n_feat) + self.linear_v = nn.Linear(n_feat, n_feat) + self.linear_out = nn.Linear(n_feat, n_feat) + self.attn = None + self.dropout = nn.Dropout(p=dropout_rate) + + def forward_qkv(self, query, key, value): + """Transform query, key and value. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + Returns: + torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k). + torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k). + torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k). + """ + n_batch = query.size(0) + q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) + k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) + v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) + q = q.transpose(1, 2) # (batch, head, time1, d_k) + k = k.transpose(1, 2) # (batch, head, time2, d_k) + v = v.transpose(1, 2) # (batch, head, time2, d_k) + + return q, k, v + + def forward_attention(self, value, scores, mask, rtn_attn=False): + """Compute attention context vector. + Args: + value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k). + scores (torch.Tensor): Attention score (#batch, n_head, time1, time2). + mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2). + rtn_attn (boolean): Flag of return attention score + Returns: + torch.Tensor: Transformed value (#batch, time1, d_model) + weighted by the attention score (#batch, time1, time2). 
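
        Example (editorial sketch, not part of the original patch):

        >>> import torch
        >>> mha = MultiHeadedAttention(n_head=4, n_feat=256, dropout_rate=0.0)
        >>> q = k = v = torch.randn(2, 10, 256)
        >>> mha(q, k, v, mask=None).shape
        torch.Size([2, 10, 256])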
+ """ + n_batch = value.size(0) + if mask is not None: + mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) + min_value = float( + numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min + ) + scores = scores.masked_fill(mask, min_value) + self.attn = torch.softmax(scores, dim=-1).masked_fill( + mask, 0.0 + ) # (batch, head, time1, time2) + else: + self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) + + p_attn = self.dropout(self.attn) + x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) + x = ( + x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) + ) # (batch, time1, d_model) + if rtn_attn: + return self.linear_out(x), self.attn + return self.linear_out(x) # (batch, time1, d_model) + + def forward(self, query, key, value, mask, rtn_attn=False): + """Compute scaled dot product attention. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + rtn_attn (boolean): Flag of return attention score + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + """ + q, k, v = self.forward_qkv(query, key, value) + scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) + return self.forward_attention(v, scores, mask, rtn_attn) + + +class LegacyRelPositionMultiHeadedAttention(MultiHeadedAttention): + """Multi-Head Attention layer with relative position encoding (old version). + Details can be found in https://github.com/espnet/espnet/pull/2816. + Paper: https://arxiv.org/abs/1901.02860 + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + zero_triu (bool): Whether to zero the upper triangular part of attention matrix. + """ + + def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False): + """Construct an RelPositionMultiHeadedAttention object.""" + super().__init__(n_head, n_feat, dropout_rate) + self.zero_triu = zero_triu + # linear transformation for positional encoding + self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) + # these two learnable bias are used in matrix c and matrix d + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) + self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) + torch.nn.init.xavier_uniform_(self.pos_bias_u) + torch.nn.init.xavier_uniform_(self.pos_bias_v) + + def rel_shift(self, x): + """Compute relative positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, head, time1, time2). + Returns: + torch.Tensor: Output tensor. + """ + zero_pad = torch.zeros((*x.size()[:3], 1), device=x.device, dtype=x.dtype) + x_padded = torch.cat([zero_pad, x], dim=-1) + + x_padded = x_padded.view(*x.size()[:2], x.size(3) + 1, x.size(2)) + x = x_padded[:, :, 1:].view_as(x) + + if self.zero_triu: + ones = torch.ones((x.size(2), x.size(3))) + x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] + + return x + + def forward(self, query, key, value, pos_emb, mask): + """Compute 'Scaled Dot Product Attention' with rel. positional encoding. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + pos_emb (torch.Tensor): Positional embedding tensor (#batch, time1, size). 
+ mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + """ + q, k, v = self.forward_qkv(query, key, value) + q = q.transpose(1, 2) # (batch, time1, head, d_k) + + n_batch_pos = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) + p = p.transpose(1, 2) # (batch, head, time1, d_k) + + # (batch, head, time1, d_k) + q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) + # (batch, head, time1, d_k) + q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) + + # compute attention score + # first compute matrix a and matrix c + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + # (batch, head, time1, time2) + matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) + + # compute matrix b and matrix d + # (batch, head, time1, time1) + matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) + matrix_bd = self.rel_shift(matrix_bd) + + scores = (matrix_ac + matrix_bd) / math.sqrt( + self.d_k + ) # (batch, head, time1, time2) + + return self.forward_attention(v, scores, mask) + + +class RelPositionMultiHeadedAttention(MultiHeadedAttention): + """Multi-Head Attention layer with relative position encoding (new implementation). + Details can be found in https://github.com/espnet/espnet/pull/2816. + Paper: https://arxiv.org/abs/1901.02860 + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + zero_triu (bool): Whether to zero the upper triangular part of attention matrix. + """ + + def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False): + """Construct an RelPositionMultiHeadedAttention object.""" + super().__init__(n_head, n_feat, dropout_rate) + self.zero_triu = zero_triu + # linear transformation for positional encoding + self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) + # these two learnable bias are used in matrix c and matrix d + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) + self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) + torch.nn.init.xavier_uniform_(self.pos_bias_u) + torch.nn.init.xavier_uniform_(self.pos_bias_v) + + def rel_shift(self, x): + """Compute relative positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, head, time1, 2*time1-1). + time1 means the length of query vector. + Returns: + torch.Tensor: Output tensor. + """ + zero_pad = torch.zeros((*x.size()[:3], 1), device=x.device, dtype=x.dtype) + x_padded = torch.cat([zero_pad, x], dim=-1) + + x_padded = x_padded.view(*x.size()[:2], x.size(3) + 1, x.size(2)) + x = x_padded[:, :, 1:].view_as(x)[ + :, :, :, : x.size(-1) // 2 + 1 + ] # only keep the positions from 0 to time2 + + if self.zero_triu: + ones = torch.ones((x.size(2), x.size(3)), device=x.device) + x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] + + return x + + def forward(self, query, key, value, pos_emb, mask): + """Compute 'Scaled Dot Product Attention' with rel. positional encoding. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + pos_emb (torch.Tensor): Positional embedding tensor + (#batch, 2*time1-1, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). 
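
        Example (editorial sketch, not part of the original patch; the
        positional embeddings below are random placeholders):

        >>> import torch
        >>> attn = RelPositionMultiHeadedAttention(4, 256, dropout_rate=0.0)
        >>> x = torch.randn(2, 10, 256)
        >>> pos_emb = torch.randn(1, 2 * 10 - 1, 256)  # (1, 2*time1-1, size)
        >>> attn(x, x, x, pos_emb, mask=None).shape
        torch.Size([2, 10, 256])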
+ """ + q, k, v = self.forward_qkv(query, key, value) + q = q.transpose(1, 2) # (batch, time1, head, d_k) + + n_batch_pos = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) + p = p.transpose(1, 2) # (batch, head, 2*time1-1, d_k) + + # (batch, head, time1, d_k) + q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) + # (batch, head, time1, d_k) + q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) + + # compute attention score + # first compute matrix a and matrix c + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + # (batch, head, time1, time2) + matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) + + # compute matrix b and matrix d + # (batch, head, time1, 2*time1-1) + matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) + matrix_bd = self.rel_shift(matrix_bd) + + scores = (matrix_ac + matrix_bd) / math.sqrt( + self.d_k + ) # (batch, head, time1, time2) + + return self.forward_attention(v, scores, mask) diff --git a/espnet/nets/pytorch_backend/transformer/convolution.py b/espnet/nets/pytorch_backend/transformer/convolution.py new file mode 100644 index 0000000000000000000000000000000000000000..d659b0307b00521977af2b7df0abad4feaef2376 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/convolution.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2020 Johns Hopkins University (Shinji Watanabe) +# Northwestern Polytechnical University (Pengcheng Guo) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""ConvolutionModule definition.""" + +import torch +from torch import nn + + +class ConvolutionModule(nn.Module): + """ConvolutionModule in Conformer model. + + :param int channels: channels of cnn + :param int kernel_size: kernerl size of cnn + + """ + + def __init__(self, channels, kernel_size, bias=True): + """Construct an ConvolutionModule object.""" + super(ConvolutionModule, self).__init__() + # kernerl_size should be a odd number for 'SAME' padding + assert (kernel_size - 1) % 2 == 0 + + self.pointwise_cov1 = nn.Conv1d( + channels, 2 * channels, kernel_size=1, stride=1, padding=0, bias=bias, + ) + self.depthwise_conv = nn.Conv1d( + channels, + channels, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + groups=channels, + bias=bias, + ) + self.norm = nn.BatchNorm1d(channels) + self.pointwise_cov2 = nn.Conv1d( + channels, channels, kernel_size=1, stride=1, padding=0, bias=bias, + ) + self.activation = Swish() + + def forward(self, x): + """Compute covolution module. 
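
        Example (editorial sketch, not part of the original patch):

        >>> import torch
        >>> conv = ConvolutionModule(channels=256, kernel_size=31)
        >>> x = torch.randn(2, 50, 256)
        >>> conv(x).shape
        torch.Size([2, 50, 256])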
+ + :param torch.Tensor x: (batch, time, size) + :return torch.Tensor: convoluted `value` (batch, time, d_model) + """ + # exchange the temporal dimension and the feature dimension + x = x.transpose(1, 2) + + # GLU mechanism + x = self.pointwise_cov1(x) # (batch, 2*channel, dim) + x = nn.functional.glu(x, dim=1) # (batch, channel, dim) + + # 1D Depthwise Conv + x = self.depthwise_conv(x) + x = self.activation(self.norm(x)) + + x = self.pointwise_cov2(x) + + return x.transpose(1, 2) + + +class Swish(nn.Module): + """Construct an Swish object.""" + + def forward(self, x): + """Return Swich activation function.""" + return x * torch.sigmoid(x) diff --git a/espnet/nets/pytorch_backend/transformer/decoder.py b/espnet/nets/pytorch_backend/transformer/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a0cd052ef97dc13dcf560ac3dd8d2e6763c9cb51 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/decoder.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Decoder definition.""" + +from typing import Any +from typing import List +from typing import Tuple + +import torch + +from espnet.nets.pytorch_backend.nets_utils import rename_state_dict +from espnet.nets.pytorch_backend.transformer.attention import MultiHeadedAttention +from espnet.nets.pytorch_backend.transformer.decoder_layer import DecoderLayer +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.positionwise_feed_forward import ( + PositionwiseFeedForward, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.repeat import repeat +from espnet.nets.scorer_interface import BatchScorerInterface + + +def _pre_hook( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + # https://github.com/espnet/espnet/commit/3d422f6de8d4f03673b89e1caef698745ec749ea#diff-bffb1396f038b317b2b64dd96e6d3563 + rename_state_dict(prefix + "output_norm.", prefix + "after_norm.", state_dict) + + +class Decoder(BatchScorerInterface, torch.nn.Module): + """Transfomer decoder module. + + :param int odim: output dim + :param int attention_dim: dimention of attention + :param int attention_heads: the number of heads of multi head attention + :param int linear_units: the number of units of position-wise feed forward + :param int num_blocks: the number of decoder blocks + :param float dropout_rate: dropout rate + :param float attention_dropout_rate: dropout rate for attention + :param str or torch.nn.Module input_layer: input layer type + :param bool use_output_layer: whether to use output layer + :param class pos_enc_class: PositionalEncoding or ScaledPositionalEncoding + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. 
x -> x + att(x) + """ + + def __init__( + self, + odim, + attention_dim=256, + attention_heads=4, + linear_units=2048, + num_blocks=6, + dropout_rate=0.1, + positional_dropout_rate=0.1, + self_attention_dropout_rate=0.0, + src_attention_dropout_rate=0.0, + input_layer="embed", + use_output_layer=True, + pos_enc_class=PositionalEncoding, + normalize_before=True, + concat_after=False, + ): + """Construct an Decoder object.""" + torch.nn.Module.__init__(self) + self._register_load_state_dict_pre_hook(_pre_hook) + if input_layer == "embed": + self.embed = torch.nn.Sequential( + torch.nn.Embedding(odim, attention_dim), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer == "linear": + self.embed = torch.nn.Sequential( + torch.nn.Linear(odim, attention_dim), + torch.nn.LayerNorm(attention_dim), + torch.nn.Dropout(dropout_rate), + torch.nn.ReLU(), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif isinstance(input_layer, torch.nn.Module): + self.embed = torch.nn.Sequential( + input_layer, pos_enc_class(attention_dim, positional_dropout_rate) + ) + else: + raise NotImplementedError("only `embed` or torch.nn.Module is supported.") + self.normalize_before = normalize_before + self.decoders = repeat( + num_blocks, + lambda: DecoderLayer( + attention_dim, + MultiHeadedAttention( + attention_heads, attention_dim, self_attention_dropout_rate + ), + MultiHeadedAttention( + attention_heads, attention_dim, src_attention_dropout_rate + ), + PositionwiseFeedForward(attention_dim, linear_units, dropout_rate), + dropout_rate, + normalize_before, + concat_after, + ), + ) + if self.normalize_before: + self.after_norm = LayerNorm(attention_dim) + if use_output_layer: + self.output_layer = torch.nn.Linear(attention_dim, odim) + else: + self.output_layer = None + + def forward(self, tgt, tgt_mask, memory, memory_mask): + """Forward decoder. + :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out) + if input_layer == "embed" + input tensor (batch, maxlen_out, #mels) + in the other cases + :param torch.Tensor tgt_mask: input token mask, (batch, maxlen_out) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :param torch.Tensor memory: encoded memory, float32 (batch, maxlen_in, feat) + :param torch.Tensor memory_mask: encoded memory mask, (batch, maxlen_in) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :return x: decoded token score before softmax (batch, maxlen_out, token) + if use_output_layer is True, + final block outputs (batch, maxlen_out, attention_dim) + in the other cases + :rtype: torch.Tensor + :return tgt_mask: score mask before softmax (batch, maxlen_out) + :rtype: torch.Tensor + """ + x = self.embed(tgt) + x, tgt_mask, memory, memory_mask = self.decoders( + x, tgt_mask, memory, memory_mask + ) + if self.normalize_before: + x = self.after_norm(x) + if self.output_layer is not None: + x = self.output_layer(x) + return x, tgt_mask + + def forward_one_step(self, tgt, tgt_mask, memory, memory_mask=None, cache=None): + """Forward one step. 
+ :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out) + :param torch.Tensor tgt_mask: input token mask, (batch, maxlen_out) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :param torch.Tensor memory: encoded memory, float32 (batch, maxlen_in, feat) + :param List[torch.Tensor] cache: + cached output list of (batch, max_time_out-1, size) + :return y, cache: NN output value and cache per `self.decoders`. + `y.shape` is (batch, maxlen_out, token) + :rtype: Tuple[torch.Tensor, List[torch.Tensor]] + """ + x = self.embed(tgt) + if cache is None: + cache = [None] * len(self.decoders) + new_cache = [] + for c, decoder in zip(cache, self.decoders): + x, tgt_mask, memory, memory_mask = decoder( + x, tgt_mask, memory, memory_mask, cache=c + ) + new_cache.append(x) + + if self.normalize_before: + y = self.after_norm(x[:, -1]) + else: + y = x[:, -1] + if self.output_layer is not None: + y = torch.log_softmax(self.output_layer(y), dim=-1) + + return y, new_cache + + # beam search API (see ScorerInterface) + def score(self, ys, state, x): + """Score.""" + ys_mask = subsequent_mask(len(ys), device=x.device).unsqueeze(0) + logp, state = self.forward_one_step( + ys.unsqueeze(0), ys_mask, x.unsqueeze(0), cache=state + ) + return logp.squeeze(0), state + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + """ + # merge states + n_batch = len(ys) + n_layers = len(self.decoders) + if states[0] is None: + batch_state = None + else: + # transpose state of [batch, layer] into [layer, batch] + batch_state = [ + torch.stack([states[b][l] for b in range(n_batch)]) + for l in range(n_layers) + ] + + # batch decoding + ys_mask = subsequent_mask(ys.size(-1), device=xs.device).unsqueeze(0) + logp, states = self.forward_one_step(ys, ys_mask, xs, cache=batch_state) + + # transpose state of [layer, batch] into [batch, layer] + state_list = [[states[l][b] for l in range(n_layers)] for b in range(n_batch)] + return logp, state_list diff --git a/espnet/nets/pytorch_backend/transformer/decoder_layer.py b/espnet/nets/pytorch_backend/transformer/decoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..c9658d9c603d3c8bc8a8a68d9d6c9904c03886ba --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/decoder_layer.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Decoder self-attention layer definition.""" + +import torch +from torch import nn + +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm + + +class DecoderLayer(nn.Module): + """Single decoder layer module. 
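# Editor's illustration (not part of the committed files): greedy incremental decoding
# with Decoder.forward_one_step() and its per-layer cache, as used by the beam-search
# API above. The vocabulary size, <sos>/<eos> ids and the encoder memory are made-up
# placeholders; it assumes the espnet package from this PR is importable.
import torch
from espnet.nets.pytorch_backend.transformer.decoder import Decoder
from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask

odim, sos, eos = 500, 499, 499                     # assumed token inventory
decoder = Decoder(odim=odim, attention_dim=256).eval()
memory = torch.randn(1, 50, 256)                   # (batch, maxlen_in, attention_dim)

ys = torch.tensor([[sos]])                         # running hypothesis (1, length)
cache = None
with torch.no_grad():
    for _ in range(10):
        ys_mask = subsequent_mask(ys.size(1)).unsqueeze(0)
        logp, cache = decoder.forward_one_step(ys, ys_mask, memory, cache=cache)
        next_id = logp.argmax(dim=-1, keepdim=True)  # (1, 1) most likely next token
        ys = torch.cat([ys, next_id], dim=1)
        if next_id.item() == eos:
            break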
+ :param int size: input dim + :param espnet.nets.pytorch_backend.transformer.attention.MultiHeadedAttention + self_attn: self attention module + :param espnet.nets.pytorch_backend.transformer.attention.MultiHeadedAttention + src_attn: source attention module + :param espnet.nets.pytorch_backend.transformer.positionwise_feed_forward. + PositionwiseFeedForward feed_forward: feed forward layer module + :param float dropout_rate: dropout rate + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. x -> x + att(x) + """ + + def __init__( + self, + size, + self_attn, + src_attn, + feed_forward, + dropout_rate, + normalize_before=True, + concat_after=False, + ): + """Construct an DecoderLayer object.""" + super(DecoderLayer, self).__init__() + self.size = size + self.self_attn = self_attn + self.src_attn = src_attn + self.feed_forward = feed_forward + self.norm1 = LayerNorm(size) + self.norm2 = LayerNorm(size) + self.norm3 = LayerNorm(size) + self.dropout = nn.Dropout(dropout_rate) + self.normalize_before = normalize_before + self.concat_after = concat_after + if self.concat_after: + self.concat_linear1 = nn.Linear(size + size, size) + self.concat_linear2 = nn.Linear(size + size, size) + + def forward(self, tgt, tgt_mask, memory, memory_mask, cache=None): + """Compute decoded features. + Args: + tgt (torch.Tensor): + decoded previous target features (batch, max_time_out, size) + tgt_mask (torch.Tensor): mask for x (batch, max_time_out) + memory (torch.Tensor): encoded source features (batch, max_time_in, size) + memory_mask (torch.Tensor): mask for memory (batch, max_time_in) + cache (torch.Tensor): cached output (batch, max_time_out-1, size) + """ + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + + if cache is None: + tgt_q = tgt + tgt_q_mask = tgt_mask + else: + # compute only the last frame query keeping dim: max_time_out -> 1 + assert cache.shape == ( + tgt.shape[0], + tgt.shape[1] - 1, + self.size, + ), f"{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" + tgt_q = tgt[:, -1:, :] + residual = residual[:, -1:, :] + tgt_q_mask = None + if tgt_mask is not None: + tgt_q_mask = tgt_mask[:, -1:, :] + + if self.concat_after: + tgt_concat = torch.cat( + (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)), dim=-1 + ) + x = residual + self.concat_linear1(tgt_concat) + else: + x = residual + self.dropout(self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)) + if not self.normalize_before: + x = self.norm1(x) + + residual = x + if self.normalize_before: + x = self.norm2(x) + if self.concat_after: + x_concat = torch.cat( + (x, self.src_attn(x, memory, memory, memory_mask)), dim=-1 + ) + x = residual + self.concat_linear2(x_concat) + else: + x = residual + self.dropout(self.src_attn(x, memory, memory, memory_mask)) + if not self.normalize_before: + x = self.norm2(x) + + residual = x + if self.normalize_before: + x = self.norm3(x) + x = residual + self.dropout(self.feed_forward(x)) + if not self.normalize_before: + x = self.norm3(x) + + if cache is not None: + x = torch.cat([cache, x], dim=1) + + return x, tgt_mask, memory, memory_mask diff --git a/espnet/nets/pytorch_backend/transformer/embedding.py b/espnet/nets/pytorch_backend/transformer/embedding.py new file mode 100644 index 
0000000000000000000000000000000000000000..82f59bbb75efbcc4f9c11448a2b0598e2b707993 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/embedding.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Positional Encoding Module.""" + +import math + +import torch + + +def _pre_hook( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + """Perform pre-hook in load_state_dict for backward compatibility. + Note: + We saved self.pe until v.0.5.2 but we have omitted it later. + Therefore, we remove the item "pe" from `state_dict` for backward compatibility. + """ + k = prefix + "pe" + if k in state_dict: + state_dict.pop(k) + + +class PositionalEncoding(torch.nn.Module): + """Positional encoding. + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + reverse (bool): Whether to reverse the input position. Only for + the class LegacyRelPositionalEncoding. We remove it in the current + class RelPositionalEncoding. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False): + """Construct an PositionalEncoding object.""" + super(PositionalEncoding, self).__init__() + self.d_model = d_model + self.reverse = reverse + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + self._register_load_state_dict_pre_hook(_pre_hook) + + def extend_pe(self, x): + """Reset the positional encodings.""" + if self.pe is not None: + if self.pe.size(1) >= x.size(1): + if self.pe.dtype != x.dtype or self.pe.device != x.device: + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + pe = torch.zeros(x.size(1), self.d_model) + if self.reverse: + position = torch.arange( + x.size(1) - 1, -1, -1.0, dtype=torch.float32 + ).unsqueeze(1) + else: + position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, self.d_model, 2, dtype=torch.float32) + * -(math.log(10000.0) / self.d_model) + ) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + self.pe = pe.to(device=x.device, dtype=x.dtype) + + def forward(self, x: torch.Tensor): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + self.pe[:, : x.size(1)] + return self.dropout(x) + + +class ScaledPositionalEncoding(PositionalEncoding): + """Scaled positional encoding module. + See Sec. 3.2 https://arxiv.org/abs/1809.08895 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__(d_model=d_model, dropout_rate=dropout_rate, max_len=max_len) + self.alpha = torch.nn.Parameter(torch.tensor(1.0)) + + def reset_parameters(self): + """Reset parameters.""" + self.alpha.data = torch.tensor(1.0) + + def forward(self, x): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). 
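# Editor's illustration (not part of the committed files): the sinusoidal table that
# extend_pe() above precomputes, written out for a toy d_model and length. The table
# is then combined with the input as x * sqrt(d_model) + pe (PositionalEncoding) or
# x + alpha * pe (ScaledPositionalEncoding), followed by dropout.
import math
import torch

d_model, length = 8, 4                         # toy sizes for inspection
position = torch.arange(0, length, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(
    torch.arange(0, d_model, 2, dtype=torch.float32)
    * -(math.log(10000.0) / d_model)
)
pe = torch.zeros(length, d_model)
pe[:, 0::2] = torch.sin(position * div_term)   # even dimensions
pe[:, 1::2] = torch.cos(position * div_term)   # odd dimensions
print(pe.shape)                                # torch.Size([4, 8])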
+ """ + self.extend_pe(x) + x = x + self.alpha * self.pe[:, : x.size(1)] + return self.dropout(x) + + +class LegacyRelPositionalEncoding(PositionalEncoding): + """Relative positional encoding module (old version). + Details can be found in https://github.com/espnet/espnet/pull/2816. + See : Appendix B in https://arxiv.org/abs/1901.02860 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__( + d_model=d_model, + dropout_rate=dropout_rate, + max_len=max_len, + reverse=True, + ) + + def forward(self, x): + """Compute positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + torch.Tensor: Positional embedding tensor (1, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + pos_emb = self.pe[:, : x.size(1)] + return self.dropout(x), self.dropout(pos_emb) + + +class RelPositionalEncoding(torch.nn.Module): + """Relative positional encoding module (new implementation). + Details can be found in https://github.com/espnet/espnet/pull/2816. + See : Appendix B in https://arxiv.org/abs/1901.02860 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Construct an PositionalEncoding object.""" + super(RelPositionalEncoding, self).__init__() + self.d_model = d_model + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + + def extend_pe(self, x): + """Reset the positional encodings.""" + if self.pe is not None: + # self.pe contains both positive and negative parts + # the length of self.pe is 2 * input_len - 1 + if self.pe.size(1) >= x.size(1) * 2 - 1: + if self.pe.dtype != x.dtype or self.pe.device != x.device: + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + # Suppose `i` means to the position of query vecotr and `j` means the + # position of key vector. We use position relative positions when keys + # are to the left (i>j) and negative relative positions otherwise (i x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. 
x -> x + att(x) + :param str positionwise_layer_type: linear of conv1d + :param int positionwise_conv_kernel_size: kernel size of positionwise conv1d layer + :param str encoder_attn_layer_type: encoder attention layer type + :param bool macaron_style: whether to use macaron style for positionwise layer + :param bool use_cnn_module: whether to use convolution module + :param bool zero_triu: whether to zero the upper triangular part of attention matrix + :param int cnn_module_kernel: kernerl size of convolution module + :param int padding_idx: padding_idx for input_layer=embed + """ + + def __init__( + self, + idim, + attention_dim=256, + attention_heads=4, + linear_units=2048, + num_blocks=6, + dropout_rate=0.1, + positional_dropout_rate=0.1, + attention_dropout_rate=0.0, + input_layer="conv2d", + pos_enc_class=PositionalEncoding, + normalize_before=True, + concat_after=False, + positionwise_layer_type="linear", + positionwise_conv_kernel_size=1, + macaron_style=False, + encoder_attn_layer_type="mha", + use_cnn_module=False, + zero_triu=False, + cnn_module_kernel=31, + padding_idx=-1, + relu_type="prelu", + a_upsample_ratio=1, + ): + """Construct an Encoder object.""" + super(Encoder, self).__init__() + self._register_load_state_dict_pre_hook(_pre_hook) + + if encoder_attn_layer_type == "rel_mha": + pos_enc_class = RelPositionalEncoding + elif encoder_attn_layer_type == "legacy_rel_mha": + pos_enc_class = LegacyRelPositionalEncoding + # -- frontend module. + if input_layer == "conv1d": + self.frontend = Conv1dResNet( + relu_type=relu_type, + a_upsample_ratio=a_upsample_ratio, + ) + elif input_layer == "conv3d": + self.frontend = Conv3dResNet(relu_type=relu_type) + else: + self.frontend = None + # -- backend module. + if input_layer == "linear": + self.embed = torch.nn.Sequential( + torch.nn.Linear(idim, attention_dim), + torch.nn.LayerNorm(attention_dim), + torch.nn.Dropout(dropout_rate), + torch.nn.ReLU(), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer == "conv2d": + self.embed = Conv2dSubsampling( + idim, + attention_dim, + dropout_rate, + pos_enc_class(attention_dim, dropout_rate), + ) + elif input_layer == "vgg2l": + self.embed = VGG2L(idim, attention_dim) + elif input_layer == "embed": + self.embed = torch.nn.Sequential( + torch.nn.Embedding(idim, attention_dim, padding_idx=padding_idx), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif isinstance(input_layer, torch.nn.Module): + self.embed = torch.nn.Sequential( + input_layer, pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer in ["conv1d", "conv3d"]: + self.embed = torch.nn.Sequential( + torch.nn.Linear(512, attention_dim), + pos_enc_class(attention_dim, positional_dropout_rate) + ) + elif input_layer is None: + self.embed = torch.nn.Sequential( + pos_enc_class(attention_dim, positional_dropout_rate) + ) + else: + raise ValueError("unknown input_layer: " + input_layer) + self.normalize_before = normalize_before + if positionwise_layer_type == "linear": + positionwise_layer = PositionwiseFeedForward + positionwise_layer_args = (attention_dim, linear_units, dropout_rate) + elif positionwise_layer_type == "conv1d": + positionwise_layer = MultiLayeredConv1d + positionwise_layer_args = ( + attention_dim, + linear_units, + positionwise_conv_kernel_size, + dropout_rate, + ) + elif positionwise_layer_type == "conv1d-linear": + positionwise_layer = Conv1dLinear + positionwise_layer_args = ( + attention_dim, + linear_units, + positionwise_conv_kernel_size, + 
dropout_rate, + ) + else: + raise NotImplementedError("Support only linear or conv1d.") + + if encoder_attn_layer_type == "mha": + encoder_attn_layer = MultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + ) + elif encoder_attn_layer_type == "legacy_rel_mha": + encoder_attn_layer = LegacyRelPositionMultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + ) + elif encoder_attn_layer_type == "rel_mha": + encoder_attn_layer = RelPositionMultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + zero_triu, + ) + else: + raise ValueError("unknown encoder_attn_layer: " + encoder_attn_layer) + + convolution_layer = ConvolutionModule + convolution_layer_args = (attention_dim, cnn_module_kernel) + + self.encoders = repeat( + num_blocks, + lambda: EncoderLayer( + attention_dim, + encoder_attn_layer(*encoder_attn_layer_args), + positionwise_layer(*positionwise_layer_args), + convolution_layer(*convolution_layer_args) if use_cnn_module else None, + dropout_rate, + normalize_before, + concat_after, + macaron_style, + ), + ) + if self.normalize_before: + self.after_norm = LayerNorm(attention_dim) + + def forward(self, xs, masks, extract_resnet_feats=False): + """Encode input sequence. + + :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :param str extract_features: the position for feature extraction + :return: position embedded tensor and mask + :rtype Tuple[torch.Tensor, torch.Tensor]: + """ + if isinstance(self.frontend, (Conv1dResNet, Conv3dResNet)): + xs = self.frontend(xs) + if extract_resnet_feats: + return xs + + if isinstance(self.embed, Conv2dSubsampling): + xs, masks = self.embed(xs, masks) + else: + xs = self.embed(xs) + + xs, masks = self.encoders(xs, masks) + + if isinstance(xs, tuple): + xs = xs[0] + + if self.normalize_before: + xs = self.after_norm(xs) + + return xs, masks + + def forward_one_step(self, xs, masks, cache=None): + """Encode input frame. + + :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :param List[torch.Tensor] cache: cache tensors + :return: position embedded tensor, mask and new cache + :rtype Tuple[torch.Tensor, torch.Tensor, List[torch.Tensor]]: + """ + if isinstance(self.frontend, (Conv1dResNet, Conv3dResNet)): + xs = self.frontend(xs) + + if isinstance(self.embed, Conv2dSubsampling): + xs, masks = self.embed(xs, masks) + else: + xs = self.embed(xs) + if cache is None: + cache = [None for _ in range(len(self.encoders))] + new_cache = [] + for c, e in zip(cache, self.encoders): + xs, masks = e(xs, masks, cache=c) + new_cache.append(xs) + if self.normalize_before: + xs = self.after_norm(xs) + return xs, masks, new_cache diff --git a/espnet/nets/pytorch_backend/transformer/encoder_layer.py b/espnet/nets/pytorch_backend/transformer/encoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..528dd8cc92a7273fa57013d7f935c6f22ab0ca58 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/encoder_layer.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Encoder self-attention layer definition.""" + +import copy +import torch + +from torch import nn + +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm + + +class EncoderLayer(nn.Module): + """Encoder layer module. 
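# Editor's illustration (not part of the committed files): instantiating the encoder
# defined above as a Conformer-style block stack over pre-extracted 512-dim features
# (input_layer="linear", so neither ResNet frontend is used). All hyper-parameters and
# shapes are example values; it assumes the espnet package from this PR is importable.
import torch
from espnet.nets.pytorch_backend.transformer.encoder import Encoder

encoder = Encoder(
    idim=512,
    attention_dim=256,
    attention_heads=4,
    num_blocks=2,
    input_layer="linear",
    encoder_attn_layer_type="rel_mha",    # relative positional multi-head attention
    macaron_style=True,
    use_cnn_module=True,
    cnn_module_kernel=31,
)
xs = torch.randn(2, 75, 512)              # (batch, frames, idim), e.g. 3 s at 25 fps
masks = torch.ones(2, 1, 75, dtype=torch.bool)
out, out_masks = encoder(xs, masks)
print(out.shape)                          # torch.Size([2, 75, 256])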
+ + :param int size: input dim + :param espnet.nets.pytorch_backend.transformer.attention. + MultiHeadedAttention self_attn: self attention module + RelPositionMultiHeadedAttention self_attn: self attention module + :param espnet.nets.pytorch_backend.transformer.positionwise_feed_forward. + PositionwiseFeedForward feed_forward: + feed forward module + :param espnet.nets.pytorch_backend.transformer.convolution. + ConvolutionModule feed_foreard: + feed forward module + :param float dropout_rate: dropout rate + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. x -> x + att(x) + :param bool macaron_style: whether to use macaron style for PositionwiseFeedForward + + """ + + def __init__( + self, + size, + self_attn, + feed_forward, + conv_module, + dropout_rate, + normalize_before=True, + concat_after=False, + macaron_style=False, + ): + """Construct an EncoderLayer object.""" + super(EncoderLayer, self).__init__() + self.self_attn = self_attn + self.feed_forward = feed_forward + self.ff_scale = 1.0 + self.conv_module = conv_module + self.macaron_style = macaron_style + self.norm_ff = LayerNorm(size) # for the FNN module + self.norm_mha = LayerNorm(size) # for the MHA module + if self.macaron_style: + self.feed_forward_macaron = copy.deepcopy(feed_forward) + self.ff_scale = 0.5 + # for another FNN module in macaron style + self.norm_ff_macaron = LayerNorm(size) + if self.conv_module is not None: + self.norm_conv = LayerNorm(size) # for the CNN module + self.norm_final = LayerNorm(size) # for the final output of the block + self.dropout = nn.Dropout(dropout_rate) + self.size = size + self.normalize_before = normalize_before + self.concat_after = concat_after + if self.concat_after: + self.concat_linear = nn.Linear(size + size, size) + + def forward(self, x_input, mask, cache=None): + """Compute encoded features. 
+ + :param torch.Tensor x_input: encoded source features (batch, max_time_in, size) + :param torch.Tensor mask: mask for x (batch, max_time_in) + :param torch.Tensor cache: cache for x (batch, max_time_in - 1, size) + :rtype: Tuple[torch.Tensor, torch.Tensor] + """ + if isinstance(x_input, tuple): + x, pos_emb = x_input[0], x_input[1] + else: + x, pos_emb = x_input, None + + # whether to use macaron style + if self.macaron_style: + residual = x + if self.normalize_before: + x = self.norm_ff_macaron(x) + x = residual + self.ff_scale * self.dropout(self.feed_forward_macaron(x)) + if not self.normalize_before: + x = self.norm_ff_macaron(x) + + # multi-headed self-attention module + residual = x + if self.normalize_before: + x = self.norm_mha(x) + + if cache is None: + x_q = x + else: + assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size) + x_q = x[:, -1:, :] + residual = residual[:, -1:, :] + mask = None if mask is None else mask[:, -1:, :] + + if pos_emb is not None: + x_att = self.self_attn(x_q, x, x, pos_emb, mask) + else: + x_att = self.self_attn(x_q, x, x, mask) + + if self.concat_after: + x_concat = torch.cat((x, x_att), dim=-1) + x = residual + self.concat_linear(x_concat) + else: + x = residual + self.dropout(x_att) + if not self.normalize_before: + x = self.norm_mha(x) + + # convolution module + if self.conv_module is not None: + residual = x + if self.normalize_before: + x = self.norm_conv(x) + x = residual + self.dropout(self.conv_module(x)) + if not self.normalize_before: + x = self.norm_conv(x) + + # feed forward module + residual = x + if self.normalize_before: + x = self.norm_ff(x) + x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) + if not self.normalize_before: + x = self.norm_ff(x) + + if self.conv_module is not None: + x = self.norm_final(x) + + if cache is not None: + x = torch.cat([cache, x], dim=1) + + if pos_emb is not None: + return (x, pos_emb), mask + else: + return x, mask diff --git a/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py b/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0d8b30338a778da9ba27870d51db24afd10d9b24 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Label smoothing module.""" + +import torch +from torch import nn + + +class LabelSmoothingLoss(nn.Module): + """Label-smoothing loss. + + :param int size: the number of class + :param int padding_idx: ignored class id + :param float smoothing: smoothing rate (0.0 means the conventional CE) + :param bool normalize_length: normalize loss by sequence length if True + :param torch.nn.Module criterion: loss function to be smoothed + """ + + def __init__( + self, + size, + padding_idx, + smoothing, + normalize_length=False, + criterion=nn.KLDivLoss(reduction="none"), + ): + """Construct an LabelSmoothingLoss object.""" + super(LabelSmoothingLoss, self).__init__() + self.criterion = criterion + self.padding_idx = padding_idx + self.confidence = 1.0 - smoothing + self.smoothing = smoothing + self.size = size + self.true_dist = None + self.normalize_length = normalize_length + + def forward(self, x, target): + """Compute loss between x and target. 
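# Editor's illustration (not part of the committed files): with size=5 and
# smoothing=0.1 the smoothed target row for gold label 2 becomes
# [0.025, 0.025, 0.9, 0.025, 0.025], and positions equal to padding_idx are ignored.
# Shapes and the padding id below are example values.
import torch
from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import LabelSmoothingLoss

criterion = LabelSmoothingLoss(size=5, padding_idx=-1, smoothing=0.1)
pred = torch.randn(2, 3, 5)                      # (batch, seqlen, class) raw scores
target = torch.tensor([[2, 4, -1], [0, 1, 3]])   # -1 marks a padded position
print(criterion(pred, target))                   # scalar loss, normalised by batch size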
+ + :param torch.Tensor x: prediction (batch, seqlen, class) + :param torch.Tensor target: + target signal masked with self.padding_id (batch, seqlen) + :return: scalar float value + :rtype torch.Tensor + """ + assert x.size(2) == self.size + batch_size = x.size(0) + x = x.view(-1, self.size) + target = target.view(-1) + with torch.no_grad(): + true_dist = x.clone() + true_dist.fill_(self.smoothing / (self.size - 1)) + ignore = target == self.padding_idx # (B,) + total = len(target) - ignore.sum().item() + target = target.masked_fill(ignore, 0) # avoid -1 index + true_dist.scatter_(1, target.unsqueeze(1), self.confidence) + kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) + denom = total if self.normalize_length else batch_size + return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/espnet/nets/pytorch_backend/transformer/layer_norm.py b/espnet/nets/pytorch_backend/transformer/layer_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..db8be30ff70554edb179109037665e51c04510ec --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/layer_norm.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Layer normalization module.""" + +import torch + + +class LayerNorm(torch.nn.LayerNorm): + """Layer normalization module. + + :param int nout: output dim size + :param int dim: dimension to be normalized + """ + + def __init__(self, nout, dim=-1): + """Construct an LayerNorm object.""" + super(LayerNorm, self).__init__(nout, eps=1e-12) + self.dim = dim + + def forward(self, x): + """Apply layer normalization. + + :param torch.Tensor x: input tensor + :return: layer normalized tensor + :rtype torch.Tensor + """ + if self.dim == -1: + return super(LayerNorm, self).forward(x) + return super(LayerNorm, self).forward(x.transpose(1, -1)).transpose(1, -1) diff --git a/espnet/nets/pytorch_backend/transformer/mask.py b/espnet/nets/pytorch_backend/transformer/mask.py new file mode 100644 index 0000000000000000000000000000000000000000..127f7a200e22674916ae976e045c6eacee81bc1c --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/mask.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Mask module.""" + +from distutils.version import LooseVersion + +import torch + +is_torch_1_2_plus = LooseVersion(torch.__version__) >= LooseVersion("1.2.0") +# LooseVersion('1.2.0') == LooseVersion(torch.__version__) can't include e.g. 1.2.0+aaa +is_torch_1_2 = ( + LooseVersion("1.3") > LooseVersion(torch.__version__) >= LooseVersion("1.2") +) +datatype = torch.bool if is_torch_1_2_plus else torch.uint8 + + +def subsequent_mask(size, device="cpu", dtype=datatype): + """Create mask for subsequent steps (1, size, size). + + :param int size: size of mask + :param str device: "cpu" or "cuda" or torch.Tensor.device + :param torch.dtype dtype: result dtype + :rtype: torch.Tensor + >>> subsequent_mask(3) + [[1, 0, 0], + [1, 1, 0], + [1, 1, 1]] + """ + if is_torch_1_2 and dtype == torch.bool: + # torch=1.2 doesn't support tril for bool tensor + ret = torch.ones(size, size, device=device, dtype=torch.uint8) + return torch.tril(ret, out=ret).type(dtype) + else: + ret = torch.ones(size, size, device=device, dtype=dtype) + return torch.tril(ret, out=ret) + + +def target_mask(ys_in_pad, ignore_id): + """Create mask for decoder self-attention. 
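# Editor's illustration (not part of the committed files): the causal mask produced by
# subsequent_mask() and the combined padding/causal mask produced by target_mask() for
# a tiny batch; ignore_id=-1 is an assumed padding id.
import torch
from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask, target_mask

print(subsequent_mask(3))
# tensor([[ True, False, False],
#         [ True,  True, False],
#         [ True,  True,  True]])
ys_in_pad = torch.tensor([[7, 3, 5], [7, 3, -1]])    # second sequence has one pad
print(target_mask(ys_in_pad, ignore_id=-1).shape)    # torch.Size([2, 3, 3])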
+ + :param torch.Tensor ys_pad: batch of padded target sequences (B, Lmax) + :param int ignore_id: index of padding + :param torch.dtype dtype: result dtype + :rtype: torch.Tensor + """ + ys_mask = ys_in_pad != ignore_id + m = subsequent_mask(ys_mask.size(-1), device=ys_mask.device).unsqueeze(0) + return ys_mask.unsqueeze(-2) & m diff --git a/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py b/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..fdb7fe70810eda54c727367efc986ce02ce581cc --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Layer modules for FFT block in FastSpeech (Feed-forward Transformer).""" + +import torch + + +class MultiLayeredConv1d(torch.nn.Module): + """Multi-layered conv1d for Transformer block. + + This is a module of multi-leyered conv1d designed + to replace positionwise feed-forward network + in Transforner block, which is introduced in + `FastSpeech: Fast, Robust and Controllable Text to Speech`_. + + .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`: + https://arxiv.org/pdf/1905.09263.pdf + + """ + + def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate): + """Initialize MultiLayeredConv1d module. + + Args: + in_chans (int): Number of input channels. + hidden_chans (int): Number of hidden channels. + kernel_size (int): Kernel size of conv1d. + dropout_rate (float): Dropout rate. + + """ + super(MultiLayeredConv1d, self).__init__() + self.w_1 = torch.nn.Conv1d( + in_chans, + hidden_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.w_2 = torch.nn.Conv1d( + hidden_chans, + in_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Batch of input tensors (B, ..., in_chans). + + Returns: + Tensor: Batch of output tensors (B, ..., hidden_chans). + + """ + x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1) + return self.w_2(self.dropout(x).transpose(-1, 1)).transpose(-1, 1) + + +class Conv1dLinear(torch.nn.Module): + """Conv1D + Linear for Transformer block. + + A variant of MultiLayeredConv1d, which replaces second conv-layer to linear. + + """ + + def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate): + """Initialize Conv1dLinear module. + + Args: + in_chans (int): Number of input channels. + hidden_chans (int): Number of hidden channels. + kernel_size (int): Kernel size of conv1d. + dropout_rate (float): Dropout rate. + + """ + super(Conv1dLinear, self).__init__() + self.w_1 = torch.nn.Conv1d( + in_chans, + hidden_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.w_2 = torch.nn.Linear(hidden_chans, in_chans) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Batch of input tensors (B, ..., in_chans). + + Returns: + Tensor: Batch of output tensors (B, ..., hidden_chans). 
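# Editor's illustration (not part of the committed files): both FastSpeech-style
# variants keep the (batch, time, in_chans) shape, so either can replace
# PositionwiseFeedForward inside an encoder block. Sizes are example values.
import torch
from espnet.nets.pytorch_backend.transformer.multi_layer_conv import (
    Conv1dLinear,
    MultiLayeredConv1d,
)

x = torch.randn(2, 30, 256)
ff_conv = MultiLayeredConv1d(in_chans=256, hidden_chans=1024, kernel_size=3, dropout_rate=0.1)
ff_lin = Conv1dLinear(in_chans=256, hidden_chans=1024, kernel_size=3, dropout_rate=0.1)
print(ff_conv(x).shape, ff_lin(x).shape)   # both torch.Size([2, 30, 256])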
+ + """ + x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1) + return self.w_2(self.dropout(x)) diff --git a/espnet/nets/pytorch_backend/transformer/optimizer.py b/espnet/nets/pytorch_backend/transformer/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..7f3f36259bec33ad388be46ec2dccfcadaf249be --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/optimizer.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Optimizer module.""" + +import torch + + +class NoamOpt(object): + """Optim wrapper that implements rate.""" + + def __init__(self, model_size, factor, warmup, optimizer): + """Construct an NoamOpt object.""" + self.optimizer = optimizer + self._step = 0 + self.warmup = warmup + self.factor = factor + self.model_size = model_size + self._rate = 0 + + @property + def param_groups(self): + """Return param_groups.""" + return self.optimizer.param_groups + + def step(self): + """Update parameters and rate.""" + self._step += 1 + rate = self.rate() + for p in self.optimizer.param_groups: + p["lr"] = rate + self._rate = rate + self.optimizer.step() + + def rate(self, step=None): + """Implement `lrate` above.""" + if step is None: + step = self._step + return ( + self.factor + * self.model_size ** (-0.5) + * min(step ** (-0.5), step * self.warmup ** (-1.5)) + ) + + def zero_grad(self): + """Reset gradient.""" + self.optimizer.zero_grad() + + def state_dict(self): + """Return state_dict.""" + return { + "_step": self._step, + "warmup": self.warmup, + "factor": self.factor, + "model_size": self.model_size, + "_rate": self._rate, + "optimizer": self.optimizer.state_dict(), + } + + def load_state_dict(self, state_dict): + """Load state_dict.""" + for key, value in state_dict.items(): + if key == "optimizer": + self.optimizer.load_state_dict(state_dict["optimizer"]) + else: + setattr(self, key, value) + + +def get_std_opt(model, d_model, warmup, factor): + """Get standard NoamOpt.""" + base = torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9) + return NoamOpt(d_model, factor, warmup, base) diff --git a/espnet/nets/pytorch_backend/transformer/plot.py b/espnet/nets/pytorch_backend/transformer/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..82413c9608c1ac09efc6b9b11c0bd0ad98aaa3f1 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/plot.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import logging + +import matplotlib.pyplot as plt +import numpy + +from espnet.asr import asr_utils + + +def _plot_and_save_attention(att_w, filename, xtokens=None, ytokens=None): + # dynamically import matplotlib due to not found error + from matplotlib.ticker import MaxNLocator + import os + + d = os.path.dirname(filename) + if not os.path.exists(d): + os.makedirs(d) + w, h = plt.figaspect(1.0 / len(att_w)) + fig = plt.Figure(figsize=(w * 2, h * 2)) + axes = fig.subplots(1, len(att_w)) + if len(att_w) == 1: + axes = [axes] + for ax, aw in zip(axes, att_w): + # plt.subplot(1, len(att_w), h) + ax.imshow(aw.astype(numpy.float32), aspect="auto") + ax.set_xlabel("Input") + ax.set_ylabel("Output") + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.yaxis.set_major_locator(MaxNLocator(integer=True)) + # Labels for major ticks + if xtokens is not None: + 
ax.set_xticks(numpy.linspace(0, len(xtokens) - 1, len(xtokens))) + ax.set_xticks(numpy.linspace(0, len(xtokens) - 1, 1), minor=True) + ax.set_xticklabels(xtokens + [""], rotation=40) + if ytokens is not None: + ax.set_yticks(numpy.linspace(0, len(ytokens) - 1, len(ytokens))) + ax.set_yticks(numpy.linspace(0, len(ytokens) - 1, 1), minor=True) + ax.set_yticklabels(ytokens + [""]) + fig.tight_layout() + return fig + + +def savefig(plot, filename): + plot.savefig(filename) + plt.clf() + + +def plot_multi_head_attention( + data, + attn_dict, + outdir, + suffix="png", + savefn=savefig, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, +): + """Plot multi head attentions. + + :param dict data: utts info from json file + :param dict[str, torch.Tensor] attn_dict: multi head attention dict. + values should be torch.Tensor (head, input_length, output_length) + :param str outdir: dir to save fig + :param str suffix: filename suffix including image type (e.g., png) + :param savefn: function to save + + """ + for name, att_ws in attn_dict.items(): + for idx, att_w in enumerate(att_ws): + filename = "%s/%s.%s.%s" % (outdir, data[idx][0], name, suffix) + dec_len = int(data[idx][1][okey][oaxis]["shape"][0]) + enc_len = int(data[idx][1][ikey][iaxis]["shape"][0]) + xtokens, ytokens = None, None + if "encoder" in name: + att_w = att_w[:, :enc_len, :enc_len] + # for MT + if "token" in data[idx][1][ikey][iaxis].keys(): + xtokens = data[idx][1][ikey][iaxis]["token"].split() + ytokens = xtokens[:] + elif "decoder" in name: + if "self" in name: + att_w = att_w[:, : dec_len + 1, : dec_len + 1] # +1 for + else: + att_w = att_w[:, : dec_len + 1, :enc_len] # +1 for + # for MT + if "token" in data[idx][1][ikey][iaxis].keys(): + xtokens = data[idx][1][ikey][iaxis]["token"].split() + # for ASR/ST/MT + if "token" in data[idx][1][okey][oaxis].keys(): + ytokens = [""] + data[idx][1][okey][oaxis]["token"].split() + if "self" in name: + xtokens = ytokens[:] + else: + logging.warning("unknown name for shaping attention") + fig = _plot_and_save_attention(att_w, filename, xtokens, ytokens) + savefn(fig, filename) + + +class PlotAttentionReport(asr_utils.PlotAttentionReport): + def plotfn(self, *args, **kwargs): + kwargs["ikey"] = self.ikey + kwargs["iaxis"] = self.iaxis + kwargs["okey"] = self.okey + kwargs["oaxis"] = self.oaxis + plot_multi_head_attention(*args, **kwargs) + + def __call__(self, trainer): + attn_dict = self.get_attention_weights() + suffix = "ep.{.updater.epoch}.png".format(trainer) + self.plotfn(self.data, attn_dict, self.outdir, suffix, savefig) + + def get_attention_weights(self): + batch = self.converter([self.transform(self.data)], self.device) + if isinstance(batch, tuple): + att_ws = self.att_vis_fn(*batch) + elif isinstance(batch, dict): + att_ws = self.att_vis_fn(**batch) + return att_ws + + def log_attentions(self, logger, step): + def log_fig(plot, filename): + from os.path import basename + + logger.add_figure(basename(filename), plot, step) + plt.clf() + + attn_dict = self.get_attention_weights() + self.plotfn(self.data, attn_dict, self.outdir, "", log_fig) diff --git a/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py b/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py new file mode 100644 index 0000000000000000000000000000000000000000..219679209c2c4ab8ac71bbaf174bb21db65499dc --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki 
Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Positionwise feed forward layer definition.""" + +import torch + + +class PositionwiseFeedForward(torch.nn.Module): + """Positionwise feed forward layer. + + :param int idim: input dimenstion + :param int hidden_units: number of hidden units + :param float dropout_rate: dropout rate + + """ + + def __init__(self, idim, hidden_units, dropout_rate): + """Construct an PositionwiseFeedForward object.""" + super(PositionwiseFeedForward, self).__init__() + self.w_1 = torch.nn.Linear(idim, hidden_units) + self.w_2 = torch.nn.Linear(hidden_units, idim) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Forward funciton.""" + return self.w_2(self.dropout(torch.relu(self.w_1(x)))) diff --git a/espnet/nets/pytorch_backend/transformer/raw_embeddings.py b/espnet/nets/pytorch_backend/transformer/raw_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..22d4074dc1613912afed007d9063f030113b003d --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/raw_embeddings.py @@ -0,0 +1,77 @@ +import torch +import logging + +from espnet.nets.pytorch_backend.backbones.conv3d_extractor import Conv3dResNet +from espnet.nets.pytorch_backend.backbones.conv1d_extractor import Conv1dResNet + + +class VideoEmbedding(torch.nn.Module): + """Video Embedding + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class, backbone_type="resnet", relu_type="prelu"): + super(VideoEmbedding, self).__init__() + self.trunk = Conv3dResNet( + backbone_type=backbone_type, + relu_type=relu_type + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(idim, odim), + pos_enc_class, + ) + + def forward(self, x, x_mask, extract_feats=None): + """video embedding for x + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :param str extract_features: the position for feature extraction + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + """ + x_resnet, x_mask = self.trunk(x, x_mask) + x = self.out(x_resnet) + if extract_feats: + return x, x_mask, x_resnet + else: + return x, x_mask + + +class AudioEmbedding(torch.nn.Module): + """Audio Embedding + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class, relu_type="prelu", a_upsample_ratio=1): + super(AudioEmbedding, self).__init__() + self.trunk = Conv1dResNet( + relu_type=relu_type, + a_upsample_ratio=a_upsample_ratio, + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(idim, odim), + pos_enc_class, + ) + + def forward(self, x, x_mask, extract_feats=None): + """audio embedding for x + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :param str extract_features: the position for feature extraction + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + """ + x_resnet, x_mask = self.trunk(x, x_mask) + x = self.out(x_resnet) + if extract_feats: + return x, x_mask, x_resnet + else: + return x, x_mask diff --git a/espnet/nets/pytorch_backend/transformer/repeat.py b/espnet/nets/pytorch_backend/transformer/repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..5298fd3aeaf378e7a30999f66529e2d710b8c78d --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/repeat.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# 
-*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Repeat the same layer definition.""" + +import torch + + +class MultiSequential(torch.nn.Sequential): + """Multi-input multi-output torch.nn.Sequential.""" + + def forward(self, *args): + """Repeat.""" + for m in self: + args = m(*args) + return args + + +def repeat(N, fn): + """Repeat module N times. + + :param int N: repeat time + :param function fn: function to generate module + :return: repeated modules + :rtype: MultiSequential + """ + return MultiSequential(*[fn() for _ in range(N)]) diff --git a/espnet/nets/pytorch_backend/transformer/subsampling.py b/espnet/nets/pytorch_backend/transformer/subsampling.py new file mode 100644 index 0000000000000000000000000000000000000000..612d6d6cf0b9fc8c6ea44d141df205e34d34fd38 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/subsampling.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Subsampling layer definition.""" + +import torch + + +class Conv2dSubsampling(torch.nn.Module): + """Convolutional 2D subsampling (to 1/4 length). + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + :param nn.Module pos_enc_class: positional encoding layer + + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class): + """Construct an Conv2dSubsampling object.""" + super(Conv2dSubsampling, self).__init__() + self.conv = torch.nn.Sequential( + torch.nn.Conv2d(1, odim, 3, 2), + torch.nn.ReLU(), + torch.nn.Conv2d(odim, odim, 3, 2), + torch.nn.ReLU(), + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim), pos_enc_class, + ) + + def forward(self, x, x_mask): + """Subsample x. + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + or Tuple[Tuple[torch.Tensor, torch.Tensor], torch.Tensor] + """ + x = x.unsqueeze(1) # (b, c, t, f) + x = self.conv(x) + b, c, t, f = x.size() + # if RelPositionalEncoding, x: Tuple[torch.Tensor, torch.Tensor] + # else x: torch.Tensor + x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) + if x_mask is None: + return x, None + return x, x_mask[:, :, :-2:2][:, :, :-2:2] diff --git a/espnet/nets/scorer_interface.py b/espnet/nets/scorer_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..946ec6be317603d87cfd938cc096d11b7bcbfbdf --- /dev/null +++ b/espnet/nets/scorer_interface.py @@ -0,0 +1,188 @@ +"""Scorer interface module.""" + +from typing import Any +from typing import List +from typing import Tuple + +import torch +import warnings + + +class ScorerInterface: + """Scorer interface for beam search. + + The scorer performs scoring of the all tokens in vocabulary. 
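# Editor's illustration (not part of the committed files): a minimal ScorerInterface
# implementation showing the init_state()/score() contract the beam search expects.
# The class name, vocabulary size and tensors are made-up examples.
import torch
from espnet.nets.scorer_interface import ScorerInterface

class UniformBonus(ScorerInterface):
    def __init__(self, n_vocab: int):
        self.n_vocab = n_vocab

    def score(self, y, state, x):
        # identical score for every candidate token; no state is carried over
        return torch.zeros(self.n_vocab, device=x.device, dtype=x.dtype), None

scorer = UniformBonus(n_vocab=500)
enc = torch.randn(50, 256)                 # (xlen, n_feat) encoder output
prefix = torch.tensor([3, 7, 11])          # running hypothesis tokens
scores, state = scorer.score(prefix, scorer.init_state(enc), enc)
print(scores.shape)                        # torch.Size([500])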
+ + Examples: + * Search heuristics + * :class:`espnet.nets.scorers.length_bonus.LengthBonus` + * Decoder networks of the sequence-to-sequence models + * :class:`espnet.nets.pytorch_backend.nets.transformer.decoder.Decoder` + * :class:`espnet.nets.pytorch_backend.nets.rnn.decoders.Decoder` + * Neural language models + * :class:`espnet.nets.pytorch_backend.lm.transformer.TransformerLM` + * :class:`espnet.nets.pytorch_backend.lm.default.DefaultRNNLM` + * :class:`espnet.nets.pytorch_backend.lm.seq_rnn.SequentialRNNLM` + + """ + + def init_state(self, x: torch.Tensor) -> Any: + """Get an initial state for decoding (optional). + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + return None + + def select_state(self, state: Any, i: int, new_id: int = None) -> Any: + """Select state with relative ids in the main beam search. + + Args: + state: Decoder state for prefix tokens + i (int): Index to select a state in the main beam search + new_id (int): New label index to select a state if necessary + + Returns: + state: pruned state + + """ + return None if state is None else state[i] + + def score( + self, y: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): The encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + scores for next token that has a shape of `(n_vocab)` + and next state for ys + + """ + raise NotImplementedError + + def final_score(self, state: Any) -> float: + """Score eos (optional). + + Args: + state: Scorer state for prefix tokens + + Returns: + float: final score + + """ + return 0.0 + + +class BatchScorerInterface(ScorerInterface): + """Batch scorer interface.""" + + def batch_init_state(self, x: torch.Tensor) -> Any: + """Get an initial state for decoding (optional). + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + return self.init_state(x) + + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + warnings.warn( + "{} batch score is implemented through for loop not parallelized".format( + self.__class__.__name__ + ) + ) + scores = list() + outstates = list() + for i, (y, state, x) in enumerate(zip(ys, states, xs)): + score, outstate = self.score(y, state, x) + outstates.append(outstate) + scores.append(score) + scores = torch.cat(scores, 0).view(ys.shape[0], -1) + return scores, outstates + + +class PartialScorerInterface(ScorerInterface): + """Partial scorer interface for beam search. + + The partial scorer performs scoring when non-partial scorer finished scoring, + and receives pre-pruned next tokens to score because it is too heavy to score + all the tokens. 
+ + Examples: + * Prefix search for connectionist-temporal-classification models + * :class:`espnet.nets.scorers.ctc.CTCPrefixScorer` + + """ + + def score_partial( + self, y: torch.Tensor, next_tokens: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + y (torch.Tensor): 1D prefix token + next_tokens (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): The encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + raise NotImplementedError + + +class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface): + """Batch partial scorer interface for beam search.""" + + def batch_score_partial( + self, + ys: torch.Tensor, + next_tokens: torch.Tensor, + states: List[Any], + xs: torch.Tensor, + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + next_tokens (torch.Tensor): torch.int64 tokens to score (n_batch, n_token). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for ys that has a shape `(n_batch, n_vocab)` + and next states for ys + """ + raise NotImplementedError diff --git a/espnet/nets/scorers/__init__.py b/espnet/nets/scorers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/scorers/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/scorers/ctc.py b/espnet/nets/scorers/ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..1d12ce6e2a2839e05b83c1b3c4484ef8ae1df855 --- /dev/null +++ b/espnet/nets/scorers/ctc.py @@ -0,0 +1,158 @@ +"""ScorerInterface implementation for CTC.""" + +import numpy as np +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.ctc_prefix_score import CTCPrefixScoreTH +from espnet.nets.scorer_interface import BatchPartialScorerInterface + + +class CTCPrefixScorer(BatchPartialScorerInterface): + """Decoder interface wrapper for CTCPrefixScore.""" + + def __init__(self, ctc: torch.nn.Module, eos: int): + """Initialize class. + + Args: + ctc (torch.nn.Module): The CTC implementation. + For example, :class:`espnet.nets.pytorch_backend.ctc.CTC` + eos (int): The end-of-sequence id. + + """ + self.ctc = ctc + self.eos = eos + self.impl = None + + def init_state(self, x: torch.Tensor): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)).detach().squeeze(0).cpu().numpy() + # TODO(karita): use CTCPrefixScoreTH + self.impl = CTCPrefixScore(logp, 0, self.eos, np) + return 0, self.impl.initial_state() + + def select_state(self, state, i, new_id=None): + """Select state with relative ids in the main beam search. 
+ + Args: + state: Decoder state for prefix tokens + i (int): Index to select a state in the main beam search + new_id (int): New label id to select a state if necessary + + Returns: + state: pruned state + + """ + if type(state) == tuple: + if len(state) == 2: # for CTCPrefixScore + sc, st = state + return sc[i], st[i] + else: # for CTCPrefixScoreTH (need new_id > 0) + r, log_psi, f_min, f_max, scoring_idmap = state + s = log_psi[i, new_id].expand(log_psi.size(1)) + if scoring_idmap is not None: + return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max + else: + return r[:, :, i, new_id], s, f_min, f_max + return None if state is None else state[i] + + def score_partial(self, y, ids, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D prefix token + next_tokens (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + prev_score, state = state + presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state) + tscore = torch.as_tensor( + presub_score - prev_score, device=x.device, dtype=x.dtype + ) + return tscore, (presub_score, new_st) + + def batch_init_state(self, x: torch.Tensor): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)) # assuming batch_size = 1 + xlen = torch.tensor([logp.size(1)]) + self.impl = CTCPrefixScoreTH(logp, xlen, 0, self.eos) + return None + + def batch_score_partial(self, y, ids, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D prefix token + ids (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + batch_state = ( + ( + torch.stack([s[0] for s in state], dim=2), + torch.stack([s[1] for s in state]), + state[0][2], + state[0][3], + ) + if state[0] is not None + else None + ) + return self.impl(y, batch_state, ids) + + def extend_prob(self, x: torch.Tensor): + """Extend probs for decoding. + + This extension is for streaming decoding + as in Eq (14) in https://arxiv.org/abs/2006.14941 + + Args: + x (torch.Tensor): The encoded feature tensor + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)) + self.impl.extend_prob(logp) + + def extend_state(self, state): + """Extend state for decoding. + + This extension is for streaming decoding + as in Eq (14) in https://arxiv.org/abs/2006.14941 + + Args: + state: The states of hyps + + Returns: exteded state + + """ + new_state = [] + for s in state: + new_state.append(self.impl.extend_state(s)) + + return new_state diff --git a/espnet/nets/scorers/length_bonus.py b/espnet/nets/scorers/length_bonus.py new file mode 100644 index 0000000000000000000000000000000000000000..fe32a616211591308c8e7ade144e856230d211d4 --- /dev/null +++ b/espnet/nets/scorers/length_bonus.py @@ -0,0 +1,61 @@ +"""Length bonus module.""" +from typing import Any +from typing import List +from typing import Tuple + +import torch + +from espnet.nets.scorer_interface import BatchScorerInterface + + +class LengthBonus(BatchScorerInterface): + """Length bonus in beam search.""" + + def __init__(self, n_vocab: int): + """Initialize class. 
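# Editor's illustration (not part of the committed files): how a hybrid CTC/attention
# beam search typically mixes the scores produced by scorers such as CTCPrefixScorer,
# the Transformer decoder and a language model. The weights and tensors below are
# arbitrary stand-ins, not outputs of real models.
import torch

ctc_weight, lm_weight = 0.1, 0.3
att_logp = torch.randn(500)    # attention-decoder log-probs over the vocabulary
ctc_logp = torch.randn(500)    # CTC prefix scores for the same candidates
lm_logp = torch.randn(500)     # language-model log-probs

combined = (1.0 - ctc_weight) * att_logp + ctc_weight * ctc_logp + lm_weight * lm_logp
next_token = int(combined.argmax())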
+ + Args: + n_vocab (int): The number of tokens in vocabulary for beam search + + """ + self.n = n_vocab + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and None + + """ + return torch.tensor([1.0], device=x.device, dtype=x.dtype).expand(self.n), None + + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch. + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + return ( + torch.tensor([1.0], device=xs.device, dtype=xs.dtype).expand( + ys.shape[0], self.n + ), + None, + ) diff --git a/espnet/utils/cli_utils.py b/espnet/utils/cli_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a4cd15b72f832d9118aa7a7377a13de16c329b --- /dev/null +++ b/espnet/utils/cli_utils.py @@ -0,0 +1,65 @@ +from collections.abc import Sequence +from distutils.util import strtobool as dist_strtobool +import sys + +import numpy + + +def strtobool(x): + # distutils.util.strtobool returns integer, but it's confusing, + return bool(dist_strtobool(x)) + + +def get_commandline_args(): + extra_chars = [ + " ", + ";", + "&", + "(", + ")", + "|", + "^", + "<", + ">", + "?", + "*", + "[", + "]", + "$", + "`", + '"', + "\\", + "!", + "{", + "}", + ] + + # Escape the extra characters for shell + argv = [ + arg.replace("'", "'\\''") + if all(char not in arg for char in extra_chars) + else "'" + arg.replace("'", "'\\''") + "'" + for arg in sys.argv + ] + + return sys.executable + " " + " ".join(argv) + + +def is_scipy_wav_style(value): + # If Tuple[int, numpy.ndarray] or not + return ( + isinstance(value, Sequence) + and len(value) == 2 + and isinstance(value[0], int) + and isinstance(value[1], numpy.ndarray) + ) + + +def assert_scipy_wav_style(value): + assert is_scipy_wav_style( + value + ), "Must be Tuple[int, numpy.ndarray], but got {}".format( + type(value) + if not isinstance(value, Sequence) + else "{}[{}]".format(type(value), ", ".join(str(type(v)) for v in value)) + ) diff --git a/espnet/utils/dynamic_import.py b/espnet/utils/dynamic_import.py new file mode 100644 index 0000000000000000000000000000000000000000..db885d0069bfb8f59dcf03f5477c13706574b217 --- /dev/null +++ b/espnet/utils/dynamic_import.py @@ -0,0 +1,23 @@ +import importlib + + +def dynamic_import(import_path, alias=dict()): + """dynamic import module and class + + :param str import_path: syntax 'module_name:class_name' + e.g., 'espnet.transform.add_deltas:AddDeltas' + :param dict alias: shortcut for registered class + :return: imported class + """ + if import_path not in alias and ":" not in import_path: + raise ValueError( + "import_path should be one of {} or " + 'include ":", e.g. 
"espnet.transform.add_deltas:AddDeltas" : ' + "{}".format(set(alias), import_path) + ) + if ":" not in import_path: + import_path = alias[import_path] + + module_name, objname = import_path.split(":") + m = importlib.import_module(module_name) + return getattr(m, objname) diff --git a/espnet/utils/fill_missing_args.py b/espnet/utils/fill_missing_args.py new file mode 100644 index 0000000000000000000000000000000000000000..a0fd117529569976780436c0d79e7ce158cd44e9 --- /dev/null +++ b/espnet/utils/fill_missing_args.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Nagoya University (Tomoki Hayashi) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import argparse +import logging + + +def fill_missing_args(args, add_arguments): + """Fill missing arguments in args. + + Args: + args (Namespace or None): Namesapce containing hyperparameters. + add_arguments (function): Function to add arguments. + + Returns: + Namespace: Arguments whose missing ones are filled with default value. + + Examples: + >>> from argparse import Namespace + >>> from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import Tacotron2 + >>> args = Namespace() + >>> fill_missing_args(args, Tacotron2.add_arguments_fn) + Namespace(aconv_chans=32, aconv_filts=15, adim=512, atype='location', ...) + + """ + # check argument type + assert isinstance(args, argparse.Namespace) or args is None + assert callable(add_arguments) + + # get default arguments + default_args, _ = add_arguments(argparse.ArgumentParser()).parse_known_args() + + # convert to dict + args = {} if args is None else vars(args) + default_args = vars(default_args) + + for key, value in default_args.items(): + if key not in args: + logging.info( + 'attribute "%s" does not exist. use default %s.' % (key, str(value)) + ) + args[key] = value + + return argparse.Namespace(**args) diff --git a/pipelines/.DS_Store b/pipelines/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4e746b2e413c60c4bbae49875e5f9bf1451c8fae Binary files /dev/null and b/pipelines/.DS_Store differ diff --git a/pipelines/data/.DS_Store b/pipelines/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/pipelines/data/.DS_Store differ diff --git a/pipelines/data/data_module.py b/pipelines/data/data_module.py new file mode 100644 index 0000000000000000000000000000000000000000..576da503b4a1d36775b7acaffbc5335ef1b777fb --- /dev/null +++ b/pipelines/data/data_module.py @@ -0,0 +1,68 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torchaudio +import torchvision +from .transforms import AudioTransform, VideoTransform + + +class AVSRDataLoader: + def __init__(self, modality, speed_rate=1, transform=True, detector="retinaface", convert_gray=True): + self.modality = modality + self.transform = transform + if self.modality in ["audio", "audiovisual"]: + self.audio_transform = AudioTransform() + if self.modality in ["video", "audiovisual"]: + if detector == "mediapipe": + from pipelines.detectors.mediapipe.video_process import VideoProcess + self.video_process = VideoProcess(convert_gray=convert_gray) + if detector == "retinaface": + from pipelines.detectors.retinaface.video_process import VideoProcess + self.video_process = VideoProcess(convert_gray=convert_gray) + self.video_transform = VideoTransform(speed_rate=speed_rate) + + + def load_data(self, data_filename, landmarks=None, transform=True): + if self.modality == "audio": + audio, sample_rate = self.load_audio(data_filename) + audio = self.audio_process(audio, sample_rate) + return self.audio_transform(audio) if self.transform else audio + if self.modality == "video": + video = self.load_video(data_filename) + video = self.video_process(video, landmarks) + video = torch.tensor(video) + return self.video_transform(video) if self.transform else video + if self.modality == "audiovisual": + rate_ratio = 640 + audio, sample_rate = self.load_audio(data_filename) + audio = self.audio_process(audio, sample_rate) + video = self.load_video(data_filename) + video = self.video_process(video, landmarks) + video = torch.tensor(video) + min_t = min(len(video), audio.size(1) // rate_ratio) + audio = audio[:, :min_t*rate_ratio] + video = video[:min_t] + if self.transform: + audio = self.audio_transform(audio) + video = self.video_transform(video) + return video, audio + + + def load_audio(self, data_filename): + waveform, sample_rate = torchaudio.load(data_filename, normalize=True) + return waveform, sample_rate + + + def load_video(self, data_filename): + return torchvision.io.read_video(data_filename, pts_unit='sec')[0].numpy() + + + def audio_process(self, waveform, sample_rate, target_sample_rate=16000): + if sample_rate != target_sample_rate: + waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate) + waveform = torch.mean(waveform, dim=0, keepdim=True) + return waveform diff --git a/pipelines/data/transforms.py b/pipelines/data/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..53a8d3513d9e19cd82d7df80692c335a940e56fc --- /dev/null +++ b/pipelines/data/transforms.py @@ -0,0 +1,44 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torchaudio +import torchvision + + +class FunctionalModule(torch.nn.Module): + def __init__(self, functional): + super().__init__() + self.functional = functional + + def forward(self, input): + return self.functional(input) + + +class VideoTransform: + def __init__(self, speed_rate): + self.video_pipeline = torch.nn.Sequential( + FunctionalModule(lambda x: x.unsqueeze(-1)), + FunctionalModule(lambda x: x if speed_rate == 1 else torch.index_select(x, dim=0, index=torch.linspace(0, x.shape[0]-1, int(x.shape[0] / speed_rate), dtype=torch.int64))), + FunctionalModule(lambda x: x.permute(3, 0, 1, 2)), + FunctionalModule(lambda x: x / 255.), + torchvision.transforms.CenterCrop(88), + torchvision.transforms.Normalize(0.421, 0.165), + ) + + def __call__(self, sample): + return self.video_pipeline(sample) + + +class AudioTransform: + def __init__(self): + self.audio_pipeline = torch.nn.Sequential( + FunctionalModule(lambda x: torch.nn.functional.layer_norm(x, x.shape, eps=0)), + FunctionalModule(lambda x: x.transpose(0, 1)), + ) + + def __call__(self, sample): + return self.audio_pipeline(sample) diff --git a/pipelines/detectors/.DS_Store b/pipelines/detectors/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7b97b0b8349a56e17441fafb92fe013d6b1e82bc Binary files /dev/null and b/pipelines/detectors/.DS_Store differ diff --git a/pipelines/detectors/mediapipe/20words_mean_face.npy b/pipelines/detectors/mediapipe/20words_mean_face.npy new file mode 100755 index 0000000000000000000000000000000000000000..fc5cd3103270737752bebaec497c39b49b2af970 --- /dev/null +++ b/pipelines/detectors/mediapipe/20words_mean_face.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf68b2044171e1160716df7c53e8bbfaa0ee8c61fb41171d04cb6092bb81422 +size 1168 diff --git a/pipelines/detectors/mediapipe/detector.py b/pipelines/detectors/mediapipe/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..5f51c49418458851c517e956404b13d786ee7b6c --- /dev/null +++ b/pipelines/detectors/mediapipe/detector.py @@ -0,0 +1,57 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import warnings +import torchvision +import mediapipe as mp +import os +import cv2 +import numpy as np + + +class LandmarksDetector: + def __init__(self): + self.mp_face_detection = mp.solutions.face_detection + self.short_range_detector = self.mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=0) + self.full_range_detector = self.mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=1) + + def __call__(self, filename): + video_frames = torchvision.io.read_video(filename, pts_unit='sec')[0].numpy() + landmarks = self.detect(video_frames, self.full_range_detector) + if all(element is None for element in landmarks): + landmarks = self.detect(video_frames, self.short_range_detector) + assert any(l is not None for l in landmarks), "Cannot detect any frames in the video" + return landmarks + + def detect(self, video_frames, detector): + landmarks = [] + for frame in video_frames: + results = detector.process(frame) + if not results.detections: + landmarks.append(None) + continue + face_points = [] + for idx, detected_faces in enumerate(results.detections): + max_id, max_size = 0, 0 + bboxC = detected_faces.location_data.relative_bounding_box + ih, iw, ic = frame.shape + bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih) + bbox_size = (bbox[2] - bbox[0]) + (bbox[3] - bbox[1]) + if bbox_size > max_size: + max_id, max_size = idx, bbox_size + lmx = [ + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(0).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(0).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(1).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(1).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(2).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(2).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(3).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(3).value].y * ih)], + ] + face_points.append(lmx) + landmarks.append(np.array(face_points[max_id])) + return landmarks diff --git a/pipelines/detectors/mediapipe/video_process.py b/pipelines/detectors/mediapipe/video_process.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd579fc042467c62f0ab4a7ee7be472362c5de7 --- /dev/null +++ b/pipelines/detectors/mediapipe/video_process.py @@ -0,0 +1,142 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import cv2 +import numpy as np +from skimage import transform as tf + + +def linear_interpolate(landmarks, start_idx, stop_idx): + start_landmarks = landmarks[start_idx] + stop_landmarks = landmarks[stop_idx] + delta = stop_landmarks - start_landmarks + for idx in range(1, stop_idx-start_idx): + landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta + return landmarks + + +def warp_img(src, dst, img, std_size): + tform = tf.estimate_transform('similarity', src, dst) + warped = tf.warp(img, inverse_map=tform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped, tform + + +def apply_transform(transform, img, std_size): + warped = tf.warp(img, inverse_map=transform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped + + +def cut_patch(img, landmarks, height, width, threshold=5): + center_x, center_y = np.mean(landmarks, axis=0) + # Check for too much bias in height and width + if abs(center_y - img.shape[0] / 2) > height + threshold: + raise Exception('too much bias in height') + if abs(center_x - img.shape[1] / 2) > width + threshold: + raise Exception('too much bias in width') + # Calculate bounding box coordinates + y_min = int(round(np.clip(center_y - height, 0, img.shape[0]))) + y_max = int(round(np.clip(center_y + height, 0, img.shape[0]))) + x_min = int(round(np.clip(center_x - width, 0, img.shape[1]))) + x_max = int(round(np.clip(center_x + width, 0, img.shape[1]))) + # Cut the image + cutted_img = np.copy(img[y_min:y_max, x_min:x_max]) + return cutted_img + + +class VideoProcess: + def __init__(self, mean_face_path="20words_mean_face.npy", crop_width=96, crop_height=96, + start_idx=3, stop_idx=4, window_margin=12, convert_gray=True): + self.reference = np.load(os.path.join(os.path.dirname(__file__), mean_face_path)) + self.crop_width = crop_width + self.crop_height = crop_height + self.start_idx = start_idx + self.stop_idx = stop_idx + self.window_margin = window_margin + self.convert_gray = convert_gray + + def __call__(self, video, landmarks): + # Pre-process landmarks: interpolate frames that are not detected + preprocessed_landmarks = self.interpolate_landmarks(landmarks) + # Exclude corner cases: no landmark in all frames + if not preprocessed_landmarks: + return + # Affine transformation and crop patch + sequence = self.crop_patch(video, preprocessed_landmarks) + assert sequence is not None, f"cannot crop a patch from {filename}." 
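The call above first repairs the landmark track before anything is cropped: interpolate_landmarks locates runs of frames where the detector returned None, and linear_interpolate (defined earlier in this file) fills them linearly between the two nearest detected frames. A minimal standalone sketch of that gap-filling, with made-up landmark arrays (only the shapes matter), might look like:

import numpy as np

# Sketch only, not part of the patch; the landmark values are fabricated.
landmarks = [np.array([[0.0, 0.0]]), None, None, np.array([[3.0, 6.0]])]

valid = [i for i, lm in enumerate(landmarks) if lm is not None]   # [0, 3]
start_idx, stop_idx = valid[0], valid[1]
delta = landmarks[stop_idx] - landmarks[start_idx]
for idx in range(1, stop_idx - start_idx):
    landmarks[start_idx + idx] = landmarks[start_idx] + idx / float(stop_idx - start_idx) * delta

print(landmarks[1], landmarks[2])   # [[1. 2.]] [[2. 4.]]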
+ return sequence + + + def crop_patch(self, video, landmarks): + sequence = [] + for frame_idx, frame in enumerate(video): + window_margin = min(self.window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx) + smoothed_landmarks = np.mean([landmarks[x] for x in range(frame_idx - window_margin, frame_idx + window_margin + 1)], axis=0) + smoothed_landmarks += landmarks[frame_idx].mean(axis=0) - smoothed_landmarks.mean(axis=0) + transformed_frame, transformed_landmarks = self.affine_transform(frame,smoothed_landmarks,self.reference,grayscale=self.convert_gray) + patch = cut_patch(transformed_frame, transformed_landmarks[self.start_idx:self.stop_idx], self.crop_height//2, self.crop_width//2,) + sequence.append(patch) + return np.array(sequence) + + + def interpolate_landmarks(self, landmarks): + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + if not valid_frames_idx: + return None + + for idx in range(1, len(valid_frames_idx)): + if valid_frames_idx[idx] - valid_frames_idx[idx - 1] > 1: + landmarks = linear_interpolate(landmarks, valid_frames_idx[idx - 1], valid_frames_idx[idx]) + + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + # Handle corner case: keep frames at the beginning or at the end that failed to be detected + if valid_frames_idx: + landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0] + landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1]) + + assert all(lm is not None for lm in landmarks), "not every frame has landmark" + + return landmarks + + + def affine_transform(self, frame, landmarks, reference, grayscale=False, + target_size=(256, 256), reference_size=(256, 256), stable_points=(0, 1, 2, 3), + interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, border_value=0): + if grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + stable_reference = self.get_stable_reference(reference, reference_size, target_size) + transform = self.estimate_affine_transform(landmarks, stable_points, stable_reference) + transformed_frame, transformed_landmarks = self.apply_affine_transform(frame, landmarks, transform, target_size, interpolation, border_mode, border_value) + + return transformed_frame, transformed_landmarks + + + def get_stable_reference(self, reference, reference_size, target_size): + # -- right eye, left eye, nose tip, mouth center + stable_reference = np.vstack([ + np.mean(reference[36:42], axis=0), + np.mean(reference[42:48], axis=0), + np.mean(reference[31:36], axis=0), + np.mean(reference[48:68], axis=0) + ]) + stable_reference[:, 0] -= (reference_size[0] - target_size[0]) / 2.0 + stable_reference[:, 1] -= (reference_size[1] - target_size[1]) / 2.0 + return stable_reference + + + def estimate_affine_transform(self, landmarks, stable_points, stable_reference): + return cv2.estimateAffinePartial2D(np.vstack([landmarks[x] for x in stable_points]), stable_reference, method=cv2.LMEDS)[0] + + + def apply_affine_transform(self, frame, landmarks, transform, target_size, interpolation, border_mode, border_value): + transformed_frame = cv2.warpAffine(frame, transform, dsize=(target_size[0], target_size[1]), + flags=interpolation, borderMode=border_mode, borderValue=border_value) + transformed_landmarks = np.matmul(landmarks, transform[:, :2].transpose()) + transform[:, 2].transpose() + return transformed_frame, transformed_landmarks diff --git a/pipelines/detectors/retinaface/20words_mean_face.npy 
b/pipelines/detectors/retinaface/20words_mean_face.npy new file mode 100755 index 0000000000000000000000000000000000000000..fc5cd3103270737752bebaec497c39b49b2af970 --- /dev/null +++ b/pipelines/detectors/retinaface/20words_mean_face.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf68b2044171e1160716df7c53e8bbfaa0ee8c61fb41171d04cb6092bb81422 +size 1168 diff --git a/pipelines/detectors/retinaface/detector.py b/pipelines/detectors/retinaface/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..ad699adf4b41b8f3eb269b8d9def4d89c87e9e09 --- /dev/null +++ b/pipelines/detectors/retinaface/detector.py @@ -0,0 +1,38 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import warnings +import torchvision +from ibug.face_detection import RetinaFacePredictor +from ibug.face_alignment import FANPredictor +warnings.filterwarnings("ignore") + + +class LandmarksDetector: + def __init__(self, device="cuda:0", model_name='resnet50'): + self.face_detector = RetinaFacePredictor( + device=device, + threshold=0.8, + model=RetinaFacePredictor.get_model(model_name) + ) + self.landmark_detector = FANPredictor(device=device, model=None) + + def __call__(self, filename): + video_frames = torchvision.io.read_video(filename, pts_unit='sec')[0].numpy() + landmarks = [] + for frame in video_frames: + detected_faces = self.face_detector(frame, rgb=False) + face_points, _ = self.landmark_detector(frame, detected_faces, rgb=True) + if len(detected_faces) == 0: + landmarks.append(None) + else: + max_id, max_size = 0, 0 + for idx, bbox in enumerate(detected_faces): + bbox_size = (bbox[2] - bbox[0]) + (bbox[3] - bbox[1]) + if bbox_size > max_size: + max_id, max_size = idx, bbox_size + landmarks.append(face_points[max_id]) + return landmarks diff --git a/pipelines/detectors/retinaface/video_process.py b/pipelines/detectors/retinaface/video_process.py new file mode 100644 index 0000000000000000000000000000000000000000..383bd12c3e073a40309999c6b633f10501014ca8 --- /dev/null +++ b/pipelines/detectors/retinaface/video_process.py @@ -0,0 +1,136 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import cv2 +import numpy as np +from skimage import transform as tf + + +def linear_interpolate(landmarks, start_idx, stop_idx): + start_landmarks = landmarks[start_idx] + stop_landmarks = landmarks[stop_idx] + delta = stop_landmarks - start_landmarks + for idx in range(1, stop_idx-start_idx): + landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta + return landmarks + + +def warp_img(src, dst, img, std_size): + tform = tf.estimate_transform('similarity', src, dst) + warped = tf.warp(img, inverse_map=tform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped, tform + + +def apply_transform(transform, img, std_size): + warped = tf.warp(img, inverse_map=transform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped + + +def cut_patch(img, landmarks, height, width, threshold=5): + center_x, center_y = np.mean(landmarks, axis=0) + # Check for too much bias in height and width + if abs(center_y - img.shape[0] / 2) > height + threshold: + raise Exception('too much bias in height') + if abs(center_x - img.shape[1] / 2) > width + threshold: + raise Exception('too much bias in width') + # Calculate bounding box coordinates + y_min = int(round(np.clip(center_y - height, 0, img.shape[0]))) + y_max = int(round(np.clip(center_y + height, 0, img.shape[0]))) + x_min = int(round(np.clip(center_x - width, 0, img.shape[1]))) + x_max = int(round(np.clip(center_x + width, 0, img.shape[1]))) + # Cut the image + cutted_img = np.copy(img[y_min:y_max, x_min:x_max]) + return cutted_img + + +class VideoProcess: + def __init__(self, mean_face_path="20words_mean_face.npy", crop_width=96, crop_height=96, + start_idx=48, stop_idx=68, window_margin=12, convert_gray=True): + self.reference = np.load(os.path.join(os.path.dirname(__file__), mean_face_path)) + self.crop_width = crop_width + self.crop_height = crop_height + self.start_idx = start_idx + self.stop_idx = stop_idx + self.window_margin = window_margin + self.convert_gray = convert_gray + + def __call__(self, video, landmarks): + # Pre-process landmarks: interpolate frames that are not detected + preprocessed_landmarks = self.interpolate_landmarks(landmarks) + # Exclude corner cases: no landmark in all frames or number of frames is less than window length + if not preprocessed_landmarks or len(preprocessed_landmarks) < self.window_margin: + return + # Affine transformation and crop patch + sequence = self.crop_patch(video, preprocessed_landmarks) + assert sequence is not None, f"cannot crop a patch from {filename}." 
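crop_patch (just below) does not crop around the raw per-frame landmarks: it first averages them over a temporal window and then shifts the average so it keeps the current frame's centroid, which smooths detector jitter without moving the mouth region. A small sketch of that smoothing step, using this file's default window_margin of 12 and a fabricated 68-point track:

import numpy as np

# Sketch only, not part of the patch; the landmark track is random.
window_margin = 12
landmarks = [np.random.rand(68, 2) * 256 for _ in range(30)]   # 30 frames, 68 (x, y) points

frame_idx = 15
margin = min(window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx)   # 6
window = [landmarks[i] for i in range(frame_idx - margin, frame_idx + margin + 1)]
smoothed = np.mean(window, axis=0)
# keep the current frame's centroid, so smoothing removes jitter rather than position
smoothed += landmarks[frame_idx].mean(axis=0) - smoothed.mean(axis=0)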
+ return sequence + + + def crop_patch(self, video, landmarks): + sequence = [] + for frame_idx, frame in enumerate(video): + window_margin = min(self.window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx) + smoothed_landmarks = np.mean([landmarks[x] for x in range(frame_idx - window_margin, frame_idx + window_margin + 1)], axis=0) + smoothed_landmarks += landmarks[frame_idx].mean(axis=0) - smoothed_landmarks.mean(axis=0) + transformed_frame, transformed_landmarks = self.affine_transform(frame,smoothed_landmarks,self.reference,grayscale=self.convert_gray) + patch = cut_patch(transformed_frame, transformed_landmarks[self.start_idx:self.stop_idx], self.crop_height//2, self.crop_width//2,) + sequence.append(patch) + return np.array(sequence) + + + def interpolate_landmarks(self, landmarks): + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + if not valid_frames_idx: + return None + + for idx in range(1, len(valid_frames_idx)): + if valid_frames_idx[idx] - valid_frames_idx[idx - 1] > 1: + landmarks = linear_interpolate(landmarks, valid_frames_idx[idx - 1], valid_frames_idx[idx]) + + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + # Handle corner case: keep frames at the beginning or at the end that failed to be detected + if valid_frames_idx: + landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0] + landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1]) + + assert all(lm is not None for lm in landmarks), "not every frame has landmark" + + return landmarks + + + def affine_transform(self, frame, landmarks, reference, grayscale=True, + target_size=(256, 256), reference_size=(256, 256), stable_points=(28, 33, 36, 39, 42, 45, 48, 54), + interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, border_value=0): + if grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + stable_reference = self.get_stable_reference(reference, stable_points, reference_size, target_size) + transform = self.estimate_affine_transform(landmarks, stable_points, stable_reference) + transformed_frame, transformed_landmarks = self.apply_affine_transform(frame, landmarks, transform, target_size, interpolation, border_mode, border_value) + + return transformed_frame, transformed_landmarks + + + def get_stable_reference(self, reference, stable_points, reference_size, target_size): + stable_reference = np.vstack([reference[x] for x in stable_points]) + stable_reference[:, 0] -= (reference_size[0] - target_size[0]) / 2.0 + stable_reference[:, 1] -= (reference_size[1] - target_size[1]) / 2.0 + return stable_reference + + + def estimate_affine_transform(self, landmarks, stable_points, stable_reference): + return cv2.estimateAffinePartial2D(np.vstack([landmarks[x] for x in stable_points]), stable_reference, method=cv2.LMEDS)[0] + + + def apply_affine_transform(self, frame, landmarks, transform, target_size, interpolation, border_mode, border_value): + transformed_frame = cv2.warpAffine(frame, transform, dsize=(target_size[0], target_size[1]), + flags=interpolation, borderMode=border_mode, borderValue=border_value) + transformed_landmarks = np.matmul(landmarks, transform[:, :2].transpose()) + transform[:, 2].transpose() + return transformed_frame, transformed_landmarks diff --git a/pipelines/metrics/measures.py b/pipelines/metrics/measures.py new file mode 100644 index 0000000000000000000000000000000000000000..a28e3626f6e7dc6774018b5ca15026111e7f24f9 --- 
/dev/null +++ b/pipelines/metrics/measures.py @@ -0,0 +1,43 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +# This code refers https://github.com/espnet/espnet/blob/24c3676a8d4c2e60d2726e9bcd9bdbed740610e0/espnet/nets/e2e_asr_common.py#L213-L249 + +import numpy as np + +def get_wer(s, ref): + return get_er(s.split(), ref.split()) + +def get_cer(s, ref): + return get_er(s.replace(" ", ""), ref.replace(" ", "")) + +def get_er(s, ref): + """ + FROM wikipedia levenshtein distance + s: list of words/char in sentence to measure + ref: list of words/char in reference + """ + + costs = np.zeros((len(s) + 1, len(ref) + 1)) + for i in range(len(s) + 1): + costs[i, 0] = i + for j in range(len(ref) + 1): + costs[0, j] = j + + for j in range(1, len(ref) + 1): + for i in range(1, len(s) + 1): + cost = None + if s[i-1] == ref[j-1]: + cost = 0 + else: + cost = 1 + costs[i,j] = min( + costs[i-1, j] + 1, + costs[i, j-1] + 1, + costs[i-1, j-1] + cost + ) + + return costs[-1,-1] / len(ref) diff --git a/pipelines/model.py b/pipelines/model.py new file mode 100644 index 0000000000000000000000000000000000000000..515987ab5f87f08727785218247064b2b7d23405 --- /dev/null +++ b/pipelines/model.py @@ -0,0 +1,99 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import json +import torch +import argparse +import numpy as np + +from espnet.asr.asr_utils import torch_load +from espnet.asr.asr_utils import get_model_conf +from espnet.asr.asr_utils import add_results_to_json +from espnet.nets.batch_beam_search import BatchBeamSearch +from espnet.nets.lm_interface import dynamic_import_lm +from espnet.nets.scorers.length_bonus import LengthBonus +from espnet.nets.pytorch_backend.e2e_asr_transformer import E2E + + +class AVSR(torch.nn.Module): + def __init__(self, modality, model_path, model_conf, rnnlm=None, rnnlm_conf=None, + penalty=0., ctc_weight=0.1, lm_weight=0., beam_size=40, device="cuda:0"): + super(AVSR, self).__init__() + self.device = device + + if modality == "audiovisual": + from espnet.nets.pytorch_backend.e2e_asr_transformer_av import E2E + else: + from espnet.nets.pytorch_backend.e2e_asr_transformer import E2E + + with open(model_conf, "rb") as f: + confs = json.load(f) + args = confs if isinstance(confs, dict) else confs[2] + self.train_args = argparse.Namespace(**args) + + labels_type = getattr(self.train_args, "labels_type", "char") + if labels_type == "char": + self.token_list = self.train_args.char_list + elif labels_type == "unigram5000": + file_path = os.path.join(os.path.dirname(__file__), "tokens", "unigram5000_units.txt") + self.token_list = [''] + [word.split()[0] for word in open(file_path).read().splitlines()] + [''] + self.odim = len(self.token_list) + + self.model = E2E(self.odim, self.train_args) + self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) + self.model.to(device=self.device).eval() + + self.beam_search = get_beam_search_decoder(self.model, self.token_list, rnnlm, rnnlm_conf, penalty, ctc_weight, lm_weight, beam_size) + self.beam_search.to(device=self.device).eval() + + def infer(self, data): + with torch.no_grad(): + if isinstance(data, tuple): + enc_feats = self.model.encode(data[0].to(self.device), data[1].to(self.device)) + else: + enc_feats = self.model.encode(data.to(self.device)) + 
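The self.beam_search call that follows ranks hypotheses by a weighted sum of the scorers assembled in get_beam_search_decoder further down: the attention decoder, the CTC prefix scorer, the optional subword RNN language model, and LengthBonus. Roughly, and with made-up log-scores (this is a sketch of the weighted combination, not of ESPnet's actual BatchBeamSearch internals):

# Sketch only, not part of the patch; the numeric scores are fabricated.
ctc_weight, lm_weight, penalty = 0.1, 0.3, 0.0
weights = {
    "decoder": 1.0 - ctc_weight,   # attention decoder
    "ctc": ctc_weight,             # CTC prefix score
    "lm": lm_weight,               # RNN language model
    "length_bonus": penalty,       # LengthBonus scorer
}
partial = {"decoder": -1.2, "ctc": -2.5, "lm": -0.8, "length_bonus": 1.0}
hyp_score = sum(weights[k] * partial[k] for k in weights)   # -1.57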
nbest_hyps = self.beam_search(enc_feats) + nbest_hyps = [h.asdict() for h in nbest_hyps[: min(len(nbest_hyps), 1)]] + transcription = add_results_to_json(nbest_hyps, self.token_list) + transcription = transcription.replace("▁", " ").strip() + return transcription.replace("", "") + + +def get_beam_search_decoder(model, token_list, rnnlm=None, rnnlm_conf=None, penalty=0, ctc_weight=0.1, lm_weight=0., beam_size=40): + sos = model.odim - 1 + eos = model.odim - 1 + scorers = model.scorers() + + if not rnnlm: + lm = None + else: + lm_args = get_model_conf(rnnlm, rnnlm_conf) + lm_model_module = getattr(lm_args, "model_module", "default") + lm_class = dynamic_import_lm(lm_model_module, lm_args.backend) + lm = lm_class(len(token_list), lm_args) + torch_load(rnnlm, lm) + lm.eval() + + scorers["lm"] = lm + scorers["length_bonus"] = LengthBonus(len(token_list)) + weights = dict( + decoder=1.0 - ctc_weight, + ctc=ctc_weight, + lm=lm_weight, + length_bonus=penalty, + ) + + return BatchBeamSearch( + beam_size=beam_size, + vocab_size=len(token_list), + weights=weights, + scorers=scorers, + sos=sos, + eos=eos, + token_list=token_list, + pre_beam_score_key=None if ctc_weight == 1.0 else "decoder", + ) diff --git a/pipelines/pipeline.py b/pipelines/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..94e620bdb69690ab9c4b78bd456d19586115b6e4 --- /dev/null +++ b/pipelines/pipeline.py @@ -0,0 +1,73 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import torch +import pickle +from configparser import ConfigParser + +from pipelines.model import AVSR +from pipelines.data.data_module import AVSRDataLoader + + +class InferencePipeline(torch.nn.Module): + def __init__(self, config_filename, detector="retinaface", face_track=False, device="cuda:0"): + super(InferencePipeline, self).__init__() + assert os.path.isfile(config_filename), f"config_filename: {config_filename} does not exist." 
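The two v_fps values read just below only matter through their ratio: speed_rate = input_v_fps / model_v_fps is handed to AVSRDataLoader and, from there, to VideoTransform, whose torch.index_select step keeps roughly one frame in every speed_rate. A minimal sketch with a hypothetical 50 fps recording and a 25 fps model:

import torch

# Sketch only, not part of the patch; the fps values and the dummy clip are made up.
input_v_fps, model_v_fps = 50.0, 25.0
speed_rate = input_v_fps / model_v_fps            # 2.0 -> keep every other frame
x = torch.zeros(100, 88, 88, 1)                   # (frames, H, W, C) dummy clip
index = torch.linspace(0, x.shape[0] - 1, int(x.shape[0] / speed_rate), dtype=torch.int64)
resampled = torch.index_select(x, dim=0, index=index)
print(resampled.shape[0])                         # 50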
+ + config = ConfigParser() + config.read(config_filename) + + # modality configuration + modality = config.get("input", "modality") + + self.modality = modality + # data configuration + input_v_fps = config.getfloat("input", "v_fps") + model_v_fps = config.getfloat("model", "v_fps") + + # model configuration + model_path = config.get("model","model_path") + model_conf = config.get("model","model_conf") + + # language model configuration + rnnlm = config.get("model", "rnnlm") + rnnlm_conf = config.get("model", "rnnlm_conf") + penalty = config.getfloat("decode", "penalty") + ctc_weight = config.getfloat("decode", "ctc_weight") + lm_weight = config.getfloat("decode", "lm_weight") + beam_size = config.getint("decode", "beam_size") + + self.dataloader = AVSRDataLoader(modality, speed_rate=input_v_fps/model_v_fps, detector=detector) + self.model = AVSR(modality, model_path, model_conf, rnnlm, rnnlm_conf, penalty, ctc_weight, lm_weight, beam_size, device) + if face_track and self.modality in ["video", "audiovisual"]: + if detector == "mediapipe": + from pipelines.detectors.mediapipe.detector import LandmarksDetector + self.landmarks_detector = LandmarksDetector() + if detector == "retinaface": + from pipelines.detectors.retinaface.detector import LandmarksDetector + self.landmarks_detector = LandmarksDetector(device="cuda:0") + else: + self.landmarks_detector = None + + + def process_landmarks(self, data_filename, landmarks_filename): + if self.modality == "audio": + return None + if self.modality in ["video", "audiovisual"]: + if isinstance(landmarks_filename, str): + landmarks = pickle.load(open(landmarks_filename, "rb")) + else: + landmarks = self.landmarks_detector(data_filename) + return landmarks + + + def forward(self, data_filename, landmarks_filename=None): + assert os.path.isfile(data_filename), f"data_filename: {data_filename} does not exist." 
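For reference, a typical way to drive this class end to end; the paths below are placeholders, face_track=True makes the pipeline run its own landmark detector, and passing a pickled landmarks file instead skips that step:

from pipelines.pipeline import InferencePipeline

# Usage sketch, not part of the patch; all file paths are placeholders.
pipeline = InferencePipeline("path/to/config.ini", detector="retinaface",
                             face_track=True, device="cuda:0")
print(pipeline("path/to/video.mp4"))                            # landmarks detected on the fly
print(pipeline("path/to/video.mp4", "path/to/landmarks.pkl"))   # reuse precomputed landmarks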
+ landmarks = self.process_landmarks(data_filename, landmarks_filename) + data = self.dataloader.load_data(data_filename, landmarks) + transcript = self.model.infer(data) + return transcript \ No newline at end of file diff --git a/pipelines/tokens/unigram5000_units.txt b/pipelines/tokens/unigram5000_units.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b5c74fc7cfded197692e4631fd894a3ba275da1 --- /dev/null +++ b/pipelines/tokens/unigram5000_units.txt @@ -0,0 +1,5047 @@ + 1 +' 2 +0 3 +00 4 +000 5 +0000 6 +1 7 +2 8 +3 9 +4 10 +44 11 +46 12 +46664 13 +467 14 +47 15 +474 16 +47748 17 +48 18 +484 19 +5 20 +6 21 +64 22 +646 23 +647 24 +66 25 +664 26 +67 27 +677 28 +68 29 +687 30 +688 31 +7 32 +74 33 +747 34 +76 35 +766 36 +77 37 +776 38 +777 39 +78 40 +7864 41 +787 42 +8 43 +84 44 +847 45 +848 46 +86 47 +864 48 +867 49 +87 50 +874 51 +876 52 +877 53 +878 54 +88 55 +884 56 +886 57 +887 58 +888 59 +9 60 +A 61 +AB 62 +ABILITY 63 +ABLE 64 +ABLY 65 +AC 66 +ACH 67 +ACTIVE 68 +AD 69 +ADE 70 +AFT 71 +AG 72 +AGE 73 +AH 74 +AI 75 +AIN 76 +AK 77 +AKE 78 +AL 79 +ALLY 80 +AM 81 +AN 82 +ANA 83 +ANCE 84 +ANG 85 +ANT 86 +AP 87 +AR 88 +ARD 89 +ARIAN 90 +ARIES 91 +ARILY 92 +ARY 93 +AS 94 +AT 95 +ATE 96 +ATED 97 +ATH 98 +ATING 99 +ATION 100 +ATIVE 101 +ATOMIC 102 +AU 103 +AW 104 +AY 105 +B 106 +BA 107 +BACK 108 +BBIE 109 +BE 110 +BER 111 +BI 112 +BLE 113 +BO 114 +BOARD 115 +BOOK 116 +BORN 117 +BOX 118 +BRA 119 +BU 120 +BURG 121 +BY 122 +C 123 +CA 124 +CAR 125 +CC 126 +CE 127 +CENT 128 +CH 129 +CHE 130 +CI 131 +CK 132 +CL 133 +CLA 134 +CO 135 +COM 136 +CON 137 +CONVENIENT 138 +CR 139 +CRA 140 +CT 141 +CUBA 142 +CUL 143 +CY 144 +D 145 +DA 146 +DDING 147 +DE 148 +DEN 149 +DI 150 +DO 151 +DUCT 152 +DY 153 +E 154 +EA 155 +ECT 156 +ED 157 +EF 158 +EG 159 +EL 160 +EN 161 +ENCE 162 +ENCY 163 +ENT 164 +ENTREPRENEURSHIP 165 +EP 166 +ER 167 +ERS 168 +ES 169 +EST 170 +ET 171 +EV 172 +EX 173 +EY 174 +F 175 +FA 176 +FE 177 +FECTED 178 +FERENCE 179 +FF 180 +FI 181 +FIELD 182 +FLEX 183 +FLOW 184 +FOR 185 +FU 186 +FUL 187 +G 188 +GA 189 +GE 190 +GED 191 +GER 192 +GGED 193 +GGLE 194 +GIE 195 +GING 196 +GO 197 +GRAD 198 +GRAM 199 +GUA 200 +H 201 +HA 202 +HAN 203 +HE 204 +HEAD 205 +HER 206 +HI 207 +HO 208 +HOLD 209 +HOOD 210 +HOUSE 211 +HREW 212 +HUMAN 213 +HY 214 +I 215 +IA 216 +IAL 217 +IAN 218 +IBILITY 219 +IBLE 220 +IBLY 221 +IC 222 +ICAL 223 +ICALLY 224 +ICK 225 +ID 226 +IDE 227 +IE 228 +IER 229 +IES 230 +IF 231 +IFICATION 232 +IFIED 233 +IFY 234 +IG 235 +IGHT 236 +IL 237 +ILE 238 +ILL 239 +IN 240 +INA 241 +INE 242 +INESS 243 +ING 244 +INTENDED 245 +IO 246 +ION 247 +IOUS 248 +IP 249 +IR 250 +IS 251 +ISE 252 +ISH 253 +ISM 254 +IST 255 +ISTIC 256 +ISTS 257 +IT 258 +ITE 259 +ITIES 260 +ITION 261 +ITUDE 262 +ITY 263 +IUM 264 +IV 265 +IVE 266 +IZATION 267 +IZE 268 +IZED 269 +IZING 270 +J 271 +JA 272 +K 273 +KA 274 +KE 275 +KER 276 +KI 277 +KIN 278 +KING 279 +KU 280 +L 281 +LA 282 +LAN 283 +LAND 284 +LD 285 +LE 286 +LED 287 +LER 288 +LES 289 +LESS 290 +LESSNESS 291 +LEY 292 +LI 293 +LIA 294 +LIC 295 +LIE 296 +LIGHT 297 +LIN 298 +LINE 299 +LING 300 +LIT 301 +LL 302 +LLY 303 +LO 304 +LOG 305 +LOR 306 +LU 307 +LY 308 +M 309 +MA 310 +MAN 311 +ME 312 +MEN 313 +MENT 314 +METER 315 +MI 316 +MO 317 +N 318 +NA 319 +ND 320 +NDER 321 +NE 322 +NED 323 +NER 324 +NESS 325 +NG 326 +NGEST 327 +NI 328 +NIC 329 +NING 330 +NO 331 +NS 332 +NT 333 +NY 334 +O 335 +OG 336 +OGRAPH 337 +OK 338 +OL 339 +OLOGICAL 340 +OLOGIST 341 +OLOGY 342 +ON 343 +ONE 344 +OO 345 +OP 346 +OR 347 +ORS 348 +ORY 349 +OS 350 +OSE 351 +OSIS 352 +OT 353 
+OU 354 +OUGHT 355 +OUND 356 +OUR 357 +OUS 358 +OUT 359 +OV 360 +OVER 361 +OW 362 +P 363 +PA 364 +PART 365 +PATHETIC 366 +PE 367 +PED 368 +PER 369 +PH 370 +PHOBIA 371 +PI 372 +PING 373 +PLACE 374 +PLAY 375 +PO 376 +POINT 377 +PORT 378 +POWER 379 +PP 380 +PR 381 +PRO 382 +PS 383 +PT 384 +Q 385 +QUA 386 +QUE 387 +R 388 +RA 389 +RAC 390 +RAN 391 +RD 392 +RE 393 +RESPONSIBILITIES 394 +RI 395 +RIB 396 +RIDGE 397 +RIN 398 +RING 399 +RK 400 +RO 401 +RON 402 +ROOM 403 +RS 404 +RU 405 +RY 406 +S 407 +SA 408 +SCRIPT 409 +SE 410 +SEMBL 411 +SH 412 +SHIP 413 +SIDE 414 +SOME 415 +SON 416 +SOURCING 417 +SPIRATION 418 +STAND 419 +STER 420 +STONE 421 +STREAM 422 +STRO 423 +STRUCK 424 +T 425 +TA 426 +TAN 427 +TE 428 +TEN 429 +TER 430 +TH 431 +THE 432 +THER 433 +TI 434 +TIC 435 +TIME 436 +TION 437 +TO 438 +TON 439 +TOP 440 +TOR 441 +TRA 442 +TRI 443 +TRIC 444 +TRIES 445 +TTING 446 +TURING 447 +TY 448 +U 449 +UAL 450 +UB 451 +UC 452 +UD 453 +UE 454 +UFF 455 +UG 456 +UGHT 457 +UIT 458 +UL 459 +ULATE 460 +UM 461 +UN 462 +UND 463 +UNG 464 +UP 465 +UR 466 +URE 467 +US 468 +USE 469 +UT 470 +V 471 +VA 472 +VAL 473 +VAN 474 +VE 475 +VER 476 +VERSE 477 +VERSION 478 +VI 479 +VILLE 480 +VING 481 +VISIBILITY 482 +W 483 +WA 484 +WARD 485 +WATER 486 +WAY 487 +WE 488 +WELL 489 +WI 490 +WN 491 +WOOD 492 +WORK 493 +WORTH 494 +X 495 +Y 496 +Z 497 +ZA 498 +ZE 499 +ZZ 500 +▁ 501 +▁1 502 +▁10 503 +▁100 504 +▁1000 505 +▁10000 506 +▁100000 507 +▁11 508 +▁12 509 +▁13 510 +▁15 511 +▁19 512 +▁1950 513 +▁199 514 +▁1990 515 +▁2 516 +▁20 517 +▁200 518 +▁2000 519 +▁2009 520 +▁201 521 +▁2010 522 +▁2011 523 +▁2012 524 +▁2013 525 +▁2015 526 +▁2050 527 +▁21 528 +▁22 529 +▁23 530 +▁25 531 +▁3 532 +▁30 533 +▁300 534 +▁3000 535 +▁35 536 +▁5 537 +▁50 538 +▁500 539 +▁9 540 +▁90 541 +▁95 542 +▁99 543 +▁A 544 +▁ABANDON 545 +▁ABILITIES 546 +▁ABILITY 547 +▁ABLE 548 +▁ABOUT 549 +▁ABOVE 550 +▁ABRAHAM 551 +▁ABROAD 552 +▁ABSENCE 553 +▁ABSOLUTE 554 +▁ABSOLUTELY 555 +▁ABSORB 556 +▁ABSTRACT 557 +▁ABSURD 558 +▁ABUNDAN 559 +▁ABUSE 560 +▁ABUSI 561 +▁ACADEMIC 562 +▁ACADEMY 563 +▁ACCELERATE 564 +▁ACCENT 565 +▁ACCEPT 566 +▁ACCEPTANCE 567 +▁ACCEPTED 568 +▁ACCESS 569 +▁ACCESSIBLE 570 +▁ACCIDENT 571 +▁ACCOMPLISH 572 +▁ACCORDING 573 +▁ACCOUNT 574 +▁ACCUMULAT 575 +▁ACCURACY 576 +▁ACCURATE 577 +▁ACCUS 578 +▁ACHIEVE 579 +▁ACHIEVEMENT 580 +▁ACHIEVING 581 +▁ACID 582 +▁ACKNOWLEDGE 583 +▁ACQUIRE 584 +▁ACROSS 585 +▁ACT 586 +▁ACTION 587 +▁ACTIONS 588 +▁ACTIVAT 589 +▁ACTIVATE 590 +▁ACTIVE 591 +▁ACTIVISM 592 +▁ACTIVIST 593 +▁ACTIVITIES 594 +▁ACTIVITY 595 +▁ACTOR 596 +▁ACTUAL 597 +▁ACTUALLY 598 +▁ADAM 599 +▁ADAPT 600 +▁ADD 601 +▁ADDED 602 +▁ADDICT 603 +▁ADDICTION 604 +▁ADDITION 605 +▁ADDITIONAL 606 +▁ADDRESS 607 +▁ADEQUATE 608 +▁ADHD 609 +▁ADJUST 610 +▁ADMINISTRATION 611 +▁ADMIT 612 +▁ADMITTED 613 +▁ADOLESCENT 614 +▁ADOPT 615 +▁ADULT 616 +▁ADULTHOOD 617 +▁ADULTS 618 +▁ADVANCE 619 +▁ADVANCED 620 +▁ADVANTAGE 621 +▁ADVENTURE 622 +▁ADVERSITY 623 +▁ADVERTISING 624 +▁ADVICE 625 +▁ADVISE 626 +▁ADVOCATE 627 +▁AESTHETIC 628 +▁AFFAIR 629 +▁AFFECT 630 +▁AFFECTED 631 +▁AFFIRM 632 +▁AFFORD 633 +▁AFGHAN 634 +▁AFGHANISTAN 635 +▁AFRAID 636 +▁AFRICA 637 +▁AFRICAN 638 +▁AFTER 639 +▁AFTERNOON 640 +▁AFTERWARDS 641 +▁AGAIN 642 +▁AGAINST 643 +▁AGE 644 +▁AGENCIES 645 +▁AGENCY 646 +▁AGENDA 647 +▁AGENT 648 +▁AGGREGAT 649 +▁AGGRESSIVE 650 +▁AGO 651 +▁AGREE 652 +▁AGREED 653 +▁AGRICULTURAL 654 +▁AGRICULTURE 655 +▁AHEAD 656 +▁AID 657 +▁AIM 658 +▁AIR 659 +▁AIRPLANE 660 +▁AIRPORT 661 +▁AL 662 +▁ALARM 663 +▁ALBERT 664 +▁ALBUM 665 +▁ALCOHOL 666 +▁ALGAE 667 +▁ALGORITHM 668 +▁ALIEN 669 +▁ALIGN 670 +▁ALIKE 671 
+▁ALIVE 672 +▁ALL 673 +▁ALLOW 674 +▁ALLOWED 675 +▁ALLOWING 676 +▁ALLOWS 677 +▁ALMOST 678 +▁ALONE 679 +▁ALONG 680 +▁ALREADY 681 +▁ALRIGHT 682 +▁ALSO 683 +▁ALTER 684 +▁ALTERNATIVE 685 +▁ALTHOUGH 686 +▁ALTOGETHER 687 +▁ALWAYS 688 +▁ALZHEIMER 689 +▁AM 690 +▁AMAZED 691 +▁AMAZING 692 +▁AMAZON 693 +▁AMBASSADOR 694 +▁AMBITION 695 +▁AMBITIOUS 696 +▁AMBULANCE 697 +▁AMERICA 698 +▁AMERICAN 699 +▁AMERICANS 700 +▁AMONG 701 +▁AMONGST 702 +▁AMOUNT 703 +▁AMP 704 +▁AN 705 +▁ANALOG 706 +▁ANALYSIS 707 +▁ANALYTIC 708 +▁ANALYZE 709 +▁ANATOMY 710 +▁ANCESTORS 711 +▁ANCIENT 712 +▁AND 713 +▁ANGEL 714 +▁ANGELES 715 +▁ANGER 716 +▁ANGLE 717 +▁ANGRY 718 +▁ANIMAL 719 +▁ANIMALS 720 +▁ANIMATION 721 +▁ANNOUNCED 722 +▁ANNUAL 723 +▁ANONYMOUS 724 +▁ANOTHER 725 +▁ANSWER 726 +▁ANSWERS 727 +▁ANTI 728 +▁ANTIBIOTIC 729 +▁ANTICIPATE 730 +▁ANTIMATTER 731 +▁ANXIETY 732 +▁ANXIOUS 733 +▁ANY 734 +▁ANYBODY 735 +▁ANYMORE 736 +▁ANYONE 737 +▁ANYTHING 738 +▁ANYWAY 739 +▁ANYWHERE 740 +▁APART 741 +▁APARTMENT 742 +▁APOLOGIZE 743 +▁APP 744 +▁APPARENTLY 745 +▁APPEAL 746 +▁APPEAR 747 +▁APPEARANCE 748 +▁APPLAUSE 749 +▁APPLE 750 +▁APPLICATION 751 +▁APPLIED 752 +▁APPLIES 753 +▁APPLY 754 +▁APPOINTMENT 755 +▁APPRECIATE 756 +▁APPROACH 757 +▁APPROPRIATE 758 +▁APPROVAL 759 +▁APPROXIMATE 760 +▁APRIL 761 +▁ARAB 762 +▁ARCH 763 +▁ARCHAEOLOG 764 +▁ARCHITECT 765 +▁ARCHITECTURE 766 +▁ARCTIC 767 +▁ARE 768 +▁AREA 769 +▁AREAS 770 +▁AREN 771 +▁ARGUE 772 +▁ARGUMENT 773 +▁ARISE 774 +▁ARM 775 +▁ARMY 776 +▁AROUND 777 +▁ARRANGE 778 +▁ARREST 779 +▁ARRIV 780 +▁ARRIVE 781 +▁ARRIVED 782 +▁ARROW 783 +▁ART 784 +▁ARTICLE 785 +▁ARTIFICIAL 786 +▁ARTIST 787 +▁AS 788 +▁ASHAMED 789 +▁ASK 790 +▁ASKED 791 +▁ASKING 792 +▁ASLEEP 793 +▁ASPECT 794 +▁ASSAULT 795 +▁ASSEMBLE 796 +▁ASSESS 797 +▁ASSET 798 +▁ASSIGNMENT 799 +▁ASSISTANCE 800 +▁ASSISTANT 801 +▁ASSOCIATE 802 +▁ASSOCIATED 803 +▁ASSOCIATION 804 +▁ASSUME 805 +▁ASSUMPTION 806 +▁ASTEROID 807 +▁ASTONISHING 808 +▁ASTRONAUT 809 +▁ASTRONOMER 810 +▁AT 811 +▁ATHLETE 812 +▁ATHLETIC 813 +▁ATLANTIC 814 +▁ATMOSPHERE 815 +▁ATOMS 816 +▁ATTACH 817 +▁ATTACK 818 +▁ATTEMPT 819 +▁ATTEND 820 +▁ATTENTION 821 +▁ATTITUDE 822 +▁ATTORNEY 823 +▁ATTRACT 824 +▁ATTRACTIVE 825 +▁ATTRIBUTE 826 +▁AUDIENCE 827 +▁AUDIO 828 +▁AUDIT 829 +▁AUGMENT 830 +▁AUGUST 831 +▁AUNT 832 +▁AUSTRALIA 833 +▁AUTHENTIC 834 +▁AUTHOR 835 +▁AUTHORITY 836 +▁AUTISM 837 +▁AUTISTIC 838 +▁AUTO 839 +▁AUTOMATIC 840 +▁AUTOMATICALLY 841 +▁AUTONOMOUS 842 +▁AVAILABLE 843 +▁AVENUE 844 +▁AVERAGE 845 +▁AVOID 846 +▁AWAKE 847 +▁AWARE 848 +▁AWARENESS 849 +▁AWAY 850 +▁AWESOME 851 +▁AWFUL 852 +▁AWKWARD 853 +▁B 854 +▁BA 855 +▁BABIES 856 +▁BABY 857 +▁BACK 858 +▁BACKGROUND 859 +▁BACKPACK 860 +▁BACKWARDS 861 +▁BACKYARD 862 +▁BACTERIA 863 +▁BAD 864 +▁BAG 865 +▁BAKE 866 +▁BALANCE 867 +▁BALL 868 +▁BALLET 869 +▁BALLOON 870 +▁BALTIMORE 871 +▁BAN 872 +▁BANANA 873 +▁BAND 874 +▁BANG 875 +▁BANK 876 +▁BAR 877 +▁BARELY 878 +▁BARRIER 879 +▁BASE 880 +▁BASEBALL 881 +▁BASED 882 +▁BASIC 883 +▁BASICALLY 884 +▁BASIS 885 +▁BASKETBALL 886 +▁BATHROOM 887 +▁BATTERY 888 +▁BATTLE 889 +▁BE 890 +▁BEACH 891 +▁BEAR 892 +▁BEAT 893 +▁BEAUTIFUL 894 +▁BEAUTY 895 +▁BECAME 896 +▁BECAUSE 897 +▁BECOME 898 +▁BECOMES 899 +▁BECOMING 900 +▁BED 901 +▁BEDROOM 902 +▁BEEN 903 +▁BEFORE 904 +▁BEGAN 905 +▁BEGIN 906 +▁BEGINNING 907 +▁BEGUN 908 +▁BEHALF 909 +▁BEHAVE 910 +▁BEHAVIOR 911 +▁BEHAVIORAL 912 +▁BEHAVIORS 913 +▁BEHAVIOUR 914 +▁BEHIND 915 +▁BEING 916 +▁BEINGS 917 +▁BELIEF 918 +▁BELIEFS 919 +▁BELIEVE 920 +▁BELIEVED 921 +▁BELIEVING 922 +▁BELONG 923 +▁BELOW 924 +▁BENEFICIAL 925 +▁BENEFIT 926 +▁BENEFITS 927 +▁BEST 928 +▁BETTER 929 +▁BETWEEN 930 +▁BEYOND 931 
+▁BI 932 +▁BIAS 933 +▁BICYCLE 934 +▁BIG 935 +▁BIGGER 936 +▁BIGGEST 937 +▁BIKE 938 +▁BILL 939 +▁BILLION 940 +▁BIN 941 +▁BINARY 942 +▁BIO 943 +▁BIODIVERSITY 944 +▁BIOLOGICAL 945 +▁BIOLOGIST 946 +▁BIOLOGY 947 +▁BIRD 948 +▁BIRDS 949 +▁BIRMINGHAM 950 +▁BIRTH 951 +▁BIRTHDAY 952 +▁BISEXUAL 953 +▁BIT 954 +▁BITCOIN 955 +▁BIZARRE 956 +▁BLA 957 +▁BLACK 958 +▁BLAME 959 +▁BLANK 960 +▁BLEND 961 +▁BLESS 962 +▁BLIND 963 +▁BLINK 964 +▁BLOCK 965 +▁BLOG 966 +▁BLOOD 967 +▁BLOW 968 +▁BLUE 969 +▁BLUR 970 +▁BO 971 +▁BOARD 972 +▁BOAT 973 +▁BODIES 974 +▁BODY 975 +▁BOLD 976 +▁BOMB 977 +▁BOND 978 +▁BONE 979 +▁BONUS 980 +▁BOO 981 +▁BOOK 982 +▁BOOKS 983 +▁BORDER 984 +▁BORED 985 +▁BORING 986 +▁BORN 987 +▁BORROW 988 +▁BOSS 989 +▁BOSTON 990 +▁BOTH 991 +▁BOTHER 992 +▁BOTTLE 993 +▁BOTTOM 994 +▁BOUGHT 995 +▁BOUNC 996 +▁BOUND 997 +▁BOUNDARIES 998 +▁BOW 999 +▁BOWL 1000 +▁BOX 1001 +▁BOXES 1002 +▁BOY 1003 +▁BOYFRIEND 1004 +▁BOYS 1005 +▁BR 1006 +▁BRA 1007 +▁BRAIN 1008 +▁BRAINS 1009 +▁BRANCH 1010 +▁BRAND 1011 +▁BRAVE 1012 +▁BRAZIL 1013 +▁BREAD 1014 +▁BREAK 1015 +▁BREAKFAST 1016 +▁BREAKTHROUGH 1017 +▁BREAST 1018 +▁BREATH 1019 +▁BREATHE 1020 +▁BREATHING 1021 +▁BREED 1022 +▁BRIDGE 1023 +▁BRIEF 1024 +▁BRIGHT 1025 +▁BRILLIANT 1026 +▁BRING 1027 +▁BRINGING 1028 +▁BRITAIN 1029 +▁BRITISH 1030 +▁BRO 1031 +▁BROAD 1032 +▁BROADCAST 1033 +▁BROKE 1034 +▁BROKEN 1035 +▁BROTHER 1036 +▁BROUGHT 1037 +▁BROWN 1038 +▁BRUSH 1039 +▁BRUTAL 1040 +▁BU 1041 +▁BUBBLE 1042 +▁BUCK 1043 +▁BUDGET 1044 +▁BUG 1045 +▁BUILD 1046 +▁BUILDING 1047 +▁BUILT 1048 +▁BULB 1049 +▁BULLET 1050 +▁BULLIED 1051 +▁BULLYING 1052 +▁BUMP 1053 +▁BUNCH 1054 +▁BUR 1055 +▁BURDEN 1056 +▁BURIED 1057 +▁BURN 1058 +▁BUS 1059 +▁BUSINESS 1060 +▁BUSINESSES 1061 +▁BUSY 1062 +▁BUT 1063 +▁BUTTON 1064 +▁BUY 1065 +▁BY 1066 +▁C 1067 +▁CA 1068 +▁CAKE 1069 +▁CALCULAT 1070 +▁CALCULATE 1071 +▁CALIFORNIA 1072 +▁CALL 1073 +▁CALLED 1074 +▁CALLING 1075 +▁CALM 1076 +▁CALORIES 1077 +▁CAMBODIA 1078 +▁CAMBRIDGE 1079 +▁CAME 1080 +▁CAMERA 1081 +▁CAMP 1082 +▁CAMPAIGN 1083 +▁CAMPUS 1084 +▁CAN 1085 +▁CANADA 1086 +▁CANCER 1087 +▁CANDIDATE 1088 +▁CANNOT 1089 +▁CAP 1090 +▁CAPABILITIES 1091 +▁CAPABILITY 1092 +▁CAPABLE 1093 +▁CAPACITY 1094 +▁CAPITA 1095 +▁CAPITAL 1096 +▁CAPITALISM 1097 +▁CAPTAIN 1098 +▁CAPTIV 1099 +▁CAPTURE 1100 +▁CAR 1101 +▁CARBON 1102 +▁CARD 1103 +▁CARE 1104 +▁CAREER 1105 +▁CAREFUL 1106 +▁CAREFULLY 1107 +▁CAROLINA 1108 +▁CARRIE 1109 +▁CARRY 1110 +▁CARRYING 1111 +▁CARS 1112 +▁CARTOON 1113 +▁CASE 1114 +▁CASES 1115 +▁CASH 1116 +▁CAST 1117 +▁CAT 1118 +▁CATALYST 1119 +▁CATCH 1120 +▁CATEGORIES 1121 +▁CATEGORY 1122 +▁CATHOLIC 1123 +▁CAUGHT 1124 +▁CAUSE 1125 +▁CAUSED 1126 +▁CAUSES 1127 +▁CAUSING 1128 +▁CAVE 1129 +▁CEILING 1130 +▁CELEBRATE 1131 +▁CELL 1132 +▁CELLS 1133 +▁CENSOR 1134 +▁CENT 1135 +▁CENTER 1136 +▁CENTRAL 1137 +▁CENTURIES 1138 +▁CENTURY 1139 +▁CEO 1140 +▁CEREMONY 1141 +▁CERTAIN 1142 +▁CERTAINLY 1143 +▁CH 1144 +▁CHA 1145 +▁CHAIN 1146 +▁CHAIR 1147 +▁CHALLENGE 1148 +▁CHALLENGES 1149 +▁CHALLENGING 1150 +▁CHAMPION 1151 +▁CHANCE 1152 +▁CHANGE 1153 +▁CHANGED 1154 +▁CHANGES 1155 +▁CHANGING 1156 +▁CHANNEL 1157 +▁CHAOS 1158 +▁CHAPTER 1159 +▁CHARACTER 1160 +▁CHARACTERISTICS 1161 +▁CHARGE 1162 +▁CHARITY 1163 +▁CHARLES 1164 +▁CHART 1165 +▁CHASE 1166 +▁CHEAP 1167 +▁CHEAPER 1168 +▁CHEAT 1169 +▁CHECK 1170 +▁CHEEK 1171 +▁CHEER 1172 +▁CHEESE 1173 +▁CHEMICAL 1174 +▁CHEMISTRY 1175 +▁CHEMOTHERAPY 1176 +▁CHEST 1177 +▁CHEW 1178 +▁CHI 1179 +▁CHICAGO 1180 +▁CHICKEN 1181 +▁CHIEF 1182 +▁CHILD 1183 +▁CHILDHOOD 1184 +▁CHILDREN 1185 +▁CHIMPANZEE 1186 +▁CHINA 1187 +▁CHINESE 1188 +▁CHIP 1189 +▁CHOCOLATE 1190 +▁CHOICE 1191 +▁CHOICES 
1192 +▁CHOOSE 1193 +▁CHOOSING 1194 +▁CHOSE 1195 +▁CHOSEN 1196 +▁CHRIS 1197 +▁CHRISTIAN 1198 +▁CHRISTMAS 1199 +▁CHROMOSOME 1200 +▁CHRONIC 1201 +▁CHUNK 1202 +▁CHURCH 1203 +▁CHUTZPAH 1204 +▁CIGARETTE 1205 +▁CINEMA 1206 +▁CIRCLE 1207 +▁CIRCUIT 1208 +▁CIRCULA 1209 +▁CIRCUM 1210 +▁CIRCUMSTANCES 1211 +▁CITIES 1212 +▁CITIZEN 1213 +▁CITIZENS 1214 +▁CITY 1215 +▁CIVIC 1216 +▁CIVIL 1217 +▁CIVILIZATION 1218 +▁CL 1219 +▁CLAIM 1220 +▁CLARITY 1221 +▁CLASS 1222 +▁CLASSES 1223 +▁CLASSIC 1224 +▁CLASSICAL 1225 +▁CLASSMATES 1226 +▁CLASSROOM 1227 +▁CLEAN 1228 +▁CLEAR 1229 +▁CLEARLY 1230 +▁CLEVER 1231 +▁CLICK 1232 +▁CLIENT 1233 +▁CLIMATE 1234 +▁CLIMB 1235 +▁CLINIC 1236 +▁CLINICAL 1237 +▁CLIP 1238 +▁CLO 1239 +▁CLOCK 1240 +▁CLOSE 1241 +▁CLOSED 1242 +▁CLOSER 1243 +▁CLOSING 1244 +▁CLOTHES 1245 +▁CLOTHING 1246 +▁CLOUD 1247 +▁CLUB 1248 +▁CLUE 1249 +▁CLUSTER 1250 +▁CO 1251 +▁COACH 1252 +▁COAL 1253 +▁COAST 1254 +▁COCAINE 1255 +▁COCOA 1256 +▁CODE 1257 +▁COFFEE 1258 +▁COGNITIVE 1259 +▁COIN 1260 +▁COLD 1261 +▁COLLABORATE 1262 +▁COLLABORATION 1263 +▁COLLABORATIVE 1264 +▁COLLAPSE 1265 +▁COLLEAGUE 1266 +▁COLLEAGUES 1267 +▁COLLECT 1268 +▁COLLECTION 1269 +▁COLLECTIVE 1270 +▁COLLEGE 1271 +▁COLLIDE 1272 +▁COLLISION 1273 +▁COLOR 1274 +▁COLORADO 1275 +▁COLUMN 1276 +▁COM 1277 +▁COMBAT 1278 +▁COMBINATION 1279 +▁COMBINE 1280 +▁COMBINED 1281 +▁COME 1282 +▁COMES 1283 +▁COMFORT 1284 +▁COMFORTABLE 1285 +▁COMIC 1286 +▁COMING 1287 +▁COMMAND 1288 +▁COMMENT 1289 +▁COMMERCIAL 1290 +▁COMMISSION 1291 +▁COMMIT 1292 +▁COMMITMENT 1293 +▁COMMITTED 1294 +▁COMMITTEE 1295 +▁COMMON 1296 +▁COMMUNI 1297 +▁COMMUNICATE 1298 +▁COMMUNICATING 1299 +▁COMMUNICATION 1300 +▁COMMUNITIES 1301 +▁COMMUNITY 1302 +▁COMP 1303 +▁COMPANIES 1304 +▁COMPANY 1305 +▁COMPARE 1306 +▁COMPARED 1307 +▁COMPARISON 1308 +▁COMPASSION 1309 +▁COMPELLING 1310 +▁COMPETE 1311 +▁COMPETING 1312 +▁COMPETITION 1313 +▁COMPETITIVE 1314 +▁COMPLAIN 1315 +▁COMPLEMENT 1316 +▁COMPLETE 1317 +▁COMPLETELY 1318 +▁COMPLEX 1319 +▁COMPLEXITY 1320 +▁COMPLICATED 1321 +▁COMPLIMENT 1322 +▁COMPONENT 1323 +▁COMPOSER 1324 +▁COMPOST 1325 +▁COMPOUND 1326 +▁COMPREHENSI 1327 +▁COMPROMISE 1328 +▁COMPUTATION 1329 +▁COMPUTER 1330 +▁COMPUTERS 1331 +▁COMPUTING 1332 +▁CON 1333 +▁CONCENTRATE 1334 +▁CONCENTRATION 1335 +▁CONCEPT 1336 +▁CONCERN 1337 +▁CONCERNED 1338 +▁CONCERT 1339 +▁CONCLUDE 1340 +▁CONCLUSION 1341 +▁CONCRETE 1342 +▁CONDITION 1343 +▁CONDITIONS 1344 +▁CONDUCT 1345 +▁CONFERENCE 1346 +▁CONFIDENCE 1347 +▁CONFIDENT 1348 +▁CONFINE 1349 +▁CONFIRM 1350 +▁CONFLICT 1351 +▁CONFORM 1352 +▁CONFRONT 1353 +▁CONFUSED 1354 +▁CONFUSING 1355 +▁CONFUSION 1356 +▁CONGRESS 1357 +▁CONNECT 1358 +▁CONNECTED 1359 +▁CONNECTION 1360 +▁CONNECTIONS 1361 +▁CONQUER 1362 +▁CONSCIOUS 1363 +▁CONSCIOUSNESS 1364 +▁CONSENT 1365 +▁CONSEQUENCE 1366 +▁CONSEQUENCES 1367 +▁CONSERVATION 1368 +▁CONSERVATIVE 1369 +▁CONSIDER 1370 +▁CONSIDERED 1371 +▁CONSIST 1372 +▁CONSISTENT 1373 +▁CONSTANT 1374 +▁CONSTANTLY 1375 +▁CONSTITUTION 1376 +▁CONSTRAIN 1377 +▁CONSTRUCT 1378 +▁CONSTRUCTION 1379 +▁CONSULT 1380 +▁CONSUME 1381 +▁CONSUMER 1382 +▁CONSUMING 1383 +▁CONSUMPTION 1384 +▁CONTACT 1385 +▁CONTAIN 1386 +▁CONTEMPORARY 1387 +▁CONTENT 1388 +▁CONTEST 1389 +▁CONTEXT 1390 +▁CONTINENT 1391 +▁CONTINU 1392 +▁CONTINUE 1393 +▁CONTINUED 1394 +▁CONTRACT 1395 +▁CONTRADICT 1396 +▁CONTRARY 1397 +▁CONTRAST 1398 +▁CONTRIBUTE 1399 +▁CONTRIBUTING 1400 +▁CONTRIBUTION 1401 +▁CONTROL 1402 +▁CONTROLLED 1403 +▁CONTROVERSIAL 1404 +▁CONVENTION 1405 +▁CONVENTIONAL 1406 +▁CONVERSATION 1407 +▁CONVERSATIONS 1408 +▁CONVERT 1409 +▁CONVICT 1410 +▁CONVINCE 1411 +▁CONVINCED 1412 +▁CONVINCING 1413 +▁COOK 
1414 +▁COOL 1415 +▁COOPERATION 1416 +▁COORDINATE 1417 +▁COP 1418 +▁COPE 1419 +▁COPY 1420 +▁COR 1421 +▁CORAL 1422 +▁CORE 1423 +▁CORN 1424 +▁CORNER 1425 +▁CORPORATE 1426 +▁CORPORATION 1427 +▁CORPS 1428 +▁CORRECT 1429 +▁CORRELATE 1430 +▁CORRESPOND 1431 +▁CORRUPTION 1432 +▁CORTEX 1433 +▁COSMIC 1434 +▁COST 1435 +▁COSTS 1436 +▁COSTUME 1437 +▁COUCH 1438 +▁COULD 1439 +▁COULDN 1440 +▁COUNCIL 1441 +▁COUNSEL 1442 +▁COUNT 1443 +▁COUNTER 1444 +▁COUNTLESS 1445 +▁COUNTRIES 1446 +▁COUNTRY 1447 +▁COUPLE 1448 +▁COURAGE 1449 +▁COURSE 1450 +▁COURT 1451 +▁COUSIN 1452 +▁COVER 1453 +▁COW 1454 +▁CR 1455 +▁CRACK 1456 +▁CRAFT 1457 +▁CRASH 1458 +▁CRAWL 1459 +▁CRAZY 1460 +▁CREAM 1461 +▁CREATE 1462 +▁CREATED 1463 +▁CREATING 1464 +▁CREATION 1465 +▁CREATIVE 1466 +▁CREATIVITY 1467 +▁CREATOR 1468 +▁CREATURE 1469 +▁CREDIT 1470 +▁CREW 1471 +▁CRIED 1472 +▁CRIME 1473 +▁CRIMINAL 1474 +▁CRISIS 1475 +▁CRITERIA 1476 +▁CRITIC 1477 +▁CRITICAL 1478 +▁CROP 1479 +▁CROSS 1480 +▁CROWD 1481 +▁CRU 1482 +▁CRUCIAL 1483 +▁CRUSH 1484 +▁CRY 1485 +▁CRYING 1486 +▁CRYSTAL 1487 +▁CU 1488 +▁CULTIVAT 1489 +▁CULTURAL 1490 +▁CULTURE 1491 +▁CUP 1492 +▁CURE 1493 +▁CURIOSITY 1494 +▁CURIOUS 1495 +▁CURRENCY 1496 +▁CURRENT 1497 +▁CURRENTLY 1498 +▁CURRICULUM 1499 +▁CURVE 1500 +▁CUSTODY 1501 +▁CUSTOM 1502 +▁CUT 1503 +▁CUTTING 1504 +▁CYBER 1505 +▁CYCLE 1506 +▁D 1507 +▁DA 1508 +▁DAD 1509 +▁DAILY 1510 +▁DAIRY 1511 +▁DAMAGE 1512 +▁DAMAGING 1513 +▁DAMN 1514 +▁DAN 1515 +▁DANCE 1516 +▁DANCING 1517 +▁DANGER 1518 +▁DANGEROUS 1519 +▁DANIEL 1520 +▁DAR 1521 +▁DARK 1522 +▁DARKNESS 1523 +▁DATA 1524 +▁DATABASE 1525 +▁DATE 1526 +▁DATING 1527 +▁DAUGHTER 1528 +▁DAVID 1529 +▁DAY 1530 +▁DAYS 1531 +▁DE 1532 +▁DEAD 1533 +▁DEAF 1534 +▁DEAL 1535 +▁DEALING 1536 +▁DEAR 1537 +▁DEATH 1538 +▁DEBATE 1539 +▁DEBT 1540 +▁DECADE 1541 +▁DECADES 1542 +▁DECEMBER 1543 +▁DECID 1544 +▁DECIDE 1545 +▁DECIDED 1546 +▁DECISION 1547 +▁DECISIONS 1548 +▁DECLARED 1549 +▁DECLINE 1550 +▁DECREASE 1551 +▁DEDICATED 1552 +▁DEEP 1553 +▁DEEPER 1554 +▁DEEPLY 1555 +▁DEF 1556 +▁DEFAULT 1557 +▁DEFEAT 1558 +▁DEFEND 1559 +▁DEFENSE 1560 +▁DEFICIT 1561 +▁DEFINE 1562 +▁DEFINED 1563 +▁DEFINING 1564 +▁DEFINITELY 1565 +▁DEFINITION 1566 +▁DEGREE 1567 +▁DELAY 1568 +▁DELIBERATE 1569 +▁DELICIOUS 1570 +▁DELIVER 1571 +▁DEMAND 1572 +▁DEMO 1573 +▁DEMOCRACY 1574 +▁DEMOCRAT 1575 +▁DEMOCRATIC 1576 +▁DEMOGRAPHIC 1577 +▁DEMONSTRAT 1578 +▁DEMONSTRATE 1579 +▁DENIAL 1580 +▁DENSE 1581 +▁DENSITY 1582 +▁DENVER 1583 +▁DEPARTMENT 1584 +▁DEPEND 1585 +▁DEPENDENT 1586 +▁DEPICT 1587 +▁DEPLOY 1588 +▁DEPRESSED 1589 +▁DEPRESSION 1590 +▁DEPRIV 1591 +▁DEPTH 1592 +▁DERIVE 1593 +▁DESCRIBE 1594 +▁DESCRIBED 1595 +▁DESCRIBING 1596 +▁DESCRIPTION 1597 +▁DESERT 1598 +▁DESERVE 1599 +▁DESIGN 1600 +▁DESIGNED 1601 +▁DESIGNER 1602 +▁DESIRE 1603 +▁DESK 1604 +▁DESPAIR 1605 +▁DESPERATE 1606 +▁DESPITE 1607 +▁DESTINATION 1608 +▁DESTINY 1609 +▁DESTROY 1610 +▁DESTRUCTION 1611 +▁DESTRUCTIVE 1612 +▁DETAIL 1613 +▁DETECT 1614 +▁DETERMINATION 1615 +▁DETERMINE 1616 +▁DETERMINED 1617 +▁DETROIT 1618 +▁DEVASTATING 1619 +▁DEVELOP 1620 +▁DEVELOPED 1621 +▁DEVELOPING 1622 +▁DEVELOPMENT 1623 +▁DEVICE 1624 +▁DEVICES 1625 +▁DEVIL 1626 +▁DEVO 1627 +▁DI 1628 +▁DIABETES 1629 +▁DIAGNOSED 1630 +▁DIAGNOSIS 1631 +▁DIALOGUE 1632 +▁DICTATE 1633 +▁DICTATOR 1634 +▁DICTIONARY 1635 +▁DID 1636 +▁DIDN 1637 +▁DIE 1638 +▁DIED 1639 +▁DIET 1640 +▁DIFFER 1641 +▁DIFFERENCE 1642 +▁DIFFERENCES 1643 +▁DIFFERENT 1644 +▁DIFFERENTLY 1645 +▁DIFFICULT 1646 +▁DIG 1647 +▁DIGITAL 1648 +▁DIGNITY 1649 +▁DILEMMA 1650 +▁DIMENSION 1651 +▁DIMINISH 1652 +▁DINNER 1653 +▁DINOSAUR 1654 +▁DIOXIDE 1655 +▁DIPLOMA 1656 +▁DIRECT 1657 
+▁DIRECTION 1658 +▁DIRECTLY 1659 +▁DIRECTOR 1660 +▁DIRT 1661 +▁DIRTY 1662 +▁DIS 1663 +▁DISABILITIES 1664 +▁DISABILITY 1665 +▁DISABLED 1666 +▁DISADVANTAGE 1667 +▁DISAGREE 1668 +▁DISAPPEAR 1669 +▁DISAPPOINT 1670 +▁DISASTER 1671 +▁DISCIPLINE 1672 +▁DISCOMFORT 1673 +▁DISCONNECT 1674 +▁DISCOURSE 1675 +▁DISCOVER 1676 +▁DISCOVERED 1677 +▁DISCOVERY 1678 +▁DISCRIMINAT 1679 +▁DISCRIMINATION 1680 +▁DISCUSS 1681 +▁DISCUSSION 1682 +▁DISEASE 1683 +▁DISEASES 1684 +▁DISGUST 1685 +▁DISH 1686 +▁DISMISS 1687 +▁DISNEY 1688 +▁DISORDER 1689 +▁DISPLAY 1690 +▁DISRUPT 1691 +▁DISTANCE 1692 +▁DISTANT 1693 +▁DISTINCT 1694 +▁DISTINGUISH 1695 +▁DISTORT 1696 +▁DISTRACT 1697 +▁DISTRACTION 1698 +▁DISTRIBUT 1699 +▁DISTRIBUTION 1700 +▁DISTRICT 1701 +▁DISTURB 1702 +▁DIVERSE 1703 +▁DIVERSITY 1704 +▁DIVIDE 1705 +▁DIVINE 1706 +▁DIVISION 1707 +▁DIVORCE 1708 +▁DNA 1709 +▁DO 1710 +▁DOCTOR 1711 +▁DOCTORS 1712 +▁DOCUMENT 1713 +▁DOES 1714 +▁DOESN 1715 +▁DOG 1716 +▁DOGS 1717 +▁DOING 1718 +▁DOLLAR 1719 +▁DOLLARS 1720 +▁DOLPHIN 1721 +▁DOMAIN 1722 +▁DOMESTIC 1723 +▁DOMINANT 1724 +▁DON 1725 +▁DONE 1726 +▁DOOR 1727 +▁DOPAMINE 1728 +▁DOUBLE 1729 +▁DOUBT 1730 +▁DOWN 1731 +▁DOZEN 1732 +▁DR 1733 +▁DRAG 1734 +▁DRAIN 1735 +▁DRAMA 1736 +▁DRAMATIC 1737 +▁DRAMATICALLY 1738 +▁DRAW 1739 +▁DRAWING 1740 +▁DRAWN 1741 +▁DREAD 1742 +▁DREAM 1743 +▁DREAMS 1744 +▁DRESS 1745 +▁DREW 1746 +▁DRIFT 1747 +▁DRINK 1748 +▁DRINKING 1749 +▁DRIVE 1750 +▁DRIVEN 1751 +▁DRIVER 1752 +▁DRIVING 1753 +▁DRONE 1754 +▁DROP 1755 +▁DROPPED 1756 +▁DROVE 1757 +▁DROWN 1758 +▁DRUG 1759 +▁DRUGS 1760 +▁DRUM 1761 +▁DRUNK 1762 +▁DRY 1763 +▁DU 1764 +▁DUE 1765 +▁DUMB 1766 +▁DUMP 1767 +▁DURING 1768 +▁DUST 1769 +▁DUTCH 1770 +▁DUTY 1771 +▁DYING 1772 +▁DYNAMIC 1773 +▁DYSFUNCTION 1774 +▁E 1775 +▁EACH 1776 +▁EAGER 1777 +▁EAR 1778 +▁EARLIER 1779 +▁EARLIEST 1780 +▁EARLY 1781 +▁EARN 1782 +▁EARTH 1783 +▁EARTHQUAKE 1784 +▁EASE 1785 +▁EASIE 1786 +▁EASIER 1787 +▁EASILY 1788 +▁EAST 1789 +▁EASTERN 1790 +▁EASY 1791 +▁EAT 1792 +▁EATING 1793 +▁EBOLA 1794 +▁ECHO 1795 +▁ECOLOGICAL 1796 +▁ECONOMIC 1797 +▁ECONOMIES 1798 +▁ECONOMIST 1799 +▁ECONOMY 1800 +▁ECOSYSTEM 1801 +▁EDGE 1802 +▁EDIT 1803 +▁EDITOR 1804 +▁EDUCATE 1805 +▁EDUCATED 1806 +▁EDUCATION 1807 +▁EDUCATIONAL 1808 +▁EDUCATOR 1809 +▁EFFECT 1810 +▁EFFECTIVE 1811 +▁EFFECTIVELY 1812 +▁EFFECTS 1813 +▁EFFICIENCY 1814 +▁EFFICIENT 1815 +▁EFFORT 1816 +▁EGG 1817 +▁EGO 1818 +▁EGYPT 1819 +▁EIGHT 1820 +▁EINSTEIN 1821 +▁EITHER 1822 +▁EL 1823 +▁ELDERLY 1824 +▁ELECTION 1825 +▁ELECTRIC 1826 +▁ELECTRICAL 1827 +▁ELECTRICITY 1828 +▁ELECTRO 1829 +▁ELECTRONIC 1830 +▁ELEGANT 1831 +▁ELEMENT 1832 +▁ELEMENTS 1833 +▁ELEPHANT 1834 +▁ELIMINAT 1835 +▁ELIMINATE 1836 +▁ELITE 1837 +▁ELSE 1838 +▁ELSEWHERE 1839 +▁EM 1840 +▁EMAIL 1841 +▁EMBARRASSED 1842 +▁EMBARRASSING 1843 +▁EMBEDDED 1844 +▁EMBODIE 1845 +▁EMBRAC 1846 +▁EMBRACE 1847 +▁EMBRYO 1848 +▁EMERGE 1849 +▁EMERGENC 1850 +▁EMERGENCY 1851 +▁EMERGING 1852 +▁EMISSIONS 1853 +▁EMOTION 1854 +▁EMOTIONAL 1855 +▁EMOTIONALLY 1856 +▁EMOTIONS 1857 +▁EMPATHY 1858 +▁EMPHASIZE 1859 +▁EMPIRE 1860 +▁EMPLOY 1861 +▁EMPLOYEE 1862 +▁EMPLOYEES 1863 +▁EMPLOYER 1864 +▁EMPLOYMENT 1865 +▁EMPOWER 1866 +▁EMPTY 1867 +▁EN 1868 +▁ENABLE 1869 +▁ENCOUNTER 1870 +▁ENCOURAGE 1871 +▁ENCOURAGING 1872 +▁END 1873 +▁ENDANGERED 1874 +▁ENDEAVOR 1875 +▁ENDED 1876 +▁ENDLESS 1877 +▁ENEMIES 1878 +▁ENEMY 1879 +▁ENERGY 1880 +▁ENFORCE 1881 +▁ENGAGE 1882 +▁ENGAGED 1883 +▁ENGAGEMENT 1884 +▁ENGAGING 1885 +▁ENGINE 1886 +▁ENGINEER 1887 +▁ENGINEERING 1888 +▁ENGLAND 1889 +▁ENGLISH 1890 +▁ENHANCE 1891 +▁ENJOY 1892 +▁ENLIGHTEN 1893 +▁ENORMOUS 1894 +▁ENOUGH 1895 +▁ENRICH 1896 +▁ENROLL 1897 
+▁ENSURE 1898 +▁ENTER 1899 +▁ENTERPRISE 1900 +▁ENTERTAIN 1901 +▁ENTERTAINMENT 1902 +▁ENTIRE 1903 +▁ENTIRELY 1904 +▁ENTITLED 1905 +▁ENTR 1906 +▁ENTREPRENEUR 1907 +▁ENVIRONMENT 1908 +▁ENVIRONMENTAL 1909 +▁ENVISION 1910 +▁EPIDEMIC 1911 +▁EPISODE 1912 +▁EQU 1913 +▁EQUAL 1914 +▁EQUALITY 1915 +▁EQUALLY 1916 +▁EQUATION 1917 +▁EQUIPMENT 1918 +▁EQUIPPED 1919 +▁EQUIVALENT 1920 +▁ERA 1921 +▁ERIC 1922 +▁ERROR 1923 +▁ESCAPE 1924 +▁ESPECIALLY 1925 +▁ESSAY 1926 +▁ESSENCE 1927 +▁ESSENTIAL 1928 +▁ESSENTIALLY 1929 +▁ESTABLISH 1930 +▁ESTEEM 1931 +▁ESTIMATE 1932 +▁ETC 1933 +▁ETHICAL 1934 +▁ETHNIC 1935 +▁EU 1936 +▁EURO 1937 +▁EUROPE 1938 +▁EUROPEAN 1939 +▁EVALUATE 1940 +▁EVEN 1941 +▁EVENT 1942 +▁EVENTS 1943 +▁EVENTUALLY 1944 +▁EVER 1945 +▁EVERY 1946 +▁EVERYBODY 1947 +▁EVERYDAY 1948 +▁EVERYONE 1949 +▁EVERYTHING 1950 +▁EVERYWHERE 1951 +▁EVIDENCE 1952 +▁EVIL 1953 +▁EVOLUTION 1954 +▁EVOLUTIONARY 1955 +▁EVOLVE 1956 +▁EVOLVED 1957 +▁EVOLVING 1958 +▁EX 1959 +▁EXACT 1960 +▁EXACTLY 1961 +▁EXAGGERAT 1962 +▁EXAM 1963 +▁EXAMINE 1964 +▁EXAMPLE 1965 +▁EXAMPLES 1966 +▁EXCEL 1967 +▁EXCELLENT 1968 +▁EXCEPT 1969 +▁EXCEPTION 1970 +▁EXCESS 1971 +▁EXCHANGE 1972 +▁EXCITED 1973 +▁EXCITEMENT 1974 +▁EXCITING 1975 +▁EXCLUSIVE 1976 +▁EXCUSE 1977 +▁EXECUTE 1978 +▁EXECUTIVE 1979 +▁EXERCISE 1980 +▁EXHAUSTED 1981 +▁EXHIBIT 1982 +▁EXIST 1983 +▁EXISTENCE 1984 +▁EXISTING 1985 +▁EXPAND 1986 +▁EXPECT 1987 +▁EXPECTANCY 1988 +▁EXPECTATIONS 1989 +▁EXPECTED 1990 +▁EXPENSE 1991 +▁EXPENSIVE 1992 +▁EXPERIENCE 1993 +▁EXPERIENCED 1994 +▁EXPERIENCES 1995 +▁EXPERIENCING 1996 +▁EXPERIMENT 1997 +▁EXPERIMENTS 1998 +▁EXPERT 1999 +▁EXPERTISE 2000 +▁EXPERTS 2001 +▁EXPLAIN 2002 +▁EXPLAINED 2003 +▁EXPLANATION 2004 +▁EXPLICIT 2005 +▁EXPLODE 2006 +▁EXPLOIT 2007 +▁EXPLORATION 2008 +▁EXPLORE 2009 +▁EXPLORING 2010 +▁EXPLOSION 2011 +▁EXPONENTIAL 2012 +▁EXPOSED 2013 +▁EXPOSURE 2014 +▁EXPRESS 2015 +▁EXPRESSION 2016 +▁EXTEND 2017 +▁EXTENSION 2018 +▁EXTENSIVE 2019 +▁EXTENT 2020 +▁EXTERNAL 2021 +▁EXTINCT 2022 +▁EXTINCTION 2023 +▁EXTRA 2024 +▁EXTRACT 2025 +▁EXTRAORDINARY 2026 +▁EXTREME 2027 +▁EXTREMELY 2028 +▁EXTREMIST 2029 +▁EXTROVERT 2030 +▁EYE 2031 +▁EYES 2032 +▁F 2033 +▁FA 2034 +▁FABRIC 2035 +▁FABULOUS 2036 +▁FACE 2037 +▁FACEBOOK 2038 +▁FACED 2039 +▁FACIAL 2040 +▁FACILITIES 2041 +▁FACILITY 2042 +▁FACING 2043 +▁FACT 2044 +▁FACTOR 2045 +▁FACTORS 2046 +▁FACTORY 2047 +▁FACULTY 2048 +▁FAIL 2049 +▁FAILED 2050 +▁FAILING 2051 +▁FAILURE 2052 +▁FAIR 2053 +▁FAIRLY 2054 +▁FAITH 2055 +▁FAKE 2056 +▁FALL 2057 +▁FALLING 2058 +▁FALSE 2059 +▁FAMILIAR 2060 +▁FAMILIES 2061 +▁FAMILY 2062 +▁FAMOUS 2063 +▁FAN 2064 +▁FANCY 2065 +▁FANTASTIC 2066 +▁FANTASY 2067 +▁FAR 2068 +▁FARM 2069 +▁FARMER 2070 +▁FARMERS 2071 +▁FASCINATED 2072 +▁FASCINATING 2073 +▁FASHION 2074 +▁FAST 2075 +▁FASTER 2076 +▁FAT 2077 +▁FATHER 2078 +▁FAULT 2079 +▁FAVOR 2080 +▁FAVORITE 2081 +▁FE 2082 +▁FEAR 2083 +▁FEATURE 2084 +▁FEBRUARY 2085 +▁FEDERAL 2086 +▁FEED 2087 +▁FEEDBACK 2088 +▁FEEL 2089 +▁FEELING 2090 +▁FEELINGS 2091 +▁FEELS 2092 +▁FEET 2093 +▁FELL 2094 +▁FELLOW 2095 +▁FELT 2096 +▁FEMALE 2097 +▁FEMININE 2098 +▁FEMINISM 2099 +▁FEMINIST 2100 +▁FERTIL 2101 +▁FERTILIZER 2102 +▁FESTIVAL 2103 +▁FEW 2104 +▁FEWER 2105 +▁FI 2106 +▁FICTION 2107 +▁FIELD 2108 +▁FIFTH 2109 +▁FIGHT 2110 +▁FIGHTING 2111 +▁FIGURE 2112 +▁FIGURED 2113 +▁FIGURING 2114 +▁FILE 2115 +▁FILL 2116 +▁FILLED 2117 +▁FILM 2118 +▁FILMMAKER 2119 +▁FILTER 2120 +▁FINAL 2121 +▁FINALLY 2122 +▁FINANCE 2123 +▁FINANCIAL 2124 +▁FIND 2125 +▁FINDING 2126 +▁FINE 2127 +▁FINGER 2128 +▁FINISH 2129 +▁FINISHED 2130 +▁FINLAND 2131 +▁FIRE 2132 +▁FIRM 2133 +▁FIRST 2134 +▁FISH 2135 
+▁FISHERIES 2136 +▁FIT 2137 +▁FIVE 2138 +▁FIX 2139 +▁FLAG 2140 +▁FLAME 2141 +▁FLASH 2142 +▁FLAT 2143 +▁FLAVOR 2144 +▁FLAW 2145 +▁FLEE 2146 +▁FLESH 2147 +▁FLEW 2148 +▁FLEXIBILITY 2149 +▁FLEXIBLE 2150 +▁FLIGHT 2151 +▁FLIP 2152 +▁FLOOD 2153 +▁FLOOR 2154 +▁FLORIDA 2155 +▁FLOURISH 2156 +▁FLOW 2157 +▁FLOWER 2158 +▁FLU 2159 +▁FLUID 2160 +▁FLY 2161 +▁FLYING 2162 +▁FO 2163 +▁FOCUS 2164 +▁FOCUSED 2165 +▁FOLD 2166 +▁FOLKS 2167 +▁FOLLOW 2168 +▁FOLLOWED 2169 +▁FOLLOWING 2170 +▁FOOD 2171 +▁FOOL 2172 +▁FOOT 2173 +▁FOOTBALL 2174 +▁FOOTPRINT 2175 +▁FOR 2176 +▁FORCE 2177 +▁FORCED 2178 +▁FORCES 2179 +▁FOREIGN 2180 +▁FOREST 2181 +▁FOREVER 2182 +▁FORGET 2183 +▁FORGIVE 2184 +▁FORGIVENESS 2185 +▁FORGOT 2186 +▁FORGOTTEN 2187 +▁FORM 2188 +▁FORMAL 2189 +▁FORMER 2190 +▁FORMS 2191 +▁FORMULA 2192 +▁FORTH 2193 +▁FORTUNATE 2194 +▁FORTUNATELY 2195 +▁FORTUNE 2196 +▁FORWARD 2197 +▁FOSSIL 2198 +▁FOSTER 2199 +▁FOUND 2200 +▁FOUNDATION 2201 +▁FOUR 2202 +▁FOURTH 2203 +▁FR 2204 +▁FRA 2205 +▁FRACTION 2206 +▁FRAGILE 2207 +▁FRAGMENT 2208 +▁FRAME 2209 +▁FRAMEWORK 2210 +▁FRANCE 2211 +▁FRANCISCO 2212 +▁FRANK 2213 +▁FRANKLY 2214 +▁FREAK 2215 +▁FREE 2216 +▁FREEDOM 2217 +▁FRENCH 2218 +▁FREQUENC 2219 +▁FREQUENT 2220 +▁FRESH 2221 +▁FRIDAY 2222 +▁FRIEND 2223 +▁FRIENDS 2224 +▁FRIENDSHIP 2225 +▁FROM 2226 +▁FRONT 2227 +▁FROZEN 2228 +▁FRUIT 2229 +▁FRUSTRATED 2230 +▁FRUSTRATING 2231 +▁FRUSTRATION 2232 +▁FUEL 2233 +▁FULFILL 2234 +▁FULL 2235 +▁FULLY 2236 +▁FUN 2237 +▁FUNCTION 2238 +▁FUNCTIONAL 2239 +▁FUND 2240 +▁FUNDAMENTAL 2241 +▁FUNDAMENTALLY 2242 +▁FUNDING 2243 +▁FUNERAL 2244 +▁FUNNY 2245 +▁FURNITURE 2246 +▁FURTHER 2247 +▁FUSION 2248 +▁FUTURE 2249 +▁G 2250 +▁GA 2251 +▁GAIN 2252 +▁GALAXIES 2253 +▁GALAXY 2254 +▁GALLON 2255 +▁GAME 2256 +▁GAMES 2257 +▁GANDHI 2258 +▁GAP 2259 +▁GARAGE 2260 +▁GARBAGE 2261 +▁GARDEN 2262 +▁GAS 2263 +▁GATE 2264 +▁GATHER 2265 +▁GAVE 2266 +▁GAY 2267 +▁GDP 2268 +▁GE 2269 +▁GEEK 2270 +▁GEN 2271 +▁GENDER 2272 +▁GENE 2273 +▁GENERAL 2274 +▁GENERALLY 2275 +▁GENERATE 2276 +▁GENERATING 2277 +▁GENERATION 2278 +▁GENERATIONS 2279 +▁GENEROSITY 2280 +▁GENEROUS 2281 +▁GENES 2282 +▁GENETIC 2283 +▁GENITAL 2284 +▁GENIUS 2285 +▁GENOCIDE 2286 +▁GENOME 2287 +▁GENTLE 2288 +▁GENTLEMEN 2289 +▁GENUINE 2290 +▁GEOGRAPHIC 2291 +▁GEORGE 2292 +▁GEORGIA 2293 +▁GERMAN 2294 +▁GERMANY 2295 +▁GESTURE 2296 +▁GET 2297 +▁GETS 2298 +▁GETTING 2299 +▁GI 2300 +▁GIANT 2301 +▁GIFT 2302 +▁GIRL 2303 +▁GIRLFRIEND 2304 +▁GIRLS 2305 +▁GIVE 2306 +▁GIVEN 2307 +▁GIVES 2308 +▁GIVING 2309 +▁GLAD 2310 +▁GLASS 2311 +▁GLIMPSE 2312 +▁GLOBAL 2313 +▁GLOBE 2314 +▁GLOW 2315 +▁GLUCOSE 2316 +▁GO 2317 +▁GOAL 2318 +▁GOALS 2319 +▁GOD 2320 +▁GOES 2321 +▁GOING 2322 +▁GOLD 2323 +▁GOLDEN 2324 +▁GOLF 2325 +▁GONE 2326 +▁GONNA 2327 +▁GOOD 2328 +▁GOODBYE 2329 +▁GOOGLE 2330 +▁GOT 2331 +▁GOTTEN 2332 +▁GOVERN 2333 +▁GOVERNMENT 2334 +▁GOVERNMENTS 2335 +▁GR 2336 +▁GRAB 2337 +▁GRABBED 2338 +▁GRACE 2339 +▁GRADE 2340 +▁GRADUALLY 2341 +▁GRADUATE 2342 +▁GRADUATION 2343 +▁GRAIN 2344 +▁GRAND 2345 +▁GRANDCHILDREN 2346 +▁GRANDFATHER 2347 +▁GRANDMA 2348 +▁GRANDMOTHER 2349 +▁GRANDPARENTS 2350 +▁GRANTED 2351 +▁GRAPH 2352 +▁GRASP 2353 +▁GRASS 2354 +▁GRATEFUL 2355 +▁GRATITUDE 2356 +▁GRAVE 2357 +▁GRAVITATIONAL 2358 +▁GRAVITY 2359 +▁GREAT 2360 +▁GREATER 2361 +▁GREATEST 2362 +▁GREECE 2363 +▁GREEK 2364 +▁GREEN 2365 +▁GREENHOUSE 2366 +▁GREET 2367 +▁GREW 2368 +▁GRID 2369 +▁GRIEF 2370 +▁GRIEV 2371 +▁GROCERY 2372 +▁GROSS 2373 +▁GROUND 2374 +▁GROUP 2375 +▁GROUPS 2376 +▁GROW 2377 +▁GROWING 2378 +▁GROWN 2379 +▁GROWTH 2380 +▁GU 2381 +▁GUARANTEE 2382 +▁GUARD 2383 +▁GUESS 2384 +▁GUIDANCE 2385 +▁GUIDE 2386 +▁GUILT 2387 +▁GUILTY 
2388 +▁GUITAR 2389 +▁GULF 2390 +▁GUN 2391 +▁GUT 2392 +▁GUY 2393 +▁GUYS 2394 +▁GYM 2395 +▁H 2396 +▁HA 2397 +▁HABIT 2398 +▁HABITAT 2399 +▁HACK 2400 +▁HAD 2401 +▁HADN 2402 +▁HAIR 2403 +▁HAITI 2404 +▁HALF 2405 +▁HALL 2406 +▁HALLUCINAT 2407 +▁HAND 2408 +▁HANDLE 2409 +▁HANDS 2410 +▁HANG 2411 +▁HAPPEN 2412 +▁HAPPENED 2413 +▁HAPPENING 2414 +▁HAPPENS 2415 +▁HAPPIER 2416 +▁HAPPILY 2417 +▁HAPPINESS 2418 +▁HAPPY 2419 +▁HAR 2420 +▁HARD 2421 +▁HARDER 2422 +▁HARDWARE 2423 +▁HARM 2424 +▁HARMONY 2425 +▁HARNESS 2426 +▁HARSH 2427 +▁HARVARD 2428 +▁HARVEST 2429 +▁HAS 2430 +▁HASN 2431 +▁HAT 2432 +▁HATE 2433 +▁HATRED 2434 +▁HAVE 2435 +▁HAVEN 2436 +▁HAVING 2437 +▁HAWAII 2438 +▁HE 2439 +▁HEAD 2440 +▁HEALTH 2441 +▁HEALTHCARE 2442 +▁HEALTHIER 2443 +▁HEALTHY 2444 +▁HEAR 2445 +▁HEARD 2446 +▁HEARING 2447 +▁HEART 2448 +▁HEARTBREAK 2449 +▁HEAT 2450 +▁HEAVEN 2451 +▁HEAVILY 2452 +▁HEAVY 2453 +▁HELD 2454 +▁HELLO 2455 +▁HELP 2456 +▁HELPED 2457 +▁HELPFUL 2458 +▁HELPING 2459 +▁HER 2460 +▁HERE 2461 +▁HERITAGE 2462 +▁HERO 2463 +▁HEROES 2464 +▁HERSELF 2465 +▁HETEROSEXUAL 2466 +▁HEY 2467 +▁HI 2468 +▁HIDDEN 2469 +▁HIDE 2470 +▁HIDING 2471 +▁HIERARCHY 2472 +▁HIGGS 2473 +▁HIGH 2474 +▁HIGHER 2475 +▁HIGHEST 2476 +▁HIGHLIGHT 2477 +▁HIGHLY 2478 +▁HILL 2479 +▁HIM 2480 +▁HIMSELF 2481 +▁HIP 2482 +▁HIRE 2483 +▁HIS 2484 +▁HISTORIAN 2485 +▁HISTORIC 2486 +▁HISTORICAL 2487 +▁HISTORY 2488 +▁HIT 2489 +▁HIV 2490 +▁HO 2491 +▁HOL 2492 +▁HOLD 2493 +▁HOLDING 2494 +▁HOLE 2495 +▁HOLIDAY 2496 +▁HOLLYWOOD 2497 +▁HOME 2498 +▁HOMELESS 2499 +▁HOMEWORK 2500 +▁HOMO 2501 +▁HOMOSEXUAL 2502 +▁HONEST 2503 +▁HONESTLY 2504 +▁HONEY 2505 +▁HONOR 2506 +▁HOOK 2507 +▁HOP 2508 +▁HOPE 2509 +▁HOPEFULLY 2510 +▁HORIZON 2511 +▁HORR 2512 +▁HORRIBLE 2513 +▁HORRIFIC 2514 +▁HORSE 2515 +▁HOSPITAL 2516 +▁HOST 2517 +▁HOT 2518 +▁HOTEL 2519 +▁HOUR 2520 +▁HOURS 2521 +▁HOUSE 2522 +▁HOUSEHOLD 2523 +▁HOUSING 2524 +▁HOW 2525 +▁HOWEVER 2526 +▁HU 2527 +▁HUG 2528 +▁HUGE 2529 +▁HUM 2530 +▁HUMAN 2531 +▁HUMANITARIAN 2532 +▁HUMANITY 2533 +▁HUMANS 2534 +▁HUMILITY 2535 +▁HUMOR 2536 +▁HUNDRED 2537 +▁HUNDREDS 2538 +▁HUNGER 2539 +▁HUNGRY 2540 +▁HUNT 2541 +▁HURT 2542 +▁HUSBAND 2543 +▁HYDRO 2544 +▁HYDROGEN 2545 +▁HYMEN 2546 +▁HYPER 2547 +▁HYPOTHESIS 2548 +▁HYPOTHETICAL 2549 +▁I 2550 +▁ICE 2551 +▁IDEA 2552 +▁IDEAL 2553 +▁IDEAS 2554 +▁IDENTICAL 2555 +▁IDENTIFIED 2556 +▁IDENTIFY 2557 +▁IDENTITIES 2558 +▁IDENTITY 2559 +▁IDEOLOGY 2560 +▁IF 2561 +▁IGNORANCE 2562 +▁IGNORE 2563 +▁ILLEGAL 2564 +▁ILLNESS 2565 +▁ILLUSION 2566 +▁ILLUSTRATE 2567 +▁IMAGE 2568 +▁IMAGES 2569 +▁IMAGINATION 2570 +▁IMAGINE 2571 +▁IMAGING 2572 +▁IMAGINING 2573 +▁IMMEDIATE 2574 +▁IMMEDIATELY 2575 +▁IMMENSE 2576 +▁IMMIGRANT 2577 +▁IMMIGRATION 2578 +▁IMMUNE 2579 +▁IMPACT 2580 +▁IMPAIRED 2581 +▁IMPERFECT 2582 +▁IMPLANT 2583 +▁IMPLEMENT 2584 +▁IMPLICATIONS 2585 +▁IMPLIE 2586 +▁IMPORTANCE 2587 +▁IMPORTANT 2588 +▁IMPORTANTLY 2589 +▁IMPOSE 2590 +▁IMPOSSIBLE 2591 +▁IMPRESS 2592 +▁IMPRESSION 2593 +▁IMPROV 2594 +▁IMPROVE 2595 +▁IMPROVEMENT 2596 +▁IMPULSE 2597 +▁IN 2598 +▁INCARCERATED 2599 +▁INCARCERATION 2600 +▁INCENTIVE 2601 +▁INCIDENT 2602 +▁INCLUDE 2603 +▁INCLUDING 2604 +▁INCLUSION 2605 +▁INCLUSIVE 2606 +▁INCOME 2607 +▁INCREASE 2608 +▁INCREASED 2609 +▁INCREASING 2610 +▁INCREASINGLY 2611 +▁INCREDIBLE 2612 +▁INCREDIBLY 2613 +▁INDEED 2614 +▁INDEPENDENCE 2615 +▁INDEPENDENT 2616 +▁INDIA 2617 +▁INDIAN 2618 +▁INDICATE 2619 +▁INDICATOR 2620 +▁INDIGENOUS 2621 +▁INDIVIDUAL 2622 +▁INDIVIDUALS 2623 +▁INDUCE 2624 +▁INDUSTRIAL 2625 +▁INDUSTRIES 2626 +▁INDUSTRY 2627 +▁INEQUALITY 2628 +▁INEVITAB 2629 +▁INFANT 2630 +▁INFECTION 2631 +▁INFECTIOUS 2632 +▁INFINITE 2633 
+▁INFLAT 2634 +▁INFLUENCE 2635 +▁INFORMATION 2636 +▁INFORMED 2637 +▁INFRASTRUCTURE 2638 +▁INGREDIENT 2639 +▁INHABIT 2640 +▁INHERIT 2641 +▁INHIBIT 2642 +▁INITIAL 2643 +▁INITIATIVE 2644 +▁INJECT 2645 +▁INJURED 2646 +▁INJURIES 2647 +▁INJURY 2648 +▁INJUSTICE 2649 +▁INNER 2650 +▁INNOCENT 2651 +▁INNOVATE 2652 +▁INNOVATION 2653 +▁INNOVATIVE 2654 +▁INNOVATOR 2655 +▁INPUT 2656 +▁INSANE 2657 +▁INSECTS 2658 +▁INSIDE 2659 +▁INSIGHT 2660 +▁INSPIRATION 2661 +▁INSPIRE 2662 +▁INSPIRED 2663 +▁INSPIRING 2664 +▁INSTAGRAM 2665 +▁INSTALL 2666 +▁INSTANCE 2667 +▁INSTANT 2668 +▁INSTEAD 2669 +▁INSTINCT 2670 +▁INSTITUTE 2671 +▁INSTITUTION 2672 +▁INSTITUTIONS 2673 +▁INSTRUCTION 2674 +▁INSTRUMENT 2675 +▁INSULIN 2676 +▁INSURANCE 2677 +▁INTEGRATE 2678 +▁INTEGRATION 2679 +▁INTEGRITY 2680 +▁INTELLECTUAL 2681 +▁INTELLIGENCE 2682 +▁INTELLIGENT 2683 +▁INTENSE 2684 +▁INTENSIVE 2685 +▁INTENTION 2686 +▁INTER 2687 +▁INTERACT 2688 +▁INTERACTION 2689 +▁INTERCONNECT 2690 +▁INTEREST 2691 +▁INTERESTED 2692 +▁INTERESTING 2693 +▁INTERFACE 2694 +▁INTERFERE 2695 +▁INTERNAL 2696 +▁INTERNATIONAL 2697 +▁INTERNET 2698 +▁INTERNSHIP 2699 +▁INTERPRET 2700 +▁INTERRUPT 2701 +▁INTERSECTION 2702 +▁INTERVENTION 2703 +▁INTERVIEW 2704 +▁INTIMACY 2705 +▁INTIMATE 2706 +▁INTO 2707 +▁INTRIGU 2708 +▁INTRODUCE 2709 +▁INTRODUCED 2710 +▁INTRODUCING 2711 +▁INTRODUCTION 2712 +▁INTROVERT 2713 +▁INTUITION 2714 +▁INTUITIVE 2715 +▁INVENT 2716 +▁INVENTED 2717 +▁INVENTION 2718 +▁INVEST 2719 +▁INVESTIGAT 2720 +▁INVESTIGATION 2721 +▁INVESTMENT 2722 +▁INVISIBLE 2723 +▁INVITATION 2724 +▁INVITE 2725 +▁INVITED 2726 +▁INVOLVE 2727 +▁INVOLVED 2728 +▁IPHONE 2729 +▁IRAQ 2730 +▁IRON 2731 +▁IRRATIONAL 2732 +▁IS 2733 +▁ISLAM 2734 +▁ISLAMIC 2735 +▁ISLAND 2736 +▁ISN 2737 +▁ISOLATED 2738 +▁ISOLATION 2739 +▁ISRAEL 2740 +▁ISSUE 2741 +▁ISSUES 2742 +▁IT 2743 +▁ITEM 2744 +▁ITSELF 2745 +▁J 2746 +▁JACK 2747 +▁JAIL 2748 +▁JAMES 2749 +▁JANE 2750 +▁JANUARY 2751 +▁JAPAN 2752 +▁JAPANESE 2753 +▁JAZZ 2754 +▁JE 2755 +▁JEALOUS 2756 +▁JENN 2757 +▁JERSEY 2758 +▁JEWISH 2759 +▁JIHAD 2760 +▁JIM 2761 +▁JO 2762 +▁JOB 2763 +▁JOBS 2764 +▁JOHN 2765 +▁JOIN 2766 +▁JOINED 2767 +▁JOKE 2768 +▁JORDAN 2769 +▁JOURNAL 2770 +▁JOURNALIST 2771 +▁JOURNEY 2772 +▁JOY 2773 +▁JU 2774 +▁JUDGE 2775 +▁JUDGING 2776 +▁JUDGMENT 2777 +▁JUICE 2778 +▁JUMP 2779 +▁JUNIOR 2780 +▁JUNK 2781 +▁JUST 2782 +▁JUSTICE 2783 +▁K 2784 +▁KA 2785 +▁KAR 2786 +▁KE 2787 +▁KEEP 2788 +▁KEEPING 2789 +▁KENNEDY 2790 +▁KENYA 2791 +▁KEPT 2792 +▁KEVIN 2793 +▁KEY 2794 +▁KI 2795 +▁KICK 2796 +▁KID 2797 +▁KIDNEY 2798 +▁KIDS 2799 +▁KILL 2800 +▁KILLED 2801 +▁KILLING 2802 +▁KILOMETER 2803 +▁KIND 2804 +▁KINDERGARTEN 2805 +▁KINDNESS 2806 +▁KINDS 2807 +▁KING 2808 +▁KINGDOM 2809 +▁KITCHEN 2810 +▁KNEE 2811 +▁KNEW 2812 +▁KNIFE 2813 +▁KNOCK 2814 +▁KNOW 2815 +▁KNOWING 2816 +▁KNOWLEDGE 2817 +▁KNOWN 2818 +▁KO 2819 +▁KOREA 2820 +▁L 2821 +▁LA 2822 +▁LAB 2823 +▁LABEL 2824 +▁LABOR 2825 +▁LABORATORY 2826 +▁LACK 2827 +▁LADIES 2828 +▁LADY 2829 +▁LAKE 2830 +▁LAND 2831 +▁LANDSCAPE 2832 +▁LANGUAGE 2833 +▁LANGUAGES 2834 +▁LAPTOP 2835 +▁LARGE 2836 +▁LARGELY 2837 +▁LARGER 2838 +▁LARGEST 2839 +▁LAST 2840 +▁LATE 2841 +▁LATER 2842 +▁LATIN 2843 +▁LATVIA 2844 +▁LAUGH 2845 +▁LAUGHTER 2846 +▁LAUNCH 2847 +▁LAUNCHED 2848 +▁LAW 2849 +▁LAWS 2850 +▁LAWYER 2851 +▁LAY 2852 +▁LAYER 2853 +▁LAZY 2854 +▁LE 2855 +▁LEAD 2856 +▁LEADER 2857 +▁LEADERS 2858 +▁LEADERSHIP 2859 +▁LEADING 2860 +▁LEADS 2861 +▁LEAF 2862 +▁LEAGUE 2863 +▁LEAN 2864 +▁LEAP 2865 +▁LEARN 2866 +▁LEARNED 2867 +▁LEARNING 2868 +▁LEAST 2869 +▁LEAVE 2870 +▁LEAVING 2871 +▁LEBANON 2872 +▁LECTURE 2873 +▁LED 2874 +▁LEFT 2875 +▁LEG 2876 +▁LEGACY 2877 
+▁LEGAL 2878 +▁LEGEND 2879 +▁LEGISLAT 2880 +▁LEGITIMATE 2881 +▁LEGS 2882 +▁LENGTH 2883 +▁LENS 2884 +▁LESBIAN 2885 +▁LESS 2886 +▁LESSON 2887 +▁LESSONS 2888 +▁LET 2889 +▁LETTER 2890 +▁LETTING 2891 +▁LEVEL 2892 +▁LEVELS 2893 +▁LEVERAGE 2894 +▁LG 2895 +▁LGBT 2896 +▁LI 2897 +▁LIBERAL 2898 +▁LIBERAT 2899 +▁LIBERTY 2900 +▁LIBRARIES 2901 +▁LIBRARY 2902 +▁LICENSE 2903 +▁LIE 2904 +▁LIES 2905 +▁LIFE 2906 +▁LIFESPAN 2907 +▁LIFESTYLE 2908 +▁LIFETIME 2909 +▁LIFT 2910 +▁LIGHT 2911 +▁LIKE 2912 +▁LIKELY 2913 +▁LIMB 2914 +▁LIMIT 2915 +▁LIMITATIONS 2916 +▁LIMITED 2917 +▁LINE 2918 +▁LINES 2919 +▁LINGUISTIC 2920 +▁LINK 2921 +▁LINKED 2922 +▁LION 2923 +▁LIQUID 2924 +▁LIST 2925 +▁LISTEN 2926 +▁LISTENED 2927 +▁LISTENING 2928 +▁LITERACY 2929 +▁LITERALLY 2930 +▁LITERATURE 2931 +▁LITTLE 2932 +▁LIVE 2933 +▁LIVED 2934 +▁LIVES 2935 +▁LIVING 2936 +▁LO 2937 +▁LOAD 2938 +▁LOAN 2939 +▁LOCAL 2940 +▁LOCATE 2941 +▁LOCATION 2942 +▁LOCK 2943 +▁LOG 2944 +▁LOGIC 2945 +▁LOGICAL 2946 +▁LONDON 2947 +▁LONELINESS 2948 +▁LONELY 2949 +▁LONG 2950 +▁LONGER 2951 +▁LONGEVITY 2952 +▁LOOK 2953 +▁LOOKED 2954 +▁LOOKING 2955 +▁LOOKS 2956 +▁LOOP 2957 +▁LOOSE 2958 +▁LOSE 2959 +▁LOSING 2960 +▁LOSS 2961 +▁LOST 2962 +▁LOT 2963 +▁LOTS 2964 +▁LOUD 2965 +▁LOUIS 2966 +▁LOVE 2967 +▁LOVED 2968 +▁LOVING 2969 +▁LOW 2970 +▁LOWER 2971 +▁LU 2972 +▁LUCK 2973 +▁LUCKILY 2974 +▁LUCKY 2975 +▁LUNCH 2976 +▁LUNG 2977 +▁LYING 2978 +▁M 2979 +▁MA 2980 +▁MACHINE 2981 +▁MACHINES 2982 +▁MAD 2983 +▁MADE 2984 +▁MAGAZINE 2985 +▁MAGIC 2986 +▁MAGICAL 2987 +▁MAGNET 2988 +▁MAGNITUDE 2989 +▁MAIL 2990 +▁MAIN 2991 +▁MAINSTREAM 2992 +▁MAINTAIN 2993 +▁MAJOR 2994 +▁MAJORITY 2995 +▁MAKE 2996 +▁MAKES 2997 +▁MAKING 2998 +▁MALARIA 2999 +▁MALE 3000 +▁MAMMAL 3001 +▁MAMMOTH 3002 +▁MAN 3003 +▁MANAGE 3004 +▁MANAGED 3005 +▁MANAGEMENT 3006 +▁MANAGER 3007 +▁MANAGING 3008 +▁MANHATTAN 3009 +▁MANIFEST 3010 +▁MANIPULAT 3011 +▁MANKIND 3012 +▁MANNER 3013 +▁MANUFACTURE 3014 +▁MANUFACTURING 3015 +▁MANY 3016 +▁MAP 3017 +▁MAR 3018 +▁MARATHON 3019 +▁MARCH 3020 +▁MARIJUANA 3021 +▁MARINE 3022 +▁MARK 3023 +▁MARKET 3024 +▁MARKETING 3025 +▁MARRIAGE 3026 +▁MARRIED 3027 +▁MARRY 3028 +▁MARS 3029 +▁MARTIN 3030 +▁MARY 3031 +▁MASCULINE 3032 +▁MASCULINITY 3033 +▁MASK 3034 +▁MASS 3035 +▁MASSIVE 3036 +▁MASTER 3037 +▁MATCH 3038 +▁MATERIAL 3039 +▁MATERIALS 3040 +▁MATH 3041 +▁MATHEMATICAL 3042 +▁MATHEMATICIAN 3043 +▁MATHEMATICS 3044 +▁MATTER 3045 +▁MATTERS 3046 +▁MAXIMIZ 3047 +▁MAXIMUM 3048 +▁MAY 3049 +▁MAYBE 3050 +▁MC 3051 +▁ME 3052 +▁MEAN 3053 +▁MEANING 3054 +▁MEANINGFUL 3055 +▁MEANS 3056 +▁MEANT 3057 +▁MEANWHILE 3058 +▁MEASURE 3059 +▁MEASURING 3060 +▁MEAT 3061 +▁MECHANICAL 3062 +▁MECHANISM 3063 +▁MEDIA 3064 +▁MEDICAL 3065 +▁MEDICATION 3066 +▁MEDICINE 3067 +▁MEDITATION 3068 +▁MEDIUM 3069 +▁MEET 3070 +▁MEETING 3071 +▁MELT 3072 +▁MEMBER 3073 +▁MEMBERS 3074 +▁MEMORIES 3075 +▁MEMORIZE 3076 +▁MEMORY 3077 +▁MEN 3078 +▁MENTAL 3079 +▁MENTION 3080 +▁MENTIONED 3081 +▁MENTOR 3082 +▁MERELY 3083 +▁MESS 3084 +▁MESSAGE 3085 +▁MESSAGES 3086 +▁MET 3087 +▁METABOLI 3088 +▁METAL 3089 +▁METAPHOR 3090 +▁METERS 3091 +▁METHOD 3092 +▁METROPOLI 3093 +▁MEXICO 3094 +▁MICE 3095 +▁MICHAEL 3096 +▁MICHEL 3097 +▁MICHIGAN 3098 +▁MICRO 3099 +▁MICROBES 3100 +▁MICROSCOPE 3101 +▁MICROSOFT 3102 +▁MID 3103 +▁MIDDLE 3104 +▁MIGHT 3105 +▁MIGRANT 3106 +▁MIGRATION 3107 +▁MIKE 3108 +▁MIL 3109 +▁MILE 3110 +▁MILES 3111 +▁MILITARY 3112 +▁MILK 3113 +▁MILL 3114 +▁MILLENNIA 3115 +▁MILLION 3116 +▁MILLIONS 3117 +▁MIN 3118 +▁MIND 3119 +▁MINDFULNESS 3120 +▁MINDS 3121 +▁MINDSET 3122 +▁MINE 3123 +▁MINI 3124 +▁MINIMUM 3125 +▁MINISTER 3126 +▁MINORITY 3127 +▁MINUTE 3128 +▁MINUTES 3129 
+▁MIRACLE 3130 +▁MIRROR 3131 +▁MIS 3132 +▁MISERABLE 3133 +▁MISS 3134 +▁MISSING 3135 +▁MISSION 3136 +▁MISTAKE 3137 +▁MISTAKES 3138 +▁MIT 3139 +▁MIX 3140 +▁MO 3141 +▁MOBILE 3142 +▁MOBILITY 3143 +▁MODE 3144 +▁MODEL 3145 +▁MODELS 3146 +▁MODERN 3147 +▁MOLECULAR 3148 +▁MOLECULE 3149 +▁MOM 3150 +▁MOMENT 3151 +▁MOMENTS 3152 +▁MONDAY 3153 +▁MONEY 3154 +▁MONITOR 3155 +▁MONKEY 3156 +▁MONSTER 3157 +▁MONTH 3158 +▁MONTHS 3159 +▁MOOD 3160 +▁MOON 3161 +▁MOR 3162 +▁MORAL 3163 +▁MORE 3164 +▁MORNING 3165 +▁MORTALITY 3166 +▁MOSQUITO 3167 +▁MOST 3168 +▁MOSTLY 3169 +▁MOTHER 3170 +▁MOTION 3171 +▁MOTIVATE 3172 +▁MOTIVATED 3173 +▁MOTIVATION 3174 +▁MOTOR 3175 +▁MOUNT 3176 +▁MOUNTAIN 3177 +▁MOUTH 3178 +▁MOVE 3179 +▁MOVED 3180 +▁MOVEMENT 3181 +▁MOVIE 3182 +▁MOVING 3183 +▁MOZART 3184 +▁MR 3185 +▁MU 3186 +▁MUCH 3187 +▁MULTI 3188 +▁MULTIPLE 3189 +▁MULTIPLY 3190 +▁MUM 3191 +▁MURDER 3192 +▁MUSCLE 3193 +▁MUSEUM 3194 +▁MUSIC 3195 +▁MUSICAL 3196 +▁MUSLIM 3197 +▁MUST 3198 +▁MUTATION 3199 +▁MUTUAL 3200 +▁MY 3201 +▁MYSELF 3202 +▁MYSTERIOUS 3203 +▁MYSTERY 3204 +▁MYTH 3205 +▁N 3206 +▁NA 3207 +▁NAKED 3208 +▁NAME 3209 +▁NAMED 3210 +▁NANO 3211 +▁NARRATIVE 3212 +▁NARROW 3213 +▁NASA 3214 +▁NATION 3215 +▁NATIONAL 3216 +▁NATIONS 3217 +▁NATIVE 3218 +▁NATURAL 3219 +▁NATURALLY 3220 +▁NATURE 3221 +▁NAVIGAT 3222 +▁NAVIGATE 3223 +▁NBSP 3224 +▁NE 3225 +▁NEAR 3226 +▁NEARLY 3227 +▁NECESSARILY 3228 +▁NECESSARY 3229 +▁NECK 3230 +▁NEED 3231 +▁NEEDED 3232 +▁NEEDS 3233 +▁NEGATIVE 3234 +▁NEGLECT 3235 +▁NEGOTIATE 3236 +▁NEGOTIATION 3237 +▁NEIGHBOR 3238 +▁NEIGHBORHOOD 3239 +▁NEIGHBORS 3240 +▁NEIGHBOUR 3241 +▁NEITHER 3242 +▁NERVE 3243 +▁NERVOUS 3244 +▁NETHERLANDS 3245 +▁NETWORK 3246 +▁NEURAL 3247 +▁NEURO 3248 +▁NEURONS 3249 +▁NEUROSCIENCE 3250 +▁NEUROSCIENTIST 3251 +▁NEUTRAL 3252 +▁NEUTRON 3253 +▁NEVER 3254 +▁NEW 3255 +▁NEWS 3256 +▁NEWSPAPER 3257 +▁NEXT 3258 +▁NGO 3259 +▁NI 3260 +▁NICE 3261 +▁NIGERIA 3262 +▁NIGHT 3263 +▁NIGHTMARE 3264 +▁NINE 3265 +▁NO 3266 +▁NOBEL 3267 +▁NOBODY 3268 +▁NOISE 3269 +▁NON 3270 +▁NONE 3271 +▁NONPROFIT 3272 +▁NONVIOLENT 3273 +▁NOR 3274 +▁NORM 3275 +▁NORMAL 3276 +▁NORMALLY 3277 +▁NORTH 3278 +▁NORTHERN 3279 +▁NORWAY 3280 +▁NORWEGIAN 3281 +▁NOT 3282 +▁NOTE 3283 +▁NOTHING 3284 +▁NOTICE 3285 +▁NOTICED 3286 +▁NOTION 3287 +▁NOURISH 3288 +▁NOVEL 3289 +▁NOVEMBER 3290 +▁NOW 3291 +▁NOWADAYS 3292 +▁NOWHERE 3293 +▁NS 3294 +▁NU 3295 +▁NUCLEAR 3296 +▁NUMB 3297 +▁NUMBER 3298 +▁NUMBERS 3299 +▁NUMEROUS 3300 +▁NURSE 3301 +▁NURSING 3302 +▁NURTURE 3303 +▁NUTRITION 3304 +▁O 3305 +▁OB 3306 +▁OBAMA 3307 +▁OBESITY 3308 +▁OBJECT 3309 +▁OBJECTIVE 3310 +▁OBJECTS 3311 +▁OBLIGATION 3312 +▁OBSERVATION 3313 +▁OBSERVE 3314 +▁OBSESSED 3315 +▁OBSTACLE 3316 +▁OBTAIN 3317 +▁OBVIOUS 3318 +▁OBVIOUSLY 3319 +▁OCCASION 3320 +▁OCCUPY 3321 +▁OCCUR 3322 +▁OCCURRED 3323 +▁OCEAN 3324 +▁OCTOBER 3325 +▁ODD 3326 +▁OF 3327 +▁OFF 3328 +▁OFFENDERS 3329 +▁OFFER 3330 +▁OFFERED 3331 +▁OFFICE 3332 +▁OFFICER 3333 +▁OFFICIAL 3334 +▁OFFSPRING 3335 +▁OFTEN 3336 +▁OFTENTIMES 3337 +▁OH 3338 +▁OIL 3339 +▁OK 3340 +▁OKAY 3341 +▁OLD 3342 +▁OLDER 3343 +▁OLIVE 3344 +▁OLYMPIC 3345 +▁ON 3346 +▁ONCE 3347 +▁ONE 3348 +▁ONGOING 3349 +▁ONLINE 3350 +▁ONLY 3351 +▁ONTO 3352 +▁OP 3353 +▁OPEN 3354 +▁OPENED 3355 +▁OPENING 3356 +▁OPERA 3357 +▁OPERATE 3358 +▁OPERATING 3359 +▁OPERATION 3360 +▁OPINION 3361 +▁OPPONENT 3362 +▁OPPORTUNITIES 3363 +▁OPPORTUNITY 3364 +▁OPPOSED 3365 +▁OPPOSITE 3366 +▁OPPOSITION 3367 +▁OPPRESS 3368 +▁OPTIMAL 3369 +▁OPTIMISM 3370 +▁OPTIMIZ 3371 +▁OPTION 3372 +▁OPTIONS 3373 +▁OR 3374 +▁ORANGE 3375 +▁ORBIT 3376 +▁ORCHESTRA 3377 +▁ORDER 3378 +▁ORDINARY 3379 +▁ORGAN 3380 +▁ORGANIC 3381 
+▁ORGANISM 3382 +▁ORGANIZATION 3383 +▁ORGANIZATIONS 3384 +▁ORGANIZE 3385 +▁ORGANIZED 3386 +▁ORGANIZING 3387 +▁ORGASM 3388 +▁ORIENTATION 3389 +▁ORIENTED 3390 +▁ORIGIN 3391 +▁ORIGINAL 3392 +▁ORPHAN 3393 +▁OTHER 3394 +▁OTHERS 3395 +▁OTHERWISE 3396 +▁OUR 3397 +▁OURSELVES 3398 +▁OUT 3399 +▁OUTBREAK 3400 +▁OUTCOME 3401 +▁OUTRAGE 3402 +▁OUTSIDE 3403 +▁OVER 3404 +▁OVERALL 3405 +▁OVERCOME 3406 +▁OVERLOOK 3407 +▁OVERNIGHT 3408 +▁OVERWHELMED 3409 +▁OVERWHELMING 3410 +▁OWE 3411 +▁OWN 3412 +▁OWNERSHIP 3413 +▁OXFORD 3414 +▁OXYGEN 3415 +▁OXYTOCIN 3416 +▁P 3417 +▁PA 3418 +▁PACE 3419 +▁PACIFIC 3420 +▁PACK 3421 +▁PACKAGE 3422 +▁PAGE 3423 +▁PAID 3424 +▁PAIN 3425 +▁PAINFUL 3426 +▁PAINT 3427 +▁PAINTING 3428 +▁PAIR 3429 +▁PAKISTAN 3430 +▁PALESTINIAN 3431 +▁PAN 3432 +▁PANEL 3433 +▁PANIC 3434 +▁PANTS 3435 +▁PAPER 3436 +▁PARA 3437 +▁PARADIGM 3438 +▁PARADOX 3439 +▁PARALLEL 3440 +▁PARALYZE 3441 +▁PARENT 3442 +▁PARENTS 3443 +▁PARIS 3444 +▁PARK 3445 +▁PARKINSON 3446 +▁PARLIAMENT 3447 +▁PART 3448 +▁PARTICIPAT 3449 +▁PARTICIPATE 3450 +▁PARTICIPATION 3451 +▁PARTICLE 3452 +▁PARTICLES 3453 +▁PARTICULAR 3454 +▁PARTICULARLY 3455 +▁PARTIES 3456 +▁PARTNER 3457 +▁PARTNERSHIP 3458 +▁PARTS 3459 +▁PARTY 3460 +▁PASS 3461 +▁PASSED 3462 +▁PASSENGER 3463 +▁PASSION 3464 +▁PASSIONATE 3465 +▁PAST 3466 +▁PATENT 3467 +▁PATH 3468 +▁PATHOGEN 3469 +▁PATHWAY 3470 +▁PATIENCE 3471 +▁PATIENT 3472 +▁PATIENTS 3473 +▁PATTERN 3474 +▁PATTERNS 3475 +▁PAUL 3476 +▁PAUSE 3477 +▁PAY 3478 +▁PAYING 3479 +▁PE 3480 +▁PEACE 3481 +▁PEACEFUL 3482 +▁PEAK 3483 +▁PEER 3484 +▁PEN 3485 +▁PENALTY 3486 +▁PENGUIN 3487 +▁PENNSYLVANIA 3488 +▁PEOPLE 3489 +▁PER 3490 +▁PERCEIVE 3491 +▁PERCENT 3492 +▁PERCENTAGE 3493 +▁PERCEPTION 3494 +▁PERFECT 3495 +▁PERFECTION 3496 +▁PERFECTLY 3497 +▁PERFORM 3498 +▁PERFORMANCE 3499 +▁PERFORMING 3500 +▁PERHAPS 3501 +▁PERIOD 3502 +▁PERMANENT 3503 +▁PERMISSION 3504 +▁PERPETRATOR 3505 +▁PERPETUAT 3506 +▁PERSIST 3507 +▁PERSON 3508 +▁PERSONAL 3509 +▁PERSONALITY 3510 +▁PERSONALLY 3511 +▁PERSPECTIVE 3512 +▁PERSUADE 3513 +▁PESTICIDE 3514 +▁PET 3515 +▁PH 3516 +▁PHARMA 3517 +▁PHARMACEUTICAL 3518 +▁PHASE 3519 +▁PHENOMENA 3520 +▁PHENOMENON 3521 +▁PHILADELPHIA 3522 +▁PHILANTHROP 3523 +▁PHILOSOPHER 3524 +▁PHILOSOPHICAL 3525 +▁PHILOSOPHY 3526 +▁PHONE 3527 +▁PHOTO 3528 +▁PHOTOGRAPH 3529 +▁PHRASE 3530 +▁PHYSICAL 3531 +▁PHYSICALLY 3532 +▁PHYSICIAN 3533 +▁PHYSICIST 3534 +▁PHYSICS 3535 +▁PHYSIOLOGICAL 3536 +▁PI 3537 +▁PIANO 3538 +▁PICK 3539 +▁PICKED 3540 +▁PICTURE 3541 +▁PICTURES 3542 +▁PIECE 3543 +▁PIECES 3544 +▁PIG 3545 +▁PILL 3546 +▁PILOT 3547 +▁PIN 3548 +▁PINK 3549 +▁PIONEER 3550 +▁PITCH 3551 +▁PL 3552 +▁PLACE 3553 +▁PLACES 3554 +▁PLAIN 3555 +▁PLAN 3556 +▁PLANE 3557 +▁PLANET 3558 +▁PLANNING 3559 +▁PLANT 3560 +▁PLANTS 3561 +▁PLASTIC 3562 +▁PLATE 3563 +▁PLATFORM 3564 +▁PLAY 3565 +▁PLAYED 3566 +▁PLAYER 3567 +▁PLAYGROUND 3568 +▁PLAYING 3569 +▁PLEA 3570 +▁PLEASE 3571 +▁PLEASURE 3572 +▁PLENTY 3573 +▁PLOT 3574 +▁PLUS 3575 +▁PO 3576 +▁POCKET 3577 +▁POEM 3578 +▁POET 3579 +▁POETRY 3580 +▁POINT 3581 +▁POINTS 3582 +▁POISON 3583 +▁POLAR 3584 +▁POLE 3585 +▁POLICE 3586 +▁POLICIES 3587 +▁POLICY 3588 +▁POLIO 3589 +▁POLISH 3590 +▁POLITE 3591 +▁POLITICAL 3592 +▁POLITICIAN 3593 +▁POLITICIANS 3594 +▁POLITICS 3595 +▁POLL 3596 +▁POLLUTION 3597 +▁POOL 3598 +▁POOR 3599 +▁POP 3600 +▁POPULAR 3601 +▁POPULATION 3602 +▁PORN 3603 +▁PORT 3604 +▁PORTRAIT 3605 +▁PORTRAY 3606 +▁POSE 3607 +▁POSITION 3608 +▁POSITIVE 3609 +▁POSSESS 3610 +▁POSSIBILITIES 3611 +▁POSSIBILITY 3612 +▁POSSIBLE 3613 +▁POSSIBLY 3614 +▁POST 3615 +▁POTATO 3616 +▁POTENTIAL 3617 +▁POTENTIALLY 3618 +▁POUND 3619 +▁POUNDS 3620 
+▁POUR 3621 +▁POVERTY 3622 +▁POWER 3623 +▁POWERFUL 3624 +▁PR 3625 +▁PRACTICAL 3626 +▁PRACTICE 3627 +▁PRACTICING 3628 +▁PRAISE 3629 +▁PRAY 3630 +▁PRE 3631 +▁PRECIOUS 3632 +▁PRECISE 3633 +▁PREDATOR 3634 +▁PREDICT 3635 +▁PREFER 3636 +▁PREGNANCY 3637 +▁PREGNANT 3638 +▁PREJUDICE 3639 +▁PREPARATION 3640 +▁PREPARE 3641 +▁PREPARED 3642 +▁PREPARING 3643 +▁PRESCRIBE 3644 +▁PRESCRIPTION 3645 +▁PRESENCE 3646 +▁PRESENT 3647 +▁PRESENTATION 3648 +▁PRESERVE 3649 +▁PRESIDENT 3650 +▁PRESS 3651 +▁PRESSURE 3652 +▁PRETEND 3653 +▁PRETTY 3654 +▁PREVENT 3655 +▁PREVIOUS 3656 +▁PRICE 3657 +▁PRIDE 3658 +▁PRIM 3659 +▁PRIMARILY 3660 +▁PRIMARY 3661 +▁PRIME 3662 +▁PRIMITIVE 3663 +▁PRINCE 3664 +▁PRINCIPAL 3665 +▁PRINCIPLE 3666 +▁PRINCIPLES 3667 +▁PRINT 3668 +▁PRIOR 3669 +▁PRIORITIZE 3670 +▁PRIORITY 3671 +▁PRISON 3672 +▁PRIVACY 3673 +▁PRIVATE 3674 +▁PRIVILEGE 3675 +▁PRIZE 3676 +▁PRO 3677 +▁PROBABILITY 3678 +▁PROBABLY 3679 +▁PROBLEM 3680 +▁PROBLEMATIC 3681 +▁PROBLEMS 3682 +▁PROCEDURE 3683 +▁PROCEED 3684 +▁PROCESS 3685 +▁PROCESSES 3686 +▁PROCRASTINAT 3687 +▁PRODUCE 3688 +▁PRODUCED 3689 +▁PRODUCING 3690 +▁PRODUCT 3691 +▁PRODUCTION 3692 +▁PRODUCTIVE 3693 +▁PRODUCTIVITY 3694 +▁PRODUCTS 3695 +▁PROFESSION 3696 +▁PROFESSIONAL 3697 +▁PROFESSOR 3698 +▁PROFILE 3699 +▁PROFIT 3700 +▁PROFOUND 3701 +▁PROGRAM 3702 +▁PROGRAMME 3703 +▁PROGRAMMING 3704 +▁PROGRAMS 3705 +▁PROGRESS 3706 +▁PROJECT 3707 +▁PROJECTS 3708 +▁PROMISE 3709 +▁PROMISING 3710 +▁PROMOT 3711 +▁PROMOTE 3712 +▁PROOF 3713 +▁PROP 3714 +▁PROPER 3715 +▁PROPERLY 3716 +▁PROPERTIES 3717 +▁PROPERTY 3718 +▁PROPORTION 3719 +▁PROPOSAL 3720 +▁PROPOSE 3721 +▁PROPOSITION 3722 +▁PROSECUT 3723 +▁PROSPECT 3724 +▁PROSPERITY 3725 +▁PROSTHETIC 3726 +▁PROTECT 3727 +▁PROTECTED 3728 +▁PROTECTION 3729 +▁PROTEIN 3730 +▁PROTEST 3731 +▁PROTOCOL 3732 +▁PROTOTYPE 3733 +▁PROUD 3734 +▁PROVE 3735 +▁PROVIDE 3736 +▁PROVIDING 3737 +▁PSYCH 3738 +▁PSYCHIATRIST 3739 +▁PSYCHOLOGICAL 3740 +▁PSYCHOLOGIST 3741 +▁PSYCHOLOGY 3742 +▁PSYCHOPATH 3743 +▁PTSD 3744 +▁PUBLIC 3745 +▁PUBLISH 3746 +▁PUBLISHED 3747 +▁PULL 3748 +▁PULLED 3749 +▁PULSE 3750 +▁PUMP 3751 +▁PUNCH 3752 +▁PUNISH 3753 +▁PUNISHMENT 3754 +▁PUR 3755 +▁PURCHASE 3756 +▁PURE 3757 +▁PURPLE 3758 +▁PURPOSE 3759 +▁PURSUE 3760 +▁PURSUING 3761 +▁PURSUIT 3762 +▁PUSH 3763 +▁PUSHED 3764 +▁PUSHING 3765 +▁PUT 3766 +▁PUTTING 3767 +▁PUZZLE 3768 +▁PYRAMID 3769 +▁QU 3770 +▁QUALIFIED 3771 +▁QUALITIES 3772 +▁QUALITY 3773 +▁QUANTUM 3774 +▁QUARTER 3775 +▁QUEEN 3776 +▁QUEER 3777 +▁QUESTION 3778 +▁QUESTIONS 3779 +▁QUICK 3780 +▁QUICKLY 3781 +▁QUIET 3782 +▁QUIT 3783 +▁QUITE 3784 +▁QUO 3785 +▁QUOTE 3786 +▁R 3787 +▁RA 3788 +▁RABBI 3789 +▁RACE 3790 +▁RACIAL 3791 +▁RACISM 3792 +▁RACIST 3793 +▁RADIATION 3794 +▁RADICAL 3795 +▁RADIO 3796 +▁RAIN 3797 +▁RAINFOREST 3798 +▁RAISE 3799 +▁RAISED 3800 +▁RAISING 3801 +▁RAN 3802 +▁RANDOM 3803 +▁RANGE 3804 +▁RANK 3805 +▁RAP 3806 +▁RAPE 3807 +▁RAPID 3808 +▁RAPIDLY 3809 +▁RARE 3810 +▁RARELY 3811 +▁RAT 3812 +▁RATE 3813 +▁RATES 3814 +▁RATHER 3815 +▁RATIONAL 3816 +▁RAW 3817 +▁RAY 3818 +▁RE 3819 +▁REACH 3820 +▁REACHED 3821 +▁REACT 3822 +▁REACTION 3823 +▁READ 3824 +▁READING 3825 +▁READY 3826 +▁REAL 3827 +▁REALISE 3828 +▁REALITY 3829 +▁REALIZE 3830 +▁REALIZED 3831 +▁REALLY 3832 +▁REASON 3833 +▁REASONS 3834 +▁REBEL 3835 +▁REBUILD 3836 +▁REC 3837 +▁RECALL 3838 +▁RECEIVE 3839 +▁RECEIVED 3840 +▁RECEIVING 3841 +▁RECENT 3842 +▁RECENTLY 3843 +▁RECEPTOR 3844 +▁RECIPE 3845 +▁RECOGNITION 3846 +▁RECOGNIZE 3847 +▁RECOGNIZED 3848 +▁RECOGNIZING 3849 +▁RECOMMEND 3850 +▁RECONNECT 3851 +▁RECONSTRUCT 3852 +▁RECORD 3853 +▁RECOVER 3854 +▁RECOVERY 3855 +▁RECRUIT 3856 +▁RECYCLE 
3857 +▁RECYCLING 3858 +▁RED 3859 +▁REDESIGN 3860 +▁REDUC 3861 +▁REDUCE 3862 +▁REDUCTION 3863 +▁REEF 3864 +▁REFER 3865 +▁REFERENCE 3866 +▁REFERR 3867 +▁REFLECT 3868 +▁REFORM 3869 +▁REFRA 3870 +▁REFUGEE 3871 +▁REFUGEES 3872 +▁REFUSE 3873 +▁REGARD 3874 +▁REGARDLESS 3875 +▁REGIME 3876 +▁REGION 3877 +▁REGISTER 3878 +▁REGRET 3879 +▁REGULAR 3880 +▁REGULATE 3881 +▁REGULATION 3882 +▁REHABILITATION 3883 +▁REINFORCE 3884 +▁REINVENT 3885 +▁REJECT 3886 +▁REJECTION 3887 +▁RELATE 3888 +▁RELATED 3889 +▁RELATION 3890 +▁RELATIONSHIP 3891 +▁RELATIONSHIPS 3892 +▁RELATIVE 3893 +▁RELATIVELY 3894 +▁RELATIVITY 3895 +▁RELAX 3896 +▁RELEASE 3897 +▁RELEASED 3898 +▁RELEVANT 3899 +▁RELI 3900 +▁RELIABLE 3901 +▁RELIEF 3902 +▁RELIGION 3903 +▁RELIGIOUS 3904 +▁RELY 3905 +▁REMAIN 3906 +▁REMARKABLE 3907 +▁REMARKABLY 3908 +▁REMEMBER 3909 +▁REMIND 3910 +▁REMOTE 3911 +▁REMOVE 3912 +▁REMOVING 3913 +▁RENAISSANCE 3914 +▁RENEWABLE 3915 +▁RENT 3916 +▁REPAIR 3917 +▁REPEAT 3918 +▁REPLACE 3919 +▁REPLICA 3920 +▁REPORT 3921 +▁REPRESENT 3922 +▁REPRESENTATION 3923 +▁REPRESENTATIVE 3924 +▁REPRODUCE 3925 +▁REPRODUCTION 3926 +▁REPRODUCTIVE 3927 +▁REPUBLIC 3928 +▁REPUTATION 3929 +▁REQUEST 3930 +▁REQUIRE 3931 +▁REQUIRED 3932 +▁REQUIRES 3933 +▁RESCUE 3934 +▁RESEARCH 3935 +▁RESEARCHERS 3936 +▁RESERVE 3937 +▁RESIDENT 3938 +▁RESILIENCE 3939 +▁RESILIENT 3940 +▁RESIST 3941 +▁RESISTANCE 3942 +▁RESOLUTION 3943 +▁RESOLVE 3944 +▁RESONATE 3945 +▁RESOURCE 3946 +▁RESOURCES 3947 +▁RESPECT 3948 +▁RESPOND 3949 +▁RESPONSE 3950 +▁RESPONSIBILITY 3951 +▁RESPONSIBLE 3952 +▁REST 3953 +▁RESTAURANT 3954 +▁RESTORE 3955 +▁RESTRICT 3956 +▁RESULT 3957 +▁RESULTS 3958 +▁RESUME 3959 +▁RETAIL 3960 +▁RETHINK 3961 +▁RETIRE 3962 +▁RETREAT 3963 +▁RETURN 3964 +▁REV 3965 +▁REVEAL 3966 +▁REVENGE 3967 +▁REVENUE 3968 +▁REVERSE 3969 +▁REVIEW 3970 +▁REVOLUTION 3971 +▁REWARD 3972 +▁RHYTHM 3973 +▁RI 3974 +▁RICE 3975 +▁RICH 3976 +▁RICHARD 3977 +▁RID 3978 +▁RIDE 3979 +▁RIDICULOUS 3980 +▁RIGHT 3981 +▁RIGHTS 3982 +▁RIGID 3983 +▁RING 3984 +▁RIPPLE 3985 +▁RISE 3986 +▁RISING 3987 +▁RISK 3988 +▁RISKS 3989 +▁RITUAL 3990 +▁RIVER 3991 +▁RO 3992 +▁ROAD 3993 +▁ROB 3994 +▁ROBERT 3995 +▁ROBOT 3996 +▁ROBOTIC 3997 +▁ROBOTS 3998 +▁ROBUST 3999 +▁ROCK 4000 +▁ROCKET 4001 +▁ROLE 4002 +▁ROLL 4003 +▁ROMAN 4004 +▁ROMANTIC 4005 +▁ROOF 4006 +▁ROOM 4007 +▁ROOT 4008 +▁ROSE 4009 +▁ROUGH 4010 +▁ROUGHLY 4011 +▁ROUND 4012 +▁ROUTE 4013 +▁ROUTINE 4014 +▁ROW 4015 +▁ROYAL 4016 +▁RU 4017 +▁RUBB 4018 +▁RUIN 4019 +▁RULE 4020 +▁RULES 4021 +▁RUN 4022 +▁RUNNING 4023 +▁RURAL 4024 +▁RUSH 4025 +▁RUSSIA 4026 +▁RUSSIAN 4027 +▁RWANDA 4028 +▁S 4029 +▁SA 4030 +▁SACRED 4031 +▁SACRIFICE 4032 +▁SAD 4033 +▁SAFE 4034 +▁SAFETY 4035 +▁SAID 4036 +▁SAIL 4037 +▁SAL 4038 +▁SAME 4039 +▁SAMPLE 4040 +▁SAN 4041 +▁SAND 4042 +▁SANDWICH 4043 +▁SARAH 4044 +▁SAT 4045 +▁SATELLITE 4046 +▁SATISFACTION 4047 +▁SATISFIED 4048 +▁SATISFY 4049 +▁SATURDAY 4050 +▁SAUDI 4051 +▁SAVE 4052 +▁SAVED 4053 +▁SAVING 4054 +▁SAW 4055 +▁SAY 4056 +▁SAYING 4057 +▁SAYS 4058 +▁SC 4059 +▁SCALE 4060 +▁SCAN 4061 +▁SCANN 4062 +▁SCAR 4063 +▁SCARED 4064 +▁SCARY 4065 +▁SCENARIO 4066 +▁SCENE 4067 +▁SCHEDULE 4068 +▁SCHIZOPHRENIA 4069 +▁SCHOLAR 4070 +▁SCHOLARSHIP 4071 +▁SCHOOL 4072 +▁SCHOOLS 4073 +▁SCIENCE 4074 +▁SCIENTIFIC 4075 +▁SCIENTIST 4076 +▁SCIENTISTS 4077 +▁SCORE 4078 +▁SCOTT 4079 +▁SCRAP 4080 +▁SCRATCH 4081 +▁SCREAM 4082 +▁SCREEN 4083 +▁SCREW 4084 +▁SCRIPT 4085 +▁SE 4086 +▁SEA 4087 +▁SEARCH 4088 +▁SEARCHING 4089 +▁SEASON 4090 +▁SEAT 4091 +▁SECOND 4092 +▁SECRET 4093 +▁SECTION 4094 +▁SECTOR 4095 +▁SECULAR 4096 +▁SECURE 4097 +▁SECURITY 4098 +▁SEE 4099 +▁SEEDS 4100 +▁SEEING 4101 +▁SEEK 4102 
+▁SEEM 4103 +▁SEEMED 4104 +▁SEEMINGLY 4105 +▁SEEMS 4106 +▁SEEN 4107 +▁SELECT 4108 +▁SELF 4109 +▁SELL 4110 +▁SELLING 4111 +▁SELVES 4112 +▁SEMESTER 4113 +▁SEMI 4114 +▁SEND 4115 +▁SENIOR 4116 +▁SENSATION 4117 +▁SENSE 4118 +▁SENSITIVE 4119 +▁SENSOR 4120 +▁SENT 4121 +▁SENTENCE 4122 +▁SEPARATE 4123 +▁SEPARATION 4124 +▁SEPTEMBER 4125 +▁SEQUENCE 4126 +▁SERIES 4127 +▁SERIOUS 4128 +▁SERIOUSLY 4129 +▁SERVE 4130 +▁SERVICE 4131 +▁SERVICES 4132 +▁SERVING 4133 +▁SESSION 4134 +▁SET 4135 +▁SETTING 4136 +▁SETTLE 4137 +▁SEVEN 4138 +▁SEVERAL 4139 +▁SEVERE 4140 +▁SEX 4141 +▁SEXUAL 4142 +▁SEXUALITY 4143 +▁SH 4144 +▁SHA 4145 +▁SHADOW 4146 +▁SHAKE 4147 +▁SHAKESPEARE 4148 +▁SHAME 4149 +▁SHAPE 4150 +▁SHARE 4151 +▁SHARED 4152 +▁SHARING 4153 +▁SHARK 4154 +▁SHARP 4155 +▁SHE 4156 +▁SHELTER 4157 +▁SHI 4158 +▁SHIFT 4159 +▁SHIP 4160 +▁SHIRT 4161 +▁SHOCK 4162 +▁SHOES 4163 +▁SHOOT 4164 +▁SHOOTING 4165 +▁SHOP 4166 +▁SHOPPING 4167 +▁SHORE 4168 +▁SHORT 4169 +▁SHOT 4170 +▁SHOULD 4171 +▁SHOULDER 4172 +▁SHOULDN 4173 +▁SHOUT 4174 +▁SHOW 4175 +▁SHOWED 4176 +▁SHOWING 4177 +▁SHOWN 4178 +▁SHOWS 4179 +▁SHRINK 4180 +▁SHUT 4181 +▁SHY 4182 +▁SI 4183 +▁SICK 4184 +▁SIDE 4185 +▁SIGHT 4186 +▁SIGN 4187 +▁SIGNAL 4188 +▁SIGNATURE 4189 +▁SIGNIFICANCE 4190 +▁SIGNIFICANT 4191 +▁SILENCE 4192 +▁SILENT 4193 +▁SILICON 4194 +▁SILLY 4195 +▁SILVER 4196 +▁SIMILAR 4197 +▁SIMPLE 4198 +▁SIMPLY 4199 +▁SIMULATION 4200 +▁SIMULTANEOUS 4201 +▁SINCE 4202 +▁SINGING 4203 +▁SINGLE 4204 +▁SINK 4205 +▁SIR 4206 +▁SISTER 4207 +▁SIT 4208 +▁SITE 4209 +▁SITTING 4210 +▁SITUATION 4211 +▁SIX 4212 +▁SIZE 4213 +▁SKEPTIC 4214 +▁SKI 4215 +▁SKILL 4216 +▁SKILLS 4217 +▁SKIN 4218 +▁SKIP 4219 +▁SKULL 4220 +▁SKY 4221 +▁SLAUGHTER 4222 +▁SLAVE 4223 +▁SLEEP 4224 +▁SLEEVE 4225 +▁SLEPT 4226 +▁SLICE 4227 +▁SLIDE 4228 +▁SLIGHTLY 4229 +▁SLIP 4230 +▁SLOW 4231 +▁SLOWLY 4232 +▁SLUM 4233 +▁SMALL 4234 +▁SMALLER 4235 +▁SMART 4236 +▁SMARTPHONE 4237 +▁SMELL 4238 +▁SMILE 4239 +▁SMILING 4240 +▁SMOKE 4241 +▁SMOKING 4242 +▁SMOOTH 4243 +▁SNAKE 4244 +▁SNAP 4245 +▁SNEAK 4246 +▁SNOW 4247 +▁SO 4248 +▁SOCCER 4249 +▁SOCIAL 4250 +▁SOCIETAL 4251 +▁SOCIETIES 4252 +▁SOCIETY 4253 +▁SOFT 4254 +▁SOFTWARE 4255 +▁SOIL 4256 +▁SOLAR 4257 +▁SOLD 4258 +▁SOLDIER 4259 +▁SOLID 4260 +▁SOLUTION 4261 +▁SOLUTIONS 4262 +▁SOLVE 4263 +▁SOLVED 4264 +▁SOLVING 4265 +▁SOME 4266 +▁SOMEBODY 4267 +▁SOMEDAY 4268 +▁SOMEHOW 4269 +▁SOMEONE 4270 +▁SOMETHING 4271 +▁SOMETIMES 4272 +▁SOMEWHAT 4273 +▁SOMEWHERE 4274 +▁SON 4275 +▁SONG 4276 +▁SOON 4277 +▁SOPHISTICATED 4278 +▁SORRY 4279 +▁SORT 4280 +▁SOUL 4281 +▁SOUND 4282 +▁SOUNDS 4283 +▁SOURCE 4284 +▁SOUTH 4285 +▁SOUTHERN 4286 +▁SOVIET 4287 +▁SP 4288 +▁SPACE 4289 +▁SPACECRAFT 4290 +▁SPAN 4291 +▁SPANISH 4292 +▁SPARK 4293 +▁SPATIAL 4294 +▁SPEAK 4295 +▁SPEAKER 4296 +▁SPEAKING 4297 +▁SPECIAL 4298 +▁SPECIES 4299 +▁SPECIFIC 4300 +▁SPECIFICALLY 4301 +▁SPECTACULAR 4302 +▁SPECTRUM 4303 +▁SPEECH 4304 +▁SPEED 4305 +▁SPELL 4306 +▁SPEND 4307 +▁SPENDING 4308 +▁SPENT 4309 +▁SPHERE 4310 +▁SPIN 4311 +▁SPIRAL 4312 +▁SPIRIT 4313 +▁SPIRITUAL 4314 +▁SPLIT 4315 +▁SPOKE 4316 +▁SPOKEN 4317 +▁SPONSOR 4318 +▁SPONTANEOUS 4319 +▁SPORT 4320 +▁SPORTS 4321 +▁SPOT 4322 +▁SPOUSE 4323 +▁SPRAY 4324 +▁SPREAD 4325 +▁SPRING 4326 +▁SQUARE 4327 +▁SQUEEZE 4328 +▁SQUI 4329 +▁ST 4330 +▁STA 4331 +▁STABLE 4332 +▁STAFF 4333 +▁STAGE 4334 +▁STAKE 4335 +▁STAMP 4336 +▁STAND 4337 +▁STANDARD 4338 +▁STANDING 4339 +▁STANFORD 4340 +▁STAR 4341 +▁STARS 4342 +▁START 4343 +▁STARTED 4344 +▁STARTING 4345 +▁STARTS 4346 +▁STATE 4347 +▁STATEMENT 4348 +▁STATES 4349 +▁STATION 4350 +▁STATISTIC 4351 +▁STATISTICS 4352 +▁STATUS 4353 +▁STAY 4354 +▁STAYED 4355 +▁STEAL 4356 
+▁STEEL 4357 +▁STEM 4358 +▁STEP 4359 +▁STEPPED 4360 +▁STEPS 4361 +▁STEREOTYPE 4362 +▁STEVE 4363 +▁STICK 4364 +▁STIGMA 4365 +▁STILL 4366 +▁STIMUL 4367 +▁STOCK 4368 +▁STOMACH 4369 +▁STONE 4370 +▁STOOD 4371 +▁STOP 4372 +▁STOPPED 4373 +▁STORAGE 4374 +▁STORE 4375 +▁STORIES 4376 +▁STORM 4377 +▁STORY 4378 +▁STORYTELLER 4379 +▁STORYTELLING 4380 +▁STRAIGHT 4381 +▁STRAIGHTFORWARD 4382 +▁STRANGE 4383 +▁STRANGER 4384 +▁STRATEGIES 4385 +▁STRATEGY 4386 +▁STRAW 4387 +▁STREAM 4388 +▁STREET 4389 +▁STREETS 4390 +▁STRENGTH 4391 +▁STRESS 4392 +▁STRETCH 4393 +▁STRICT 4394 +▁STRIKE 4395 +▁STRIKING 4396 +▁STRING 4397 +▁STRIP 4398 +▁STRIVE 4399 +▁STROKE 4400 +▁STRONG 4401 +▁STRONGER 4402 +▁STRUCTURAL 4403 +▁STRUCTURE 4404 +▁STRUGGLE 4405 +▁STRUGGLING 4406 +▁STUCK 4407 +▁STUDENT 4408 +▁STUDENTS 4409 +▁STUDIED 4410 +▁STUDIES 4411 +▁STUDIO 4412 +▁STUDY 4413 +▁STUDYING 4414 +▁STUFF 4415 +▁STUMBL 4416 +▁STUPID 4417 +▁STYLE 4418 +▁SU 4419 +▁SUB 4420 +▁SUBCONSCIOUS 4421 +▁SUBJECT 4422 +▁SUBMIT 4423 +▁SUBSTANCE 4424 +▁SUBSTANTIAL 4425 +▁SUBTLE 4426 +▁SUBURB 4427 +▁SUCCEED 4428 +▁SUCCESS 4429 +▁SUCCESSFUL 4430 +▁SUCH 4431 +▁SUCK 4432 +▁SUDDEN 4433 +▁SUDDENLY 4434 +▁SUFFER 4435 +▁SUFFERING 4436 +▁SUFFICIENT 4437 +▁SUGAR 4438 +▁SUGGEST 4439 +▁SUGGESTION 4440 +▁SUICIDAL 4441 +▁SUICIDE 4442 +▁SUIT 4443 +▁SUM 4444 +▁SUMMER 4445 +▁SUN 4446 +▁SUNDAY 4447 +▁SUPER 4448 +▁SUPERHERO 4449 +▁SUPERMARKET 4450 +▁SUPPLIES 4451 +▁SUPPLY 4452 +▁SUPPORT 4453 +▁SUPPOSE 4454 +▁SUPPOSED 4455 +▁SUPPRESS 4456 +▁SUPREME 4457 +▁SURE 4458 +▁SURF 4459 +▁SURFACE 4460 +▁SURGEON 4461 +▁SURGERY 4462 +▁SURGICAL 4463 +▁SURPRISE 4464 +▁SURPRISED 4465 +▁SURPRISING 4466 +▁SURRENDER 4467 +▁SURROUND 4468 +▁SURROUNDED 4469 +▁SURVEILLANCE 4470 +▁SURVEY 4471 +▁SURVIVAL 4472 +▁SURVIVE 4473 +▁SURVIVING 4474 +▁SURVIVOR 4475 +▁SUSPECT 4476 +▁SUSTAIN 4477 +▁SUSTAINABILITY 4478 +▁SUSTAINABLE 4479 +▁SW 4480 +▁SWALLOW 4481 +▁SWEAT 4482 +▁SWEDEN 4483 +▁SWEET 4484 +▁SWIM 4485 +▁SWIMMING 4486 +▁SWITCH 4487 +▁SWITZERLAND 4488 +▁SYMBOL 4489 +▁SYMPTOM 4490 +▁SYMPTOMS 4491 +▁SYNDROME 4492 +▁SYNTHETIC 4493 +▁SYRIA 4494 +▁SYSTEM 4495 +▁SYSTEMATIC 4496 +▁SYSTEMS 4497 +▁T 4498 +▁TA 4499 +▁TABLE 4500 +▁TABOO 4501 +▁TACKLE 4502 +▁TAIL 4503 +▁TAKE 4504 +▁TAKEN 4505 +▁TAKES 4506 +▁TAKING 4507 +▁TALENT 4508 +▁TALK 4509 +▁TALKED 4510 +▁TALKING 4511 +▁TALL 4512 +▁TANGIBLE 4513 +▁TANK 4514 +▁TAP 4515 +▁TARGET 4516 +▁TASK 4517 +▁TASTE 4518 +▁TATTOO 4519 +▁TAUGHT 4520 +▁TAX 4521 +▁TE 4522 +▁TEA 4523 +▁TEACH 4524 +▁TEACHER 4525 +▁TEACHERS 4526 +▁TEACHES 4527 +▁TEACHING 4528 +▁TEAM 4529 +▁TEARS 4530 +▁TECH 4531 +▁TECHNICAL 4532 +▁TECHNIQUE 4533 +▁TECHNOLOGICAL 4534 +▁TECHNOLOGIES 4535 +▁TECHNOLOGY 4536 +▁TED 4537 +▁TEDX 4538 +▁TEEN 4539 +▁TEENAGE 4540 +▁TEENAGER 4541 +▁TEETH 4542 +▁TELEPHONE 4543 +▁TELESCOPE 4544 +▁TELEVISION 4545 +▁TELL 4546 +▁TELLING 4547 +▁TELLS 4548 +▁TEMP 4549 +▁TEMPERATURE 4550 +▁TEMPORARY 4551 +▁TEN 4552 +▁TEND 4553 +▁TENSION 4554 +▁TERM 4555 +▁TERMINAL 4556 +▁TERMS 4557 +▁TERRIBLE 4558 +▁TERRIBLY 4559 +▁TERRIFIED 4560 +▁TERRIFYING 4561 +▁TERRITORY 4562 +▁TERROR 4563 +▁TERRORISM 4564 +▁TERRORIST 4565 +▁TEST 4566 +▁TESTING 4567 +▁TEXAS 4568 +▁TEXT 4569 +▁TEXTBOOK 4570 +▁TH 4571 +▁THAN 4572 +▁THANK 4573 +▁THAT 4574 +▁THE 4575 +▁THEATER 4576 +▁THEIR 4577 +▁THEM 4578 +▁THEMSELVES 4579 +▁THEN 4580 +▁THEORETICAL 4581 +▁THEORIES 4582 +▁THEORY 4583 +▁THERAPEUTIC 4584 +▁THERAPIST 4585 +▁THERAPY 4586 +▁THERE 4587 +▁THEREFORE 4588 +▁THESE 4589 +▁THEY 4590 +▁THICK 4591 +▁THIN 4592 +▁THING 4593 +▁THINGS 4594 +▁THINK 4595 +▁THINKING 4596 +▁THIRD 4597 +▁THIRTY 4598 +▁THIS 4599 
+▁THOMAS 4600 +▁THOSE 4601 +▁THOUGH 4602 +▁THOUGHT 4603 +▁THOUGHTS 4604 +▁THOUSAND 4605 +▁THOUSANDS 4606 +▁THREAD 4607 +▁THREAT 4608 +▁THREATENED 4609 +▁THREATENING 4610 +▁THREE 4611 +▁THRESHOLD 4612 +▁THRILL 4613 +▁THRIVE 4614 +▁THRIVING 4615 +▁THROAT 4616 +▁THROUGH 4617 +▁THROUGHOUT 4618 +▁THROW 4619 +▁THUMB 4620 +▁THUS 4621 +▁TICKET 4622 +▁TIE 4623 +▁TIGER 4624 +▁TIGHT 4625 +▁TILL 4626 +▁TIM 4627 +▁TIME 4628 +▁TIMES 4629 +▁TINY 4630 +▁TIP 4631 +▁TIRED 4632 +▁TISSUE 4633 +▁TITLE 4634 +▁TO 4635 +▁TODAY 4636 +▁TOGETHER 4637 +▁TOILET 4638 +▁TOLD 4639 +▁TOMATO 4640 +▁TOMORROW 4641 +▁TONGUE 4642 +▁TONIGHT 4643 +▁TOO 4644 +▁TOOK 4645 +▁TOOL 4646 +▁TOOLS 4647 +▁TOP 4648 +▁TOPIC 4649 +▁TORONTO 4650 +▁TORTURE 4651 +▁TOTAL 4652 +▁TOTALLY 4653 +▁TOUCH 4654 +▁TOUGH 4655 +▁TOWARDS 4656 +▁TOWER 4657 +▁TOWN 4658 +▁TOXIC 4659 +▁TR 4660 +▁TRACE 4661 +▁TRACK 4662 +▁TRADE 4663 +▁TRADITION 4664 +▁TRADITIONAL 4665 +▁TRAFFIC 4666 +▁TRAFFICKING 4667 +▁TRAGEDY 4668 +▁TRAGIC 4669 +▁TRAIL 4670 +▁TRAIN 4671 +▁TRAINED 4672 +▁TRAINING 4673 +▁TRANS 4674 +▁TRANSACTION 4675 +▁TRANSCEND 4676 +▁TRANSFER 4677 +▁TRANSFORM 4678 +▁TRANSFORMATION 4679 +▁TRANSGENDER 4680 +▁TRANSITION 4681 +▁TRANSLATE 4682 +▁TRANSLATION 4683 +▁TRANSLATOR 4684 +▁TRANSMIT 4685 +▁TRANSPARENCY 4686 +▁TRANSPARENT 4687 +▁TRANSPLANT 4688 +▁TRANSPORT 4689 +▁TRANSPORTATION 4690 +▁TRAP 4691 +▁TRASH 4692 +▁TRAUMA 4693 +▁TRAUMATIC 4694 +▁TRAVEL 4695 +▁TREASURE 4696 +▁TREAT 4697 +▁TREATED 4698 +▁TREATMENT 4699 +▁TREE 4700 +▁TREES 4701 +▁TREMENDOUS 4702 +▁TREND 4703 +▁TRI 4704 +▁TRIAL 4705 +▁TRIBAL 4706 +▁TRIBE 4707 +▁TRICK 4708 +▁TRIED 4709 +▁TRIGGER 4710 +▁TRILLION 4711 +▁TRIP 4712 +▁TRIVIAL 4713 +▁TRO 4714 +▁TROUBLE 4715 +▁TRUCK 4716 +▁TRUE 4717 +▁TRULY 4718 +▁TRUMP 4719 +▁TRUST 4720 +▁TRUTH 4721 +▁TRY 4722 +▁TRYING 4723 +▁TU 4724 +▁TUBE 4725 +▁TUMOR 4726 +▁TUNE 4727 +▁TUNNEL 4728 +▁TURKEY 4729 +▁TURN 4730 +▁TURNED 4731 +▁TURNING 4732 +▁TURNS 4733 +▁TV 4734 +▁TWEET 4735 +▁TWELVE 4736 +▁TWENTY 4737 +▁TWICE 4738 +▁TWIN 4739 +▁TWIST 4740 +▁TWITTER 4741 +▁TWO 4742 +▁TYPE 4743 +▁TYPES 4744 +▁TYPICAL 4745 +▁TYPICALLY 4746 +▁U 4747 +▁UGLY 4748 +▁UK 4749 +▁ULTIMATE 4750 +▁ULTIMATELY 4751 +▁ULTRA 4752 +▁UN 4753 +▁UNBELIEVABLE 4754 +▁UNCERTAIN 4755 +▁UNCERTAINTY 4756 +▁UNCLE 4757 +▁UNCOMFORTABLE 4758 +▁UNCONDITIONAL 4759 +▁UNCONSCIOUS 4760 +▁UNCOVER 4761 +▁UNDER 4762 +▁UNDERESTIMATE 4763 +▁UNDERGRADUATE 4764 +▁UNDERGROUND 4765 +▁UNDERLYING 4766 +▁UNDERNEATH 4767 +▁UNDERSTAND 4768 +▁UNDERSTANDING 4769 +▁UNDERSTOOD 4770 +▁UNEMPLOYMENT 4771 +▁UNEXPECTED 4772 +▁UNFAIR 4773 +▁UNFORTUNATELY 4774 +▁UNHAPPY 4775 +▁UNHEALTHY 4776 +▁UNIFORM 4777 +▁UNION 4778 +▁UNIQUE 4779 +▁UNIT 4780 +▁UNITED 4781 +▁UNIVERSAL 4782 +▁UNIVERSE 4783 +▁UNIVERSITIES 4784 +▁UNIVERSITY 4785 +▁UNKNOWN 4786 +▁UNLESS 4787 +▁UNLIKE 4788 +▁UNNECESSARY 4789 +▁UNPRECEDENTED 4790 +▁UNTIL 4791 +▁UNUSUAL 4792 +▁UP 4793 +▁UPDATE 4794 +▁UPGRADE 4795 +▁UPON 4796 +▁UPSET 4797 +▁URBAN 4798 +▁URGE 4799 +▁US 4800 +▁USE 4801 +▁USED 4802 +▁USEFUL 4803 +▁USING 4804 +▁USUALLY 4805 +▁UTOPIA 4806 +▁UTTER 4807 +▁V 4808 +▁VA 4809 +▁VACATION 4810 +▁VACCINE 4811 +▁VACUUM 4812 +▁VAGINA 4813 +▁VALID 4814 +▁VALLEY 4815 +▁VALUABLE 4816 +▁VALUE 4817 +▁VALUES 4818 +▁VAN 4819 +▁VARIABLE 4820 +▁VARIATION 4821 +▁VARIET 4822 +▁VARIOUS 4823 +▁VAST 4824 +▁VE 4825 +▁VEGAN 4826 +▁VEGETABLE 4827 +▁VEGETARIAN 4828 +▁VEHICLE 4829 +▁VENTURE 4830 +▁VERSION 4831 +▁VERSUS 4832 +▁VERTICAL 4833 +▁VERY 4834 +▁VESSEL 4835 +▁VETERAN 4836 +▁VI 4837 +▁VIBRANT 4838 +▁VIBRAT 4839 +▁VICIOUS 4840 +▁VICTIM 4841 +▁VICTIMS 4842 +▁VICTOR 4843 +▁VIDEO 4844 
+▁VIETNAM 4845 +▁VIEW 4846 +▁VILLAGE 4847 +▁VIOLENCE 4848 +▁VIOLENT 4849 +▁VIRAL 4850 +▁VIRGIN 4851 +▁VIRTUAL 4852 +▁VIRTUE 4853 +▁VIRUS 4854 +▁VISCERAL 4855 +▁VISIBLE 4856 +▁VISION 4857 +▁VISIT 4858 +▁VISUAL 4859 +▁VITAL 4860 +▁VITAMIN 4861 +▁VO 4862 +▁VOCABULARY 4863 +▁VOCAL 4864 +▁VOICE 4865 +▁VOICES 4866 +▁VOLUME 4867 +▁VOLUNTEER 4868 +▁VOTE 4869 +▁VOTING 4870 +▁VULNERABILITY 4871 +▁VULNERABLE 4872 +▁W 4873 +▁WA 4874 +▁WAGE 4875 +▁WAIT 4876 +▁WAITING 4877 +▁WAK 4878 +▁WAKE 4879 +▁WALK 4880 +▁WALKED 4881 +▁WALKING 4882 +▁WALL 4883 +▁WANNA 4884 +▁WANT 4885 +▁WANTED 4886 +▁WAR 4887 +▁WAREHOUSE 4888 +▁WARM 4889 +▁WARRIOR 4890 +▁WAS 4891 +▁WASH 4892 +▁WASHINGTON 4893 +▁WASN 4894 +▁WASTE 4895 +▁WATCH 4896 +▁WATCHED 4897 +▁WATCHING 4898 +▁WATER 4899 +▁WAVE 4900 +▁WAY 4901 +▁WAYS 4902 +▁WE 4903 +▁WEAK 4904 +▁WEAKNESS 4905 +▁WEALTH 4906 +▁WEAPON 4907 +▁WEAR 4908 +▁WEARING 4909 +▁WEATHER 4910 +▁WEB 4911 +▁WEBSITE 4912 +▁WEEK 4913 +▁WEEKEND 4914 +▁WEEKS 4915 +▁WEIGH 4916 +▁WEIGHT 4917 +▁WEIRD 4918 +▁WELCOME 4919 +▁WELFARE 4920 +▁WELL 4921 +▁WENT 4922 +▁WERE 4923 +▁WEREN 4924 +▁WEST 4925 +▁WESTERN 4926 +▁WHALE 4927 +▁WHAT 4928 +▁WHATEVER 4929 +▁WHATSOEVER 4930 +▁WHEAT 4931 +▁WHEEL 4932 +▁WHEELCHAIR 4933 +▁WHEN 4934 +▁WHENEVER 4935 +▁WHERE 4936 +▁WHETHER 4937 +▁WHI 4938 +▁WHICH 4939 +▁WHILE 4940 +▁WHISPER 4941 +▁WHISTLE 4942 +▁WHITE 4943 +▁WHO 4944 +▁WHOEVER 4945 +▁WHOLE 4946 +▁WHOSE 4947 +▁WHY 4948 +▁WI 4949 +▁WIDE 4950 +▁WIDESPREAD 4951 +▁WIFE 4952 +▁WIKIPEDIA 4953 +▁WILD 4954 +▁WILDLIFE 4955 +▁WILL 4956 +▁WILLIAM 4957 +▁WILLING 4958 +▁WIN 4959 +▁WIND 4960 +▁WINDOW 4961 +▁WINE 4962 +▁WINNING 4963 +▁WINTER 4964 +▁WIRE 4965 +▁WISDOM 4966 +▁WISE 4967 +▁WISH 4968 +▁WITH 4969 +▁WITHDRAW 4970 +▁WITHIN 4971 +▁WITHOUT 4972 +▁WITNESS 4973 +▁WOKE 4974 +▁WOLF 4975 +▁WOLVES 4976 +▁WOMAN 4977 +▁WOMEN 4978 +▁WON 4979 +▁WONDER 4980 +▁WONDERED 4981 +▁WONDERFUL 4982 +▁WONDERING 4983 +▁WOOD 4984 +▁WORD 4985 +▁WORDS 4986 +▁WORE 4987 +▁WORK 4988 +▁WORKED 4989 +▁WORKERS 4990 +▁WORKFORCE 4991 +▁WORKING 4992 +▁WORKPLACE 4993 +▁WORKS 4994 +▁WORKSHOP 4995 +▁WORLD 4996 +▁WORLDVIEW 4997 +▁WORLDWIDE 4998 +▁WORM 4999 +▁WORRIED 5000 +▁WORRIES 5001 +▁WORRY 5002 +▁WORSE 5003 +▁WORSHIP 5004 +▁WORST 5005 +▁WORTH 5006 +▁WOULD 5007 +▁WOULDN 5008 +▁WOUND 5009 +▁WOW 5010 +▁WRAP 5011 +▁WRESTLE 5012 +▁WRITE 5013 +▁WRITER 5014 +▁WRITING 5015 +▁WRITTEN 5016 +▁WRONG 5017 +▁WROTE 5018 +▁X 5019 +▁YA 5020 +▁YARD 5021 +▁YEAH 5022 +▁YEAR 5023 +▁YEARS 5024 +▁YELL 5025 +▁YELLOW 5026 +▁YES 5027 +▁YESTERDAY 5028 +▁YET 5029 +▁YIELD 5030 +▁YO 5031 +▁YOGA 5032 +▁YORK 5033 +▁YOU 5034 +▁YOUNG 5035 +▁YOUNGER 5036 +▁YOUR 5037 +▁YOURSELF 5038 +▁YOURSELVES 5039 +▁YOUTH 5040 +▁YOUTUBE 5041 +▁YU 5042 +▁Z 5043 +▁ZEALAND 5044 +▁ZERO 5045 +▁ZONE 5046 +▁ZOO 5047