diff --git a/configs/LRS3_V_WER19.1.ini b/configs/LRS3_V_WER19.1.ini new file mode 100644 index 0000000000000000000000000000000000000000..7c1ac9a7c0ebb812423f1a22826a9c0e7fc99902 --- /dev/null +++ b/configs/LRS3_V_WER19.1.ini @@ -0,0 +1,18 @@ +[input] +modality=video +v_fps=25 + +[model] +v_fps=25 +model_path=benchmarks/LRS3/models/LRS3_V_WER19.1/model.pth +model_conf=benchmarks/LRS3/models/LRS3_V_WER19.1/model.json +rnnlm=benchmarks/LRS3/language_models/lm_en_subword/model.pth +rnnlm_conf=benchmarks/LRS3/language_models/lm_en_subword/model.json + +[decode] +beam_size=40 +penalty=0.0 +maxlenratio=0.0 +minlenratio=0.0 +ctc_weight=0.1 +lm_weight=0.3 diff --git a/espnet/.DS_Store b/espnet/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..81c5efd96703d9ec242bcfbfeb1cc7e92c570439 Binary files /dev/null and b/espnet/.DS_Store differ diff --git a/espnet/asr/asr_utils.py b/espnet/asr/asr_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4f72ba13a677a11c58b8ac5a10235ebe79a3824e --- /dev/null +++ b/espnet/asr/asr_utils.py @@ -0,0 +1,990 @@ +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import argparse +import copy +import json +import logging +import os +import shutil +import tempfile + +import numpy as np +import torch + + +# * -------------------- training iterator related -------------------- * + + +class CompareValueTrigger(object): + """Trigger invoked when key value getting bigger or lower than before. + + Args: + key (str) : Key of value. + compare_fn ((float, float) -> bool) : Function to compare the values. + trigger (tuple(int, str)) : Trigger that decide the comparison interval. + + """ + + def __init__(self, key, compare_fn, trigger=(1, "epoch")): + from chainer import training + + self._key = key + self._best_value = None + self._interval_trigger = training.util.get_trigger(trigger) + self._init_summary() + self._compare_fn = compare_fn + + def __call__(self, trainer): + """Get value related to the key and compare with current value.""" + observation = trainer.observation + summary = self._summary + key = self._key + if key in observation: + summary.add({key: observation[key]}) + + if not self._interval_trigger(trainer): + return False + + stats = summary.compute_mean() + value = float(stats[key]) # copy to CPU + self._init_summary() + + if self._best_value is None: + # initialize best value + self._best_value = value + return False + elif self._compare_fn(self._best_value, value): + return True + else: + self._best_value = value + return False + + def _init_summary(self): + import chainer + + self._summary = chainer.reporter.DictSummary() + + +try: + from chainer.training import extension +except ImportError: + PlotAttentionReport = None +else: + + class PlotAttentionReport(extension.Extension): + """Plot attention reporter. + + Args: + att_vis_fn (espnet.nets.*_backend.e2e_asr.E2E.calculate_all_attentions): + Function of attention visualization. + data (list[tuple(str, dict[str, list[Any]])]): List json utt key items. + outdir (str): Directory to save figures. + converter (espnet.asr.*_backend.asr.CustomConverter): + Function to convert data. + device (int | torch.device): Device. + reverse (bool): If True, input and output length are reversed. + ikey (str): Key to access input + (for ASR/ST ikey="input", for MT ikey="output".) + iaxis (int): Dimension to access input + (for ASR/ST iaxis=0, for MT iaxis=1.) 
+ okey (str): Key to access output + (for ASR/ST okey="input", MT okay="output".) + oaxis (int): Dimension to access output + (for ASR/ST oaxis=0, for MT oaxis=0.) + subsampling_factor (int): subsampling factor in encoder + + """ + + def __init__( + self, + att_vis_fn, + data, + outdir, + converter, + transform, + device, + reverse=False, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, + subsampling_factor=1, + ): + self.att_vis_fn = att_vis_fn + self.data = copy.deepcopy(data) + self.data_dict = {k: v for k, v in copy.deepcopy(data)} + # key is utterance ID + self.outdir = outdir + self.converter = converter + self.transform = transform + self.device = device + self.reverse = reverse + self.ikey = ikey + self.iaxis = iaxis + self.okey = okey + self.oaxis = oaxis + self.factor = subsampling_factor + if not os.path.exists(self.outdir): + os.makedirs(self.outdir) + + def __call__(self, trainer): + """Plot and save image file of att_ws matrix.""" + att_ws, uttid_list = self.get_attention_weights() + if isinstance(att_ws, list): # multi-encoder case + num_encs = len(att_ws) - 1 + # atts + for i in range(num_encs): + for idx, att_w in enumerate(att_ws[i]): + filename = "%s/%s.ep.{.updater.epoch}.att%d.png" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.att%d.npy" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention(att_w, filename.format(trainer)) + # han + for idx, att_w in enumerate(att_ws[num_encs]): + filename = "%s/%s.ep.{.updater.epoch}.han.png" % ( + self.outdir, + uttid_list[idx], + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.han.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention( + att_w, filename.format(trainer), han_mode=True + ) + else: + for idx, att_w in enumerate(att_ws): + filename = "%s/%s.ep.{.updater.epoch}.png" % ( + self.outdir, + uttid_list[idx], + ) + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + np_filename = "%s/%s.ep.{.updater.epoch}.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), att_w) + self._plot_and_save_attention(att_w, filename.format(trainer)) + + def log_attentions(self, logger, step): + """Add image files of att_ws matrix to the tensorboard.""" + att_ws, uttid_list = self.get_attention_weights() + if isinstance(att_ws, list): # multi-encoder case + num_encs = len(att_ws) - 1 + # atts + for i in range(num_encs): + for idx, att_w in enumerate(att_ws[i]): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_attention_plot(att_w) + logger.add_figure( + "%s_att%d" % (uttid_list[idx], i + 1), + plot.gcf(), + step, + ) + # han + for idx, att_w in enumerate(att_ws[num_encs]): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_han_plot(att_w) + logger.add_figure( + "%s_han" % (uttid_list[idx]), + plot.gcf(), + step, + ) + else: + for idx, att_w in enumerate(att_ws): + att_w = self.trim_attention_weight(uttid_list[idx], att_w) + plot = self.draw_attention_plot(att_w) + logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step) + + def get_attention_weights(self): + """Return attention weights. + + Returns: + numpy.ndarray: attention weights. float. Its shape would be + differ from backend. 
+ * pytorch-> 1) multi-head case => (B, H, Lmax, Tmax), 2) + other case => (B, Lmax, Tmax). + * chainer-> (B, Lmax, Tmax) + + """ + return_batch, uttid_list = self.transform(self.data, return_uttid=True) + batch = self.converter([return_batch], self.device) + if isinstance(batch, tuple): + att_ws = self.att_vis_fn(*batch) + else: + att_ws = self.att_vis_fn(**batch) + return att_ws, uttid_list + + def trim_attention_weight(self, uttid, att_w): + """Transform attention matrix with regard to self.reverse.""" + if self.reverse: + enc_key, enc_axis = self.okey, self.oaxis + dec_key, dec_axis = self.ikey, self.iaxis + else: + enc_key, enc_axis = self.ikey, self.iaxis + dec_key, dec_axis = self.okey, self.oaxis + dec_len = int(self.data_dict[uttid][dec_key][dec_axis]["shape"][0]) + enc_len = int(self.data_dict[uttid][enc_key][enc_axis]["shape"][0]) + if self.factor > 1: + enc_len //= self.factor + if len(att_w.shape) == 3: + att_w = att_w[:, :dec_len, :enc_len] + else: + att_w = att_w[:dec_len, :enc_len] + return att_w + + def draw_attention_plot(self, att_w): + """Plot the att_w matrix. + + Returns: + matplotlib.pyplot: pyplot object with attention matrix image. + + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + plt.clf() + att_w = att_w.astype(np.float32) + if len(att_w.shape) == 3: + for h, aw in enumerate(att_w, 1): + plt.subplot(1, len(att_w), h) + plt.imshow(aw, aspect="auto") + plt.xlabel("Encoder Index") + plt.ylabel("Decoder Index") + else: + plt.imshow(att_w, aspect="auto") + plt.xlabel("Encoder Index") + plt.ylabel("Decoder Index") + plt.tight_layout() + return plt + + def draw_han_plot(self, att_w): + """Plot the att_w matrix for hierarchical attention. + + Returns: + matplotlib.pyplot: pyplot object with attention matrix image. + + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + plt.clf() + if len(att_w.shape) == 3: + for h, aw in enumerate(att_w, 1): + legends = [] + plt.subplot(1, len(att_w), h) + for i in range(aw.shape[1]): + plt.plot(aw[:, i]) + legends.append("Att{}".format(i)) + plt.ylim([0, 1.0]) + plt.xlim([0, aw.shape[0]]) + plt.grid(True) + plt.ylabel("Attention Weight") + plt.xlabel("Decoder Index") + plt.legend(legends) + else: + legends = [] + for i in range(att_w.shape[1]): + plt.plot(att_w[:, i]) + legends.append("Att{}".format(i)) + plt.ylim([0, 1.0]) + plt.xlim([0, att_w.shape[0]]) + plt.grid(True) + plt.ylabel("Attention Weight") + plt.xlabel("Decoder Index") + plt.legend(legends) + plt.tight_layout() + return plt + + def _plot_and_save_attention(self, att_w, filename, han_mode=False): + if han_mode: + plt = self.draw_han_plot(att_w) + else: + plt = self.draw_attention_plot(att_w) + plt.savefig(filename) + plt.close() + + +try: + from chainer.training import extension +except ImportError: + PlotCTCReport = None +else: + + class PlotCTCReport(extension.Extension): + """Plot CTC reporter. + + Args: + ctc_vis_fn (espnet.nets.*_backend.e2e_asr.E2E.calculate_all_ctc_probs): + Function of CTC visualization. + data (list[tuple(str, dict[str, list[Any]])]): List json utt key items. + outdir (str): Directory to save figures. + converter (espnet.asr.*_backend.asr.CustomConverter): + Function to convert data. + device (int | torch.device): Device. + reverse (bool): If True, input and output length are reversed. + ikey (str): Key to access input + (for ASR/ST ikey="input", for MT ikey="output".) + iaxis (int): Dimension to access input + (for ASR/ST iaxis=0, for MT iaxis=1.) 
+ okey (str): Key to access output + (for ASR/ST okey="input", MT okay="output".) + oaxis (int): Dimension to access output + (for ASR/ST oaxis=0, for MT oaxis=0.) + subsampling_factor (int): subsampling factor in encoder + + """ + + def __init__( + self, + ctc_vis_fn, + data, + outdir, + converter, + transform, + device, + reverse=False, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, + subsampling_factor=1, + ): + self.ctc_vis_fn = ctc_vis_fn + self.data = copy.deepcopy(data) + self.data_dict = {k: v for k, v in copy.deepcopy(data)} + # key is utterance ID + self.outdir = outdir + self.converter = converter + self.transform = transform + self.device = device + self.reverse = reverse + self.ikey = ikey + self.iaxis = iaxis + self.okey = okey + self.oaxis = oaxis + self.factor = subsampling_factor + if not os.path.exists(self.outdir): + os.makedirs(self.outdir) + + def __call__(self, trainer): + """Plot and save image file of ctc prob.""" + ctc_probs, uttid_list = self.get_ctc_probs() + if isinstance(ctc_probs, list): # multi-encoder case + num_encs = len(ctc_probs) - 1 + for i in range(num_encs): + for idx, ctc_prob in enumerate(ctc_probs[i]): + filename = "%s/%s.ep.{.updater.epoch}.ctc%d.png" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + np_filename = "%s/%s.ep.{.updater.epoch}.ctc%d.npy" % ( + self.outdir, + uttid_list[idx], + i + 1, + ) + np.save(np_filename.format(trainer), ctc_prob) + self._plot_and_save_ctc(ctc_prob, filename.format(trainer)) + else: + for idx, ctc_prob in enumerate(ctc_probs): + filename = "%s/%s.ep.{.updater.epoch}.png" % ( + self.outdir, + uttid_list[idx], + ) + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + np_filename = "%s/%s.ep.{.updater.epoch}.npy" % ( + self.outdir, + uttid_list[idx], + ) + np.save(np_filename.format(trainer), ctc_prob) + self._plot_and_save_ctc(ctc_prob, filename.format(trainer)) + + def log_ctc_probs(self, logger, step): + """Add image files of ctc probs to the tensorboard.""" + ctc_probs, uttid_list = self.get_ctc_probs() + if isinstance(ctc_probs, list): # multi-encoder case + num_encs = len(ctc_probs) - 1 + for i in range(num_encs): + for idx, ctc_prob in enumerate(ctc_probs[i]): + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + plot = self.draw_ctc_plot(ctc_prob) + logger.add_figure( + "%s_ctc%d" % (uttid_list[idx], i + 1), + plot.gcf(), + step, + ) + else: + for idx, ctc_prob in enumerate(ctc_probs): + ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob) + plot = self.draw_ctc_plot(ctc_prob) + logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step) + + def get_ctc_probs(self): + """Return CTC probs. + + Returns: + numpy.ndarray: CTC probs. float. Its shape would be + differ from backend. (B, Tmax, vocab). + + """ + return_batch, uttid_list = self.transform(self.data, return_uttid=True) + batch = self.converter([return_batch], self.device) + if isinstance(batch, tuple): + probs = self.ctc_vis_fn(*batch) + else: + probs = self.ctc_vis_fn(**batch) + return probs, uttid_list + + def trim_ctc_prob(self, uttid, prob): + """Trim CTC posteriors accoding to input lengths.""" + enc_len = int(self.data_dict[uttid][self.ikey][self.iaxis]["shape"][0]) + if self.factor > 1: + enc_len //= self.factor + prob = prob[:enc_len] + return prob + + def draw_ctc_plot(self, ctc_prob): + """Plot the ctc_prob matrix. + + Returns: + matplotlib.pyplot: pyplot object with CTC prob matrix image. 
+ + """ + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + ctc_prob = ctc_prob.astype(np.float32) + + plt.clf() + topk_ids = np.argsort(ctc_prob, axis=1) + n_frames, vocab = ctc_prob.shape + times_probs = np.arange(n_frames) + + plt.figure(figsize=(20, 8)) + + # NOTE: index 0 is reserved for blank + for idx in set(topk_ids.reshape(-1).tolist()): + if idx == 0: + plt.plot( + times_probs, ctc_prob[:, 0], ":", label="", color="grey" + ) + else: + plt.plot(times_probs, ctc_prob[:, idx]) + plt.xlabel("Input [frame]", fontsize=12) + plt.ylabel("Posteriors", fontsize=12) + plt.xticks(list(range(0, int(n_frames) + 1, 10))) + plt.yticks(list(range(0, 2, 1))) + plt.tight_layout() + return plt + + def _plot_and_save_ctc(self, ctc_prob, filename): + plt = self.draw_ctc_plot(ctc_prob) + plt.savefig(filename) + plt.close() + + +def restore_snapshot(model, snapshot, load_fn=None): + """Extension to restore snapshot. + + Returns: + An extension function. + + """ + import chainer + from chainer import training + + if load_fn is None: + load_fn = chainer.serializers.load_npz + + @training.make_extension(trigger=(1, "epoch")) + def restore_snapshot(trainer): + _restore_snapshot(model, snapshot, load_fn) + + return restore_snapshot + + +def _restore_snapshot(model, snapshot, load_fn=None): + if load_fn is None: + import chainer + + load_fn = chainer.serializers.load_npz + + load_fn(snapshot, model) + logging.info("restored from " + str(snapshot)) + + +def adadelta_eps_decay(eps_decay): + """Extension to perform adadelta eps decay. + + Args: + eps_decay (float): Decay rate of eps. + + Returns: + An extension function. + + """ + from chainer import training + + @training.make_extension(trigger=(1, "epoch")) + def adadelta_eps_decay(trainer): + _adadelta_eps_decay(trainer, eps_decay) + + return adadelta_eps_decay + + +def _adadelta_eps_decay(trainer, eps_decay): + optimizer = trainer.updater.get_optimizer("main") + # for chainer + if hasattr(optimizer, "eps"): + current_eps = optimizer.eps + setattr(optimizer, "eps", current_eps * eps_decay) + logging.info("adadelta eps decayed to " + str(optimizer.eps)) + # pytorch + else: + for p in optimizer.param_groups: + p["eps"] *= eps_decay + logging.info("adadelta eps decayed to " + str(p["eps"])) + + +def adam_lr_decay(eps_decay): + """Extension to perform adam lr decay. + + Args: + eps_decay (float): Decay rate of lr. + + Returns: + An extension function. + + """ + from chainer import training + + @training.make_extension(trigger=(1, "epoch")) + def adam_lr_decay(trainer): + _adam_lr_decay(trainer, eps_decay) + + return adam_lr_decay + + +def _adam_lr_decay(trainer, eps_decay): + optimizer = trainer.updater.get_optimizer("main") + # for chainer + if hasattr(optimizer, "lr"): + current_lr = optimizer.lr + setattr(optimizer, "lr", current_lr * eps_decay) + logging.info("adam lr decayed to " + str(optimizer.lr)) + # pytorch + else: + for p in optimizer.param_groups: + p["lr"] *= eps_decay + logging.info("adam lr decayed to " + str(p["lr"])) + + +def torch_snapshot(savefun=torch.save, filename="snapshot.ep.{.updater.epoch}"): + """Extension to take snapshot of the trainer for pytorch. + + Returns: + An extension function. 
+ + """ + from chainer.training import extension + + @extension.make_extension(trigger=(1, "epoch"), priority=-100) + def torch_snapshot(trainer): + _torch_snapshot_object(trainer, trainer, filename.format(trainer), savefun) + + return torch_snapshot + + +def _torch_snapshot_object(trainer, target, filename, savefun): + from chainer.serializers import DictionarySerializer + + # make snapshot_dict dictionary + s = DictionarySerializer() + s.save(trainer) + if hasattr(trainer.updater.model, "model"): + # (for TTS) + if hasattr(trainer.updater.model.model, "module"): + model_state_dict = trainer.updater.model.model.module.state_dict() + else: + model_state_dict = trainer.updater.model.model.state_dict() + else: + # (for ASR) + if hasattr(trainer.updater.model, "module"): + model_state_dict = trainer.updater.model.module.state_dict() + else: + model_state_dict = trainer.updater.model.state_dict() + snapshot_dict = { + "trainer": s.target, + "model": model_state_dict, + "optimizer": trainer.updater.get_optimizer("main").state_dict(), + } + + # save snapshot dictionary + fn = filename.format(trainer) + prefix = "tmp" + fn + tmpdir = tempfile.mkdtemp(prefix=prefix, dir=trainer.out) + tmppath = os.path.join(tmpdir, fn) + try: + savefun(snapshot_dict, tmppath) + shutil.move(tmppath, os.path.join(trainer.out, fn)) + finally: + shutil.rmtree(tmpdir) + + +def add_gradient_noise(model, iteration, duration=100, eta=1.0, scale_factor=0.55): + """Adds noise from a standard normal distribution to the gradients. + + The standard deviation (`sigma`) is controlled by the three hyper-parameters below. + `sigma` goes to zero (no noise) with more iterations. + + Args: + model (torch.nn.model): Model. + iteration (int): Number of iterations. + duration (int) {100, 1000}: + Number of durations to control the interval of the `sigma` change. + eta (float) {0.01, 0.3, 1.0}: The magnitude of `sigma`. + scale_factor (float) {0.55}: The scale of `sigma`. + """ + interval = (iteration // duration) + 1 + sigma = eta / interval**scale_factor + for param in model.parameters(): + if param.grad is not None: + _shape = param.grad.size() + noise = sigma * torch.randn(_shape).to(param.device) + param.grad += noise + + +# * -------------------- general -------------------- * +def get_model_conf(model_path, conf_path=None): + """Get model config information by reading a model config file (model.json). + + Args: + model_path (str): Model path. + conf_path (str): Optional model config path. + + Returns: + list[int, int, dict[str, Any]]: Config information loaded from json file. + + """ + if conf_path is None: + model_conf = os.path.dirname(model_path) + "/model.json" + else: + model_conf = conf_path + with open(model_conf, "rb") as f: + logging.info("reading a config file from " + model_conf) + confs = json.load(f) + if isinstance(confs, dict): + # for lm + args = confs + return argparse.Namespace(**args) + else: + # for asr, tts, mt + idim, odim, args = confs + return idim, odim, argparse.Namespace(**args) + + +def chainer_load(path, model): + """Load chainer model parameters. + + Args: + path (str): Model path or snapshot file path to be loaded. + model (chainer.Chain): Chainer model. + + """ + import chainer + + if "snapshot" in os.path.basename(path): + chainer.serializers.load_npz(path, model, path="updater/model:main/") + else: + chainer.serializers.load_npz(path, model) + + +def torch_save(path, model): + """Save torch model states. + + Args: + path (str): Model path to be saved. + model (torch.nn.Module): Torch model. 
+ + """ + if hasattr(model, "module"): + torch.save(model.module.state_dict(), path) + else: + torch.save(model.state_dict(), path) + + +def snapshot_object(target, filename): + """Returns a trainer extension to take snapshots of a given object. + + Args: + target (model): Object to serialize. + filename (str): Name of the file into which the object is serialized.It can + be a format string, where the trainer object is passed to + the :meth: `str.format` method. For example, + ``'snapshot_{.updater.iteration}'`` is converted to + ``'snapshot_10000'`` at the 10,000th iteration. + + Returns: + An extension function. + + """ + from chainer.training import extension + + @extension.make_extension(trigger=(1, "epoch"), priority=-100) + def snapshot_object(trainer): + torch_save(os.path.join(trainer.out, filename.format(trainer)), target) + + return snapshot_object + + +def torch_load(path, model): + """Load torch model states. + + Args: + path (str): Model path or snapshot file path to be loaded. + model (torch.nn.Module): Torch model. + + """ + if "snapshot" in os.path.basename(path): + model_state_dict = torch.load(path, map_location=lambda storage, loc: storage)[ + "model" + ] + else: + model_state_dict = torch.load(path, map_location=lambda storage, loc: storage) + + if hasattr(model, "module"): + model.module.load_state_dict(model_state_dict) + else: + model.load_state_dict(model_state_dict) + + del model_state_dict + + +def torch_resume(snapshot_path, trainer): + """Resume from snapshot for pytorch. + + Args: + snapshot_path (str): Snapshot file path. + trainer (chainer.training.Trainer): Chainer's trainer instance. + + """ + from chainer.serializers import NpzDeserializer + + # load snapshot + snapshot_dict = torch.load(snapshot_path, map_location=lambda storage, loc: storage) + + # restore trainer states + d = NpzDeserializer(snapshot_dict["trainer"]) + d.load(trainer) + + # restore model states + if hasattr(trainer.updater.model, "model"): + # (for TTS model) + if hasattr(trainer.updater.model.model, "module"): + trainer.updater.model.model.module.load_state_dict(snapshot_dict["model"]) + else: + trainer.updater.model.model.load_state_dict(snapshot_dict["model"]) + else: + # (for ASR model) + if hasattr(trainer.updater.model, "module"): + trainer.updater.model.module.load_state_dict(snapshot_dict["model"]) + else: + trainer.updater.model.load_state_dict(snapshot_dict["model"]) + + # retore optimizer states + trainer.updater.get_optimizer("main").load_state_dict(snapshot_dict["optimizer"]) + + # delete opened snapshot + del snapshot_dict + + +# * ------------------ recognition related ------------------ * +def parse_hypothesis(hyp, char_list): + """Parse hypothesis. + + Args: + hyp (list[dict[str, Any]]): Recognition hypothesis. + char_list (list[str]): List of characters. + + Returns: + tuple(str, str, str, float) + + """ + # remove sos and get results + tokenid_as_list = list(map(int, hyp["yseq"][1:])) + token_as_list = [char_list[idx] for idx in tokenid_as_list] + score = float(hyp["score"]) + + # convert to string + tokenid = " ".join([str(idx) for idx in tokenid_as_list]) + token = " ".join(token_as_list) + text = "".join(token_as_list).replace("", " ") + + return text, token, tokenid, score + + +def add_results_to_json(nbest_hyps, char_list): + """Add N-best results to json. + Args: + js (dict[str, Any]): Groundtruth utterance dict. + nbest_hyps_sd (list[dict[str, Any]]): + List of hypothesis for multi_speakers: nutts x nspkrs. + char_list (list[str]): List of characters. 
+ Returns: + str: 1-best result + """ + assert len(nbest_hyps) == 1, "only 1-best result is supported." + # parse hypothesis + rec_text, rec_token, rec_tokenid, score = parse_hypothesis(nbest_hyps[0], char_list) + return rec_text + + +def plot_spectrogram( + plt, + spec, + mode="db", + fs=None, + frame_shift=None, + bottom=True, + left=True, + right=True, + top=False, + labelbottom=True, + labelleft=True, + labelright=True, + labeltop=False, + cmap="inferno", +): + """Plot spectrogram using matplotlib. + + Args: + plt (matplotlib.pyplot): pyplot object. + spec (numpy.ndarray): Input stft (Freq, Time) + mode (str): db or linear. + fs (int): Sample frequency. To convert y-axis to kHz unit. + frame_shift (int): The frame shift of stft. To convert x-axis to second unit. + bottom (bool):Whether to draw the respective ticks. + left (bool): + right (bool): + top (bool): + labelbottom (bool):Whether to draw the respective tick labels. + labelleft (bool): + labelright (bool): + labeltop (bool): + cmap (str): Colormap defined in matplotlib. + + """ + spec = np.abs(spec) + if mode == "db": + x = 20 * np.log10(spec + np.finfo(spec.dtype).eps) + elif mode == "linear": + x = spec + else: + raise ValueError(mode) + + if fs is not None: + ytop = fs / 2000 + ylabel = "kHz" + else: + ytop = x.shape[0] + ylabel = "bin" + + if frame_shift is not None and fs is not None: + xtop = x.shape[1] * frame_shift / fs + xlabel = "s" + else: + xtop = x.shape[1] + xlabel = "frame" + + extent = (0, xtop, 0, ytop) + plt.imshow(x[::-1], cmap=cmap, extent=extent) + + if labelbottom: + plt.xlabel("time [{}]".format(xlabel)) + if labelleft: + plt.ylabel("freq [{}]".format(ylabel)) + plt.colorbar().set_label("{}".format(mode)) + + plt.tick_params( + bottom=bottom, + left=left, + right=right, + top=top, + labelbottom=labelbottom, + labelleft=labelleft, + labelright=labelright, + labeltop=labeltop, + ) + plt.axis("auto") + + +# * ------------------ recognition related ------------------ * +def format_mulenc_args(args): + """Format args for multi-encoder setup. + + It deals with following situations: (when args.num_encs=2): + 1. args.elayers = None -> args.elayers = [4, 4]; + 2. args.elayers = 4 -> args.elayers = [4, 4]; + 3. args.elayers = [4, 4, 4] -> args.elayers = [4, 4]. + + """ + # default values when None is assigned. 
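+    # Each key below maps to its single-encoder default: a value of None is
+    # replaced by that default, a scalar is duplicated args.num_encs times, and
+    # an over-long list is truncated to the first args.num_encs entries.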
+ default_dict = { + "etype": "blstmp", + "elayers": 4, + "eunits": 300, + "subsample": "1", + "dropout_rate": 0.0, + "atype": "dot", + "adim": 320, + "awin": 5, + "aheads": 4, + "aconv_chans": -1, + "aconv_filts": 100, + } + for k in default_dict.keys(): + if isinstance(vars(args)[k], list): + if len(vars(args)[k]) != args.num_encs: + logging.warning( + "Length mismatch {}: Convert {} to {}.".format( + k, vars(args)[k], vars(args)[k][: args.num_encs] + ) + ) + vars(args)[k] = vars(args)[k][: args.num_encs] + else: + if not vars(args)[k]: + # assign default value if it is None + vars(args)[k] = default_dict[k] + logging.warning( + "{} is not specified, use default value {}.".format( + k, default_dict[k] + ) + ) + # duplicate + logging.warning( + "Type mismatch {}: Convert {} to {}.".format( + k, vars(args)[k], [vars(args)[k] for _ in range(args.num_encs)] + ) + ) + vars(args)[k] = [vars(args)[k] for _ in range(args.num_encs)] + return args diff --git a/espnet/nets/.DS_Store b/espnet/nets/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..18468621f0c4966193dd622b3b05091ca6164b8c Binary files /dev/null and b/espnet/nets/.DS_Store differ diff --git a/espnet/nets/batch_beam_search.py b/espnet/nets/batch_beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce5a10ca061d38f1904c5ec5f49fbbf32acd554 --- /dev/null +++ b/espnet/nets/batch_beam_search.py @@ -0,0 +1,349 @@ +"""Parallel beam search module.""" + +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import NamedTuple +from typing import Tuple + +import torch +from torch.nn.utils.rnn import pad_sequence + +from espnet.nets.beam_search import BeamSearch +from espnet.nets.beam_search import Hypothesis + + +class BatchHypothesis(NamedTuple): + """Batchfied/Vectorized hypothesis data type.""" + + yseq: torch.Tensor = torch.tensor([]) # (batch, maxlen) + score: torch.Tensor = torch.tensor([]) # (batch,) + length: torch.Tensor = torch.tensor([]) # (batch,) + scores: Dict[str, torch.Tensor] = dict() # values: (batch,) + states: Dict[str, Dict] = dict() + + def __len__(self) -> int: + """Return a batch size.""" + return len(self.length) + + +class BatchBeamSearch(BeamSearch): + """Batch beam search implementation.""" + + def batchfy(self, hyps: List[Hypothesis]) -> BatchHypothesis: + """Convert list to batch.""" + if len(hyps) == 0: + return BatchHypothesis() + yseq=pad_sequence( + [h.yseq for h in hyps], batch_first=True, padding_value=self.eos + ) + return BatchHypothesis( + yseq=yseq, + length=torch.tensor([len(h.yseq) for h in hyps], dtype=torch.int64, device=yseq.device), + score=torch.tensor([h.score for h in hyps]).to(yseq.device), + scores={k: torch.tensor([h.scores[k] for h in hyps], device=yseq.device) for k in self.scorers}, + states={k: [h.states[k] for h in hyps] for k in self.scorers}, + ) + + def _batch_select(self, hyps: BatchHypothesis, ids: List[int]) -> BatchHypothesis: + return BatchHypothesis( + yseq=hyps.yseq[ids], + score=hyps.score[ids], + length=hyps.length[ids], + scores={k: v[ids] for k, v in hyps.scores.items()}, + states={ + k: [self.scorers[k].select_state(v, i) for i in ids] + for k, v in hyps.states.items() + }, + ) + + def _select(self, hyps: BatchHypothesis, i: int) -> Hypothesis: + return Hypothesis( + yseq=hyps.yseq[i, : hyps.length[i]], + score=hyps.score[i], + scores={k: v[i] for k, v in hyps.scores.items()}, + states={ + k: self.scorers[k].select_state(v, i) for k, v in hyps.states.items() + }, + ) + + 
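+    # NOTE: batchfy() pads a List[Hypothesis] into a single BatchHypothesis
+    # (padding yseq with eos and recording the true lengths), unbatchfy() below
+    # converts back, and _batch_select()/_select() pick a subset (or a single
+    # item) of a BatchHypothesis via each scorer's select_state().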
def unbatchfy(self, batch_hyps: BatchHypothesis) -> List[Hypothesis]: + """Revert batch to list.""" + return [ + Hypothesis( + yseq=batch_hyps.yseq[i][: batch_hyps.length[i]], + score=batch_hyps.score[i], + scores={k: batch_hyps.scores[k][i] for k in self.scorers}, + states={ + k: v.select_state(batch_hyps.states[k], i) + for k, v in self.scorers.items() + }, + ) + for i in range(len(batch_hyps.length)) + ] + + def batch_beam( + self, weighted_scores: torch.Tensor, ids: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Batch-compute topk full token ids and partial token ids. + + Args: + weighted_scores (torch.Tensor): The weighted sum scores for each tokens. + Its shape is `(n_beam, self.vocab_size)`. + ids (torch.Tensor): The partial token ids to compute topk. + Its shape is `(n_beam, self.pre_beam_size)`. + + Returns: + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + The topk full (prev_hyp, new_token) ids + and partial (prev_hyp, new_token) ids. + Their shapes are all `(self.beam_size,)` + + """ + top_ids = weighted_scores.view(-1).topk(self.beam_size)[1] + # Because of the flatten above, `top_ids` is organized as: + # [hyp1 * V + token1, hyp2 * V + token2, ..., hypK * V + tokenK], + # where V is `self.n_vocab` and K is `self.beam_size` + prev_hyp_ids = torch.div(top_ids, self.n_vocab, rounding_mode='trunc') + new_token_ids = top_ids % self.n_vocab + return prev_hyp_ids, new_token_ids, prev_hyp_ids, new_token_ids + + def init_hyp(self, x: torch.Tensor) -> BatchHypothesis: + """Get an initial hypothesis data. + + Args: + x (torch.Tensor): The encoder output feature + + Returns: + Hypothesis: The initial hypothesis. + + """ + init_states = dict() + init_scores = dict() + for k, d in self.scorers.items(): + init_states[k] = d.batch_init_state(x) + init_scores[k] = 0.0 + return self.batchfy( + [ + Hypothesis( + score=0.0, + scores=init_scores, + states=init_states, + yseq=torch.tensor([self.sos], device=x.device), + ) + ] + ) + + def score_full( + self, hyp: BatchHypothesis, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.full_scorers.items(): + scores[k], states[k] = d.batch_score(hyp.yseq, hyp.states[k], x) + return scores, states + + def score_partial( + self, hyp: BatchHypothesis, ids: torch.Tensor, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. 
+ + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + ids (torch.Tensor): 2D tensor of new partial tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.part_scorers.items(): + scores[k], states[k] = d.batch_score_partial( + hyp.yseq, ids, hyp.states[k], x + ) + return scores, states + + def merge_states(self, states: Any, part_states: Any, part_idx: int) -> Any: + """Merge states for new hypothesis. + + Args: + states: states of `self.full_scorers` + part_states: states of `self.part_scorers` + part_idx (int): The new token id for `part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. + Its values are states of the scorers. + + """ + new_states = dict() + for k, v in states.items(): + new_states[k] = v + for k, v in part_states.items(): + new_states[k] = v + return new_states + + def search(self, running_hyps: BatchHypothesis, x: torch.Tensor) -> BatchHypothesis: + """Search new tokens for running hypotheses and encoded speech x. + + Args: + running_hyps (BatchHypothesis): Running hypotheses on beam + x (torch.Tensor): Encoded speech feature (T, D) + + Returns: + BatchHypothesis: Best sorted hypotheses + + """ + n_batch = len(running_hyps) + part_ids = None # no pre-beam + # batch scoring + weighted_scores = torch.zeros( + n_batch, self.n_vocab, dtype=x.dtype, device=x.device + ) + scores, states = self.score_full(running_hyps, x.expand(n_batch, *x.shape)) + for k in self.full_scorers: + weighted_scores += self.weights[k] * scores[k] + # partial scoring + if self.do_pre_beam: + pre_beam_scores = ( + weighted_scores + if self.pre_beam_score_key == "full" + else scores[self.pre_beam_score_key] + ) + part_ids = torch.topk(pre_beam_scores, self.pre_beam_size, dim=-1)[1] + # NOTE(takaaki-hori): Unlike BeamSearch, we assume that score_partial returns + # full-size score matrices, which has non-zero scores for part_ids and zeros + # for others. + part_scores, part_states = self.score_partial(running_hyps, part_ids, x) + for k in self.part_scorers: + weighted_scores += self.weights[k] * part_scores[k] + # add previous hyp scores + weighted_scores += running_hyps.score.to( + dtype=x.dtype, device=x.device + ).unsqueeze(1) + + # TODO(karita): do not use list. 
use batch instead + # see also https://github.com/espnet/espnet/pull/1402#discussion_r354561029 + # update hyps + best_hyps = [] + prev_hyps = self.unbatchfy(running_hyps) + for ( + full_prev_hyp_id, + full_new_token_id, + part_prev_hyp_id, + part_new_token_id, + ) in zip(*self.batch_beam(weighted_scores, part_ids)): + prev_hyp = prev_hyps[full_prev_hyp_id] + best_hyps.append( + Hypothesis( + score=weighted_scores[full_prev_hyp_id, full_new_token_id], + yseq=self.append_token(prev_hyp.yseq, full_new_token_id), + scores=self.merge_scores( + prev_hyp.scores, + {k: v[full_prev_hyp_id] for k, v in scores.items()}, + full_new_token_id, + {k: v[part_prev_hyp_id] for k, v in part_scores.items()}, + part_new_token_id, + ), + states=self.merge_states( + { + k: self.full_scorers[k].select_state(v, full_prev_hyp_id) + for k, v in states.items() + }, + { + k: self.part_scorers[k].select_state( + v, part_prev_hyp_id, part_new_token_id + ) + for k, v in part_states.items() + }, + part_new_token_id, + ), + ) + ) + return self.batchfy(best_hyps) + + def post_process( + self, + i: int, + maxlen: int, + maxlenratio: float, + running_hyps: BatchHypothesis, + ended_hyps: List[Hypothesis], + ) -> BatchHypothesis: + """Perform post-processing of beam search iterations. + + Args: + i (int): The length of hypothesis tokens. + maxlen (int): The maximum length of tokens in beam search. + maxlenratio (int): The maximum length ratio in beam search. + running_hyps (BatchHypothesis): The running hypotheses in beam search. + ended_hyps (List[Hypothesis]): The ended hypotheses in beam search. + + Returns: + BatchHypothesis: The new running hypotheses. + + """ + n_batch = running_hyps.yseq.shape[0] + logging.debug(f"the number of running hypothes: {n_batch}") + if self.token_list is not None: + logging.debug( + "best hypo: " + + "".join( + [ + self.token_list[x] + for x in running_hyps.yseq[0, 1 : running_hyps.length[0]] + ] + ) + ) + # add eos in the final loop to avoid that there are no ended hyps + if i == maxlen - 1: + logging.info("adding in the last position in the loop") + yseq_eos = torch.cat( + ( + running_hyps.yseq, + torch.full( + (n_batch, 1), + self.eos, + device=running_hyps.yseq.device, + dtype=torch.int64, + ), + ), + 1, + ) + running_hyps.yseq.resize_as_(yseq_eos) + running_hyps.yseq[:] = yseq_eos + running_hyps.length[:] = yseq_eos.shape[1] + + # add ended hypotheses to a final list, and removed them from current hypotheses + # (this will be a probmlem, number of hyps < beam) + is_eos = ( + running_hyps.yseq[torch.arange(n_batch), running_hyps.length - 1] + == self.eos + ) + for b in torch.nonzero(is_eos, as_tuple=False).view(-1): + hyp = self._select(running_hyps, b) + ended_hyps.append(hyp) + remained_ids = torch.nonzero(is_eos == 0, as_tuple=False).view(-1) + return self._batch_select(running_hyps, remained_ids) diff --git a/espnet/nets/beam_search.py b/espnet/nets/beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..0f33d8c63bf667c7eed598a10ce9e5cb53be121c --- /dev/null +++ b/espnet/nets/beam_search.py @@ -0,0 +1,516 @@ +"""Beam search module.""" + +from itertools import chain +import logging +from typing import Any +from typing import Dict +from typing import List +from typing import NamedTuple +from typing import Tuple +from typing import Union + +import torch + +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.scorer_interface import PartialScorerInterface +from espnet.nets.scorer_interface import ScorerInterface + + +class Hypothesis(NamedTuple): 
+ """Hypothesis data type.""" + + yseq: torch.Tensor + score: Union[float, torch.Tensor] = 0 + scores: Dict[str, Union[float, torch.Tensor]] = dict() + states: Dict[str, Any] = dict() + + def asdict(self) -> dict: + """Convert data to JSON-friendly dict.""" + return self._replace( + yseq=self.yseq.tolist(), + score=float(self.score), + scores={k: float(v) for k, v in self.scores.items()}, + )._asdict() + + +class BeamSearch(torch.nn.Module): + """Beam search implementation.""" + + def __init__( + self, + scorers: Dict[str, ScorerInterface], + weights: Dict[str, float], + beam_size: int, + vocab_size: int, + sos: int, + eos: int, + token_list: List[str] = None, + pre_beam_ratio: float = 1.5, + pre_beam_score_key: str = None, + ): + """Initialize beam search. + + Args: + scorers (dict[str, ScorerInterface]): Dict of decoder modules + e.g., Decoder, CTCPrefixScorer, LM + The scorer will be ignored if it is `None` + weights (dict[str, float]): Dict of weights for each scorers + The scorer will be ignored if its weight is 0 + beam_size (int): The number of hypotheses kept during search + vocab_size (int): The number of vocabulary + sos (int): Start of sequence id + eos (int): End of sequence id + token_list (list[str]): List of tokens for debug log + pre_beam_score_key (str): key of scores to perform pre-beam search + pre_beam_ratio (float): beam size in the pre-beam search + will be `int(pre_beam_ratio * beam_size)` + + """ + super().__init__() + # set scorers + self.weights = weights + self.scorers = dict() + self.full_scorers = dict() + self.part_scorers = dict() + # this module dict is required for recursive cast + # `self.to(device, dtype)` in `recog.py` + self.nn_dict = torch.nn.ModuleDict() + for k, v in scorers.items(): + w = weights.get(k, 0) + if w == 0 or v is None: + continue + assert isinstance( + v, ScorerInterface + ), f"{k} ({type(v)}) does not implement ScorerInterface" + self.scorers[k] = v + if isinstance(v, PartialScorerInterface): + self.part_scorers[k] = v + else: + self.full_scorers[k] = v + if isinstance(v, torch.nn.Module): + self.nn_dict[k] = v + + # set configurations + self.sos = sos + self.eos = eos + self.token_list = token_list + self.pre_beam_size = int(pre_beam_ratio * beam_size) + self.beam_size = beam_size + self.n_vocab = vocab_size + if ( + pre_beam_score_key is not None + and pre_beam_score_key != "full" + and pre_beam_score_key not in self.full_scorers + ): + raise KeyError(f"{pre_beam_score_key} is not found in {self.full_scorers}") + self.pre_beam_score_key = pre_beam_score_key + self.do_pre_beam = ( + self.pre_beam_score_key is not None + and self.pre_beam_size < self.n_vocab + and len(self.part_scorers) > 0 + ) + + def init_hyp(self, x: torch.Tensor) -> List[Hypothesis]: + """Get an initial hypothesis data. + + Args: + x (torch.Tensor): The encoder output feature + + Returns: + Hypothesis: The initial hypothesis. + + """ + init_states = dict() + init_scores = dict() + for k, d in self.scorers.items(): + init_states[k] = d.init_state(x) + init_scores[k] = 0.0 + return [ + Hypothesis( + score=0.0, + scores=init_scores, + states=init_states, + yseq=torch.tensor([self.sos], device=x.device), + ) + ] + + @staticmethod + def append_token(xs: torch.Tensor, x: int) -> torch.Tensor: + """Append new token to prefix tokens. 
+ + Args: + xs (torch.Tensor): The prefix token + x (int): The new token to append + + Returns: + torch.Tensor: New tensor contains: xs + [x] with xs.dtype and xs.device + + """ + x = torch.tensor([x], dtype=xs.dtype, device=xs.device) + return torch.cat((xs, x)) + + def score_full( + self, hyp: Hypothesis, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.full_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.full_scorers` + and tensor score values of shape: `(self.n_vocab,)`, + and state dict that has string keys + and state values of `self.full_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.full_scorers.items(): + scores[k], states[k] = d.score(hyp.yseq, hyp.states[k], x) + return scores, states + + def score_partial( + self, hyp: Hypothesis, ids: torch.Tensor, x: torch.Tensor + ) -> Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: + """Score new hypothesis by `self.part_scorers`. + + Args: + hyp (Hypothesis): Hypothesis with prefix tokens to score + ids (torch.Tensor): 1D tensor of new partial tokens to score + x (torch.Tensor): Corresponding input feature + + Returns: + Tuple[Dict[str, torch.Tensor], Dict[str, Any]]: Tuple of + score dict of `hyp` that has string keys of `self.part_scorers` + and tensor score values of shape: `(len(ids),)`, + and state dict that has string keys + and state values of `self.part_scorers` + + """ + scores = dict() + states = dict() + for k, d in self.part_scorers.items(): + scores[k], states[k] = d.score_partial(hyp.yseq, ids, hyp.states[k], x) + return scores, states + + def beam( + self, weighted_scores: torch.Tensor, ids: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute topk full token ids and partial token ids. + + Args: + weighted_scores (torch.Tensor): The weighted sum scores for each tokens. + Its shape is `(self.n_vocab,)`. + ids (torch.Tensor): The partial token ids to compute topk + + Returns: + Tuple[torch.Tensor, torch.Tensor]: + The topk full token ids and partial token ids. + Their shapes are `(self.beam_size,)` + + """ + # no pre beam performed + if weighted_scores.size(0) == ids.size(0): + top_ids = weighted_scores.topk(self.beam_size)[1] + return top_ids, top_ids + + # mask pruned in pre-beam not to select in topk + tmp = weighted_scores[ids] + weighted_scores[:] = -float("inf") + weighted_scores[ids] = tmp + top_ids = weighted_scores.topk(self.beam_size)[1] + local_ids = weighted_scores[ids].topk(self.beam_size)[1] + return top_ids, local_ids + + @staticmethod + def merge_scores( + prev_scores: Dict[str, float], + next_full_scores: Dict[str, torch.Tensor], + full_idx: int, + next_part_scores: Dict[str, torch.Tensor], + part_idx: int, + ) -> Dict[str, torch.Tensor]: + """Merge scores for new hypothesis. + + Args: + prev_scores (Dict[str, float]): + The previous hypothesis scores by `self.scorers` + next_full_scores (Dict[str, torch.Tensor]): scores by `self.full_scorers` + full_idx (int): The next token id for `next_full_scores` + next_part_scores (Dict[str, torch.Tensor]): + scores of partial tokens by `self.part_scorers` + part_idx (int): The new token id for `next_part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. 
+ Its values are scalar tensors by the scorers. + + """ + new_scores = dict() + for k, v in next_full_scores.items(): + new_scores[k] = prev_scores[k] + v[full_idx] + for k, v in next_part_scores.items(): + new_scores[k] = prev_scores[k] + v[part_idx] + return new_scores + + def merge_states(self, states: Any, part_states: Any, part_idx: int) -> Any: + """Merge states for new hypothesis. + + Args: + states: states of `self.full_scorers` + part_states: states of `self.part_scorers` + part_idx (int): The new token id for `part_scores` + + Returns: + Dict[str, torch.Tensor]: The new score dict. + Its keys are names of `self.full_scorers` and `self.part_scorers`. + Its values are states of the scorers. + + """ + new_states = dict() + for k, v in states.items(): + new_states[k] = v + for k, d in self.part_scorers.items(): + new_states[k] = d.select_state(part_states[k], part_idx) + return new_states + + def search( + self, running_hyps: List[Hypothesis], x: torch.Tensor + ) -> List[Hypothesis]: + """Search new tokens for running hypotheses and encoded speech x. + + Args: + running_hyps (List[Hypothesis]): Running hypotheses on beam + x (torch.Tensor): Encoded speech feature (T, D) + + Returns: + List[Hypotheses]: Best sorted hypotheses + + """ + best_hyps = [] + part_ids = torch.arange(self.n_vocab, device=x.device) # no pre-beam + for hyp in running_hyps: + # scoring + weighted_scores = torch.zeros(self.n_vocab, dtype=x.dtype, device=x.device) + scores, states = self.score_full(hyp, x) + for k in self.full_scorers: + weighted_scores += self.weights[k] * scores[k] + # partial scoring + if self.do_pre_beam: + pre_beam_scores = ( + weighted_scores + if self.pre_beam_score_key == "full" + else scores[self.pre_beam_score_key] + ) + part_ids = torch.topk(pre_beam_scores, self.pre_beam_size)[1] + part_scores, part_states = self.score_partial(hyp, part_ids, x) + for k in self.part_scorers: + weighted_scores[part_ids] += self.weights[k] * part_scores[k] + # add previous hyp score + weighted_scores += hyp.score + + # update hyps + for j, part_j in zip(*self.beam(weighted_scores, part_ids)): + # will be (2 x beam at most) + best_hyps.append( + Hypothesis( + score=weighted_scores[j], + yseq=self.append_token(hyp.yseq, j), + scores=self.merge_scores( + hyp.scores, scores, j, part_scores, part_j + ), + states=self.merge_states(states, part_states, part_j), + ) + ) + + # sort and prune 2 x beam -> beam + best_hyps = sorted(best_hyps, key=lambda x: x.score, reverse=True)[ + : min(len(best_hyps), self.beam_size) + ] + return best_hyps + + def forward( + self, x: torch.Tensor, maxlenratio: float = 0.0, minlenratio: float = 0.0 + ) -> List[Hypothesis]: + """Perform beam search. + + Args: + x (torch.Tensor): Encoded speech feature (T, D) + maxlenratio (float): Input length ratio to obtain max output length. + If maxlenratio=0.0 (default), it uses a end-detect function + to automatically find maximum hypothesis lengths + If maxlenratio<0.0, its absolute value is interpreted + as a constant max output length. + minlenratio (float): Input length ratio to obtain min output length. 
+ + Returns: + list[Hypothesis]: N-best decoding results + + """ + # set length bounds + if maxlenratio == 0: + maxlen = x.shape[0] + elif maxlenratio < 0: + maxlen = -1 * int(maxlenratio) + else: + maxlen = max(1, int(maxlenratio * x.size(0))) + minlen = int(minlenratio * x.size(0)) + logging.info("decoder input length: " + str(x.shape[0])) + logging.info("max output length: " + str(maxlen)) + logging.info("min output length: " + str(minlen)) + + # main loop of prefix search + running_hyps = self.init_hyp(x) + ended_hyps = [] + for i in range(maxlen): + logging.debug("position " + str(i)) + best = self.search(running_hyps, x) + # post process of one iteration + running_hyps = self.post_process(i, maxlen, maxlenratio, best, ended_hyps) + # end detection + if maxlenratio == 0.0 and end_detect([h.asdict() for h in ended_hyps], i): + logging.info(f"end detected at {i}") + break + if len(running_hyps) == 0: + logging.info("no hypothesis. Finish decoding.") + break + else: + logging.debug(f"remained hypotheses: {len(running_hyps)}") + + nbest_hyps = sorted(ended_hyps, key=lambda x: x.score, reverse=True) + # check the number of hypotheses reaching to eos + if len(nbest_hyps) == 0: + logging.warning( + "there is no N-best results, perform recognition " + "again with smaller minlenratio." + ) + return ( + [] + if minlenratio < 0.1 + else self.forward(x, maxlenratio, max(0.0, minlenratio - 0.1)) + ) + + # report the best result + best = nbest_hyps[0] + for k, v in best.scores.items(): + logging.info( + f"{v:6.2f} * {self.weights[k]:3} = {v * self.weights[k]:6.2f} for {k}" + ) + logging.info(f"total log probability: {best.score:.2f}") + logging.info(f"normalized log probability: {best.score / len(best.yseq):.2f}") + logging.info(f"total number of ended hypotheses: {len(nbest_hyps)}") + if self.token_list is not None: + logging.info( + "best hypo: " + + "".join([self.token_list[x] for x in best.yseq[1:-1]]) + + "\n" + ) + return nbest_hyps + + def post_process( + self, + i: int, + maxlen: int, + maxlenratio: float, + running_hyps: List[Hypothesis], + ended_hyps: List[Hypothesis], + ) -> List[Hypothesis]: + """Perform post-processing of beam search iterations. + + Args: + i (int): The length of hypothesis tokens. + maxlen (int): The maximum length of tokens in beam search. + maxlenratio (int): The maximum length ratio in beam search. + running_hyps (List[Hypothesis]): The running hypotheses in beam search. + ended_hyps (List[Hypothesis]): The ended hypotheses in beam search. + + Returns: + List[Hypothesis]: The new running hypotheses. 
+ + """ + logging.debug(f"the number of running hypotheses: {len(running_hyps)}") + if self.token_list is not None: + logging.debug( + "best hypo: " + + "".join([self.token_list[x] for x in running_hyps[0].yseq[1:]]) + ) + # add eos in the final loop to avoid that there are no ended hyps + if i == maxlen - 1: + logging.info("adding in the last position in the loop") + running_hyps = [ + h._replace(yseq=self.append_token(h.yseq, self.eos)) + for h in running_hyps + ] + + # add ended hypotheses to a final list, and removed them from current hypotheses + # (this will be a problem, number of hyps < beam) + remained_hyps = [] + for hyp in running_hyps: + if hyp.yseq[-1] == self.eos: + # e.g., Word LM needs to add final score + for k, d in chain(self.full_scorers.items(), self.part_scorers.items()): + s = d.final_score(hyp.states[k]) + hyp.scores[k] += s + hyp = hyp._replace(score=hyp.score + self.weights[k] * s) + ended_hyps.append(hyp) + else: + remained_hyps.append(hyp) + return remained_hyps + + +def beam_search( + x: torch.Tensor, + sos: int, + eos: int, + beam_size: int, + vocab_size: int, + scorers: Dict[str, ScorerInterface], + weights: Dict[str, float], + token_list: List[str] = None, + maxlenratio: float = 0.0, + minlenratio: float = 0.0, + pre_beam_ratio: float = 1.5, + pre_beam_score_key: str = "full", +) -> list: + """Perform beam search with scorers. + + Args: + x (torch.Tensor): Encoded speech feature (T, D) + sos (int): Start of sequence id + eos (int): End of sequence id + beam_size (int): The number of hypotheses kept during search + vocab_size (int): The number of vocabulary + scorers (dict[str, ScorerInterface]): Dict of decoder modules + e.g., Decoder, CTCPrefixScorer, LM + The scorer will be ignored if it is `None` + weights (dict[str, float]): Dict of weights for each scorers + The scorer will be ignored if its weight is 0 + token_list (list[str]): List of tokens for debug log + maxlenratio (float): Input length ratio to obtain max output length. + If maxlenratio=0.0 (default), it uses a end-detect function + to automatically find maximum hypothesis lengths + minlenratio (float): Input length ratio to obtain min output length. + pre_beam_score_key (str): key of scores to perform pre-beam search + pre_beam_ratio (float): beam size in the pre-beam search + will be `int(pre_beam_ratio * beam_size)` + + Returns: + list: N-best decoding results + + """ + ret = BeamSearch( + scorers, + weights, + beam_size=beam_size, + vocab_size=vocab_size, + pre_beam_ratio=pre_beam_ratio, + pre_beam_score_key=pre_beam_score_key, + sos=sos, + eos=eos, + token_list=token_list, + ).forward(x=x, maxlenratio=maxlenratio, minlenratio=minlenratio) + return [h.asdict() for h in ret] diff --git a/espnet/nets/ctc_prefix_score.py b/espnet/nets/ctc_prefix_score.py new file mode 100644 index 0000000000000000000000000000000000000000..0c67ecd096de46ad00972cf3a8ba812852f38c97 --- /dev/null +++ b/espnet/nets/ctc_prefix_score.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 + +# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch + +import numpy as np +import six + + +class CTCPrefixScoreTH(object): + """Batch processing of CTCPrefixScore + + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the label probablities for multiple + hypotheses simultaneously + See also Seki et al. 
"Vectorized Beam Search for CTC-Attention-Based + Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019. + """ + + def __init__(self, x, xlens, blank, eos, margin=0): + """Construct CTC prefix scorer + + :param torch.Tensor x: input label posterior sequences (B, T, O) + :param torch.Tensor xlens: input lengths (B,) + :param int blank: blank label id + :param int eos: end-of-sequence id + :param int margin: margin parameter for windowing (0 means no windowing) + """ + # In the comment lines, + # we assume T: input_length, B: batch size, W: beam width, O: output dim. + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.batch = x.size(0) + self.input_length = x.size(1) + self.odim = x.size(2) + self.dtype = x.dtype + self.device = ( + torch.device("cuda:%d" % x.get_device()) + if x.is_cuda + else torch.device("cpu") + ) + # Pad the rest of posteriors in the batch + # TODO(takaaki-hori): need a better way without for-loops + for i, l in enumerate(xlens): + if l < self.input_length: + x[i, l:, :] = self.logzero + x[i, l:, blank] = 0 + # Reshape input x + xn = x.transpose(0, 1) # (B, T, O) -> (T, B, O) + xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) + self.x = torch.stack([xn, xb]) # (2, T, B, O) + self.end_frames = torch.as_tensor(xlens) - 1 + + # Setup CTC windowing + self.margin = margin + if margin > 0: + self.frame_ids = torch.arange( + self.input_length, dtype=self.dtype, device=self.device + ) + # Base indices for index conversion + self.idx_bh = None + self.idx_b = torch.arange(self.batch, device=self.device) + self.idx_bo = (self.idx_b * self.odim).unsqueeze(1) + + def __call__(self, y, state, scoring_ids=None, att_w=None): + """Compute CTC prefix scores for next labels + + :param list y: prefix label sequences + :param tuple state: previous CTC state + :param torch.Tensor pre_scores: scores for pre-selection of hypotheses (BW, O) + :param torch.Tensor att_w: attention weights to decide CTC window + :return new_state, ctc_local_scores (BW, O) + """ + output_length = len(y[0]) - 1 # ignore sos + last_ids = [yi[-1] for yi in y] # last output label ids + n_bh = len(last_ids) # batch * hyps + n_hyps = n_bh // self.batch # assuming each utterance has the same # of hyps + self.scoring_num = scoring_ids.size(-1) if scoring_ids is not None else 0 + # prepare state info + if state is None: + r_prev = torch.full( + (self.input_length, 2, self.batch, n_hyps), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + r_prev[:, 1] = torch.cumsum(self.x[0, :, :, self.blank], 0).unsqueeze(2) + r_prev = r_prev.view(-1, 2, n_bh) + s_prev = 0.0 + f_min_prev = 0 + f_max_prev = 1 + else: + r_prev, s_prev, f_min_prev, f_max_prev = state + + # select input dimensions for scoring + if self.scoring_num > 0: + scoring_idmap = torch.full( + (n_bh, self.odim), -1, dtype=torch.long, device=self.device + ) + snum = self.scoring_num + if self.idx_bh is None or n_bh > len(self.idx_bh): + self.idx_bh = torch.arange(n_bh, device=self.device).view(-1, 1) + scoring_idmap[self.idx_bh[:n_bh], scoring_ids] = torch.arange( + snum, device=self.device + ) + scoring_idx = ( + scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1, 1) + ).view(-1) + x_ = torch.index_select( + self.x.view(2, -1, self.batch * self.odim), 2, scoring_idx + ).view(2, -1, n_bh, snum) + else: + scoring_ids = None + scoring_idmap = None + snum = self.odim + x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1, n_bh, snum) + + # new CTC forward probs are prepared as a (T x 2 x BW x S) tensor + # 
that corresponds to r_t^n(h) and r_t^b(h) in a batch. + r = torch.full( + (self.input_length, 2, n_bh, snum), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + if output_length == 0: + r[0, 0] = x_[0, 0] + + r_sum = torch.logsumexp(r_prev, 1) + log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum) + if scoring_ids is not None: + for idx in range(n_bh): + pos = scoring_idmap[idx, last_ids[idx]] + if pos >= 0: + log_phi[:, idx, pos] = r_prev[:, 1, idx] + else: + for idx in range(n_bh): + log_phi[:, idx, last_ids[idx]] = r_prev[:, 1, idx] + + # decide start and end frames based on attention weights + if att_w is not None and self.margin > 0: + f_arg = torch.matmul(att_w, self.frame_ids) + f_min = max(int(f_arg.min().cpu()), f_min_prev) + f_max = max(int(f_arg.max().cpu()), f_max_prev) + start = min(f_max_prev, max(f_min - self.margin, output_length, 1)) + end = min(f_max + self.margin, self.input_length) + else: + f_min = f_max = 0 + start = max(output_length, 1) + end = self.input_length + + # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h)) + for t in range(start, end): + rp = r[t - 1] + rr = torch.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view( + 2, 2, n_bh, snum + ) + r[t] = torch.logsumexp(rr, 1) + x_[:, t] + + # compute log prefix probabilities log(psi) + log_phi_x = torch.cat((log_phi[0].unsqueeze(0), log_phi[:-1]), dim=0) + x_[0] + if scoring_ids is not None: + log_psi = torch.full( + (n_bh, self.odim), self.logzero, dtype=self.dtype, device=self.device + ) + log_psi_ = torch.logsumexp( + torch.cat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), dim=0), + dim=0, + ) + for si in range(n_bh): + log_psi[si, scoring_ids[si]] = log_psi_[si] + else: + log_psi = torch.logsumexp( + torch.cat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), dim=0), + dim=0, + ) + + for si in range(n_bh): + log_psi[si, self.eos] = r_sum[self.end_frames[si // n_hyps], si] + + # exclude blank probs + log_psi[:, self.blank] = self.logzero + + return (log_psi - s_prev), (r, log_psi, f_min, f_max, scoring_idmap) + + def index_select_state(self, state, best_ids): + """Select CTC states according to best ids + + :param state : CTC state + :param best_ids : index numbers selected by beam pruning (B, W) + :return selected_state + """ + r, s, f_min, f_max, scoring_idmap = state + # convert ids to BHO space + n_bh = len(s) + n_hyps = n_bh // self.batch + vidx = (best_ids + (self.idx_b * (n_hyps * self.odim)).view(-1, 1)).view(-1) + # select hypothesis scores + s_new = torch.index_select(s.view(-1), 0, vidx) + s_new = s_new.view(-1, 1).repeat(1, self.odim).view(n_bh, self.odim) + # convert ids to BHS space (S: scoring_num) + if scoring_idmap is not None: + snum = self.scoring_num + hyp_idx = (best_ids // self.odim + (self.idx_b * n_hyps).view(-1, 1)).view( + -1 + ) + label_ids = torch.fmod(best_ids, self.odim).view(-1) + score_idx = scoring_idmap[hyp_idx, label_ids] + score_idx[score_idx == -1] = 0 + vidx = score_idx + hyp_idx * snum + else: + snum = self.odim + # select forward probabilities + r_new = torch.index_select(r.view(-1, 2, n_bh * snum), 2, vidx).view( + -1, 2, n_bh + ) + return r_new, s_new, f_min, f_max + + def extend_prob(self, x): + """Extend CTC prob. 
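+
+        Note: this is intended to be used together with ``extend_state`` when
+        longer posterior sequences become available (e.g. segment-wise or
+        streaming decoding). An illustrative call pattern, assuming ``scorer``
+        is an instance of this scorer class:
+
+            scorer.extend_prob(new_x)           # new_x: (B, T_new, O), T_new >= previous T
+            state = scorer.extend_state(state)  # pad per-hypothesis forward probabilities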
+ + :param torch.Tensor x: input label posterior sequences (B, T, O) + """ + + if self.x.shape[1] < x.shape[1]: # self.x (2,T,B,O); x (B,T,O) + # Pad the rest of posteriors in the batch + # TODO(takaaki-hori): need a better way without for-loops + xlens = [x.size(1)] + for i, l in enumerate(xlens): + if l < self.input_length: + x[i, l:, :] = self.logzero + x[i, l:, self.blank] = 0 + tmp_x = self.x + xn = x.transpose(0, 1) # (B, T, O) -> (T, B, O) + xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) + self.x = torch.stack([xn, xb]) # (2, T, B, O) + self.x[:, : tmp_x.shape[1], :, :] = tmp_x + self.input_length = x.size(1) + self.end_frames = torch.as_tensor(xlens) - 1 + + def extend_state(self, state): + """Compute CTC prefix state. + + + :param state : CTC state + :return ctc_state + """ + + if state is None: + # nothing to do + return state + else: + r_prev, s_prev, f_min_prev, f_max_prev = state + + r_prev_new = torch.full( + (self.input_length, 2), + self.logzero, + dtype=self.dtype, + device=self.device, + ) + start = max(r_prev.shape[0], 1) + r_prev_new[0:start] = r_prev + for t in six.moves.range(start, self.input_length): + r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :, self.blank] + + return (r_prev_new, s_prev, f_min_prev, f_max_prev) + + +class CTCPrefixScore(object): + """Compute CTC label sequence scores + + which is based on Algorithm 2 in WATANABE et al. + "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION," + but extended to efficiently compute the probablities of multiple labels + simultaneously + """ + + def __init__(self, x, blank, eos, xp): + self.xp = xp + self.logzero = -10000000000.0 + self.blank = blank + self.eos = eos + self.input_length = len(x) + self.x = x + + def initial_state(self): + """Obtain an initial CTC state + + :return: CTC state + """ + # initial CTC state is made of a frame x 2 tensor that corresponds to + # r_t^n() and r_t^b(), where 0 and 1 of axis=1 represent + # superscripts n and b (non-blank and blank), respectively. + r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32) + r[0, 1] = self.x[0, self.blank] + for i in six.moves.range(1, self.input_length): + r[i, 1] = r[i - 1, 1] + self.x[i, self.blank] + return r + + def __call__(self, y, cs, r_prev): + """Compute CTC prefix scores for next labels + + :param y : prefix label sequence + :param cs : array of next labels + :param r_prev: previous CTC state + :return ctc_scores, ctc_states + """ + # initialize CTC states + output_length = len(y) - 1 # ignore sos + # new CTC states are prepared as a frame x (n or b) x n_labels tensor + # that corresponds to r_t^n(h) and r_t^b(h). 
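+        # The loop below implements the prefix-score recursion in the log domain
+        # (xp.logaddexp = log of a sum, "+" = log of a product):
+        #   r_t^n(h) = logaddexp(r_{t-1}^n(h), phi_{t-1}) + log p_t(c)
+        #   r_t^b(h) = logaddexp(r_{t-1}^n(h), r_{t-1}^b(h)) + log p_t(blank)
+        #   log(psi) accumulates logaddexp over t of (phi_{t-1} + log p_t(c))
+        # where phi_t equals r_t^b(g) if c is the last label of the prefix g,
+        # and log(r_t^n(g) + r_t^b(g)) otherwise.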
+ r = self.xp.ndarray((self.input_length, 2, len(cs)), dtype=np.float32) + xs = self.x[:, cs] + if output_length == 0: + r[0, 0] = xs[0] + r[0, 1] = self.logzero + else: + r[output_length - 1] = self.logzero + + # prepare forward probabilities for the last label + r_sum = self.xp.logaddexp( + r_prev[:, 0], r_prev[:, 1] + ) # log(r_t^n(g) + r_t^b(g)) + last = y[-1] + if output_length > 0 and last in cs: + log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32) + for i in six.moves.range(len(cs)): + log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1] + else: + log_phi = r_sum + + # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), + # and log prefix probabilities log(psi) + start = max(output_length, 1) + log_psi = r[start - 1, 0] + for t in six.moves.range(start, self.input_length): + r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] + r[t, 1] = ( + self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank] + ) + log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t]) + + # get P(...eos|X) that ends with the prefix itself + eos_pos = self.xp.where(cs == self.eos)[0] + if len(eos_pos) > 0: + log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) + + # exclude blank probs + blank_pos = self.xp.where(cs == self.blank)[0] + if len(blank_pos) > 0: + log_psi[blank_pos] = self.logzero + + # return the log prefix probability and CTC states, where the label axis + # of the CTC states is moved to the first axis to slice it easily + return log_psi, self.xp.rollaxis(r, 2) diff --git a/espnet/nets/e2e_asr_common.py b/espnet/nets/e2e_asr_common.py new file mode 100644 index 0000000000000000000000000000000000000000..92f90796a3a230b3bfc47ebe8d9292fae37a1b9c --- /dev/null +++ b/espnet/nets/e2e_asr_common.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Common functions for ASR.""" + +import json +import logging +import sys + +from itertools import groupby +import numpy as np +import six + + +def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))): + """End detection. + + described in Eq. (50) of S. Watanabe et al + "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition" + + :param ended_hyps: + :param i: + :param M: + :param D_end: + :return: + """ + if len(ended_hyps) == 0: + return False + count = 0 + best_hyp = sorted(ended_hyps, key=lambda x: x["score"], reverse=True)[0] + for m in six.moves.range(M): + # get ended_hyps with their length is i - m + hyp_length = i - m + hyps_same_length = [x for x in ended_hyps if len(x["yseq"]) == hyp_length] + if len(hyps_same_length) > 0: + best_hyp_same_length = sorted( + hyps_same_length, key=lambda x: x["score"], reverse=True + )[0] + if best_hyp_same_length["score"] - best_hyp["score"] < D_end: + count += 1 + + if count == M: + return True + else: + return False + + +# TODO(takaaki-hori): add different smoothing methods +def label_smoothing_dist(odim, lsm_type, transcript=None, blank=0): + """Obtain label distribution for loss smoothing. 
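+
+    Example (illustrative; the transcript path is hypothetical):
+
+        labeldist = label_smoothing_dist(odim, "unigram", transcript="dump/train/data.json")
+        # labeldist: numpy array of shape (odim,) summing to 1, with the blank
+        # entry forced to zero.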
+ + :param odim: + :param lsm_type: + :param blank: + :param transcript: + :return: + """ + if transcript is not None: + with open(transcript, "rb") as f: + trans_json = json.load(f)["utts"] + + if lsm_type == "unigram": + assert transcript is not None, ( + "transcript is required for %s label smoothing" % lsm_type + ) + labelcount = np.zeros(odim) + for k, v in trans_json.items(): + ids = np.array([int(n) for n in v["output"][0]["tokenid"].split()]) + # to avoid an error when there is no text in an uttrance + if len(ids) > 0: + labelcount[ids] += 1 + labelcount[odim - 1] = len(transcript) # count + labelcount[labelcount == 0] = 1 # flooring + labelcount[blank] = 0 # remove counts for blank + labeldist = labelcount.astype(np.float32) / np.sum(labelcount) + else: + logging.error("Error: unexpected label smoothing type: %s" % lsm_type) + sys.exit() + + return labeldist + + +def get_vgg2l_odim(idim, in_channel=3, out_channel=128): + """Return the output size of the VGG frontend. + + :param in_channel: input channel size + :param out_channel: output channel size + :return: output size + :rtype int + """ + idim = idim / in_channel + idim = np.ceil(np.array(idim, dtype=np.float32) / 2) # 1st max pooling + idim = np.ceil(np.array(idim, dtype=np.float32) / 2) # 2nd max pooling + return int(idim) * out_channel # numer of channels + + +class ErrorCalculator(object): + """Calculate CER and WER for E2E_ASR and CTC models during training. + + :param y_hats: numpy array with predicted text + :param y_pads: numpy array with true (target) text + :param char_list: + :param sym_space: + :param sym_blank: + :return: + """ + + def __init__( + self, char_list, sym_space, sym_blank, report_cer=False, report_wer=False + ): + """Construct an ErrorCalculator object.""" + super(ErrorCalculator, self).__init__() + + self.report_cer = report_cer + self.report_wer = report_wer + + self.char_list = char_list + self.space = sym_space + self.blank = sym_blank + self.idx_blank = self.char_list.index(self.blank) + if self.space in self.char_list: + self.idx_space = self.char_list.index(self.space) + else: + self.idx_space = None + + def __call__(self, ys_hat, ys_pad, is_ctc=False): + """Calculate sentence-level WER/CER score. + + :param torch.Tensor ys_hat: prediction (batch, seqlen) + :param torch.Tensor ys_pad: reference (batch, seqlen) + :param bool is_ctc: calculate CER score for CTC + :return: sentence-level WER score + :rtype float + :return: sentence-level CER score + :rtype float + """ + cer, wer = None, None + if is_ctc: + return self.calculate_cer_ctc(ys_hat, ys_pad) + elif not self.report_cer and not self.report_wer: + return cer, wer + + seqs_hat, seqs_true = self.convert_to_char(ys_hat, ys_pad) + if self.report_cer: + cer = self.calculate_cer(seqs_hat, seqs_true) + + if self.report_wer: + wer = self.calculate_wer(seqs_hat, seqs_true) + return cer, wer + + def calculate_cer_ctc(self, ys_hat, ys_pad): + """Calculate sentence-level CER score for CTC. 
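+
+        Example (illustrative sketch; the character list is hypothetical):
+
+            ec = ErrorCalculator(char_list=["<blank>", "a", "b", "<space>"],
+                                 sym_space="<space>", sym_blank="<blank>")
+            cer = ec(ys_hat, ys_pad, is_ctc=True)  # dispatches to calculate_cer_ctc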
+ + :param torch.Tensor ys_hat: prediction (batch, seqlen) + :param torch.Tensor ys_pad: reference (batch, seqlen) + :return: average sentence-level CER score + :rtype float + """ + import editdistance + + cers, char_ref_lens = [], [] + for i, y in enumerate(ys_hat): + y_hat = [x[0] for x in groupby(y)] + y_true = ys_pad[i] + seq_hat, seq_true = [], [] + for idx in y_hat: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_hat.append(self.char_list[int(idx)]) + + for idx in y_true: + idx = int(idx) + if idx != -1 and idx != self.idx_blank and idx != self.idx_space: + seq_true.append(self.char_list[int(idx)]) + + hyp_chars = "".join(seq_hat) + ref_chars = "".join(seq_true) + if len(ref_chars) > 0: + cers.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + + cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None + return cer_ctc + + def convert_to_char(self, ys_hat, ys_pad): + """Convert index to character. + + :param torch.Tensor seqs_hat: prediction (batch, seqlen) + :param torch.Tensor seqs_true: reference (batch, seqlen) + :return: token list of prediction + :rtype list + :return: token list of reference + :rtype list + """ + seqs_hat, seqs_true = [], [] + for i, y_hat in enumerate(ys_hat): + y_true = ys_pad[i] + eos_true = np.where(y_true == -1)[0] + ymax = eos_true[0] if len(eos_true) > 0 else len(y_true) + # NOTE: padding index (-1) in y_true is used to pad y_hat + seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]] + seq_true = [self.char_list[int(idx)] for idx in y_true if int(idx) != -1] + seq_hat_text = "".join(seq_hat).replace(self.space, " ") + seq_hat_text = seq_hat_text.replace(self.blank, "") + seq_true_text = "".join(seq_true).replace(self.space, " ") + seqs_hat.append(seq_hat_text) + seqs_true.append(seq_true_text) + return seqs_hat, seqs_true + + def calculate_cer(self, seqs_hat, seqs_true): + """Calculate sentence-level CER score. + + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level CER score + :rtype float + """ + import editdistance + + char_eds, char_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_chars = seq_hat_text.replace(" ", "") + ref_chars = seq_true_text.replace(" ", "") + char_eds.append(editdistance.eval(hyp_chars, ref_chars)) + char_ref_lens.append(len(ref_chars)) + return float(sum(char_eds)) / sum(char_ref_lens) + + def calculate_wer(self, seqs_hat, seqs_true): + """Calculate sentence-level WER score. 
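+
+        Example (illustrative): for seqs_hat=["a b c"] and seqs_true=["a b d"],
+        the word edit distance is 1 against a reference of 3 words, so the
+        returned WER is 1/3.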
+ + :param list seqs_hat: prediction + :param list seqs_true: reference + :return: average sentence-level WER score + :rtype float + """ + import editdistance + + word_eds, word_ref_lens = [], [] + for i, seq_hat_text in enumerate(seqs_hat): + seq_true_text = seqs_true[i] + hyp_words = seq_hat_text.split() + ref_words = seq_true_text.split() + word_eds.append(editdistance.eval(hyp_words, ref_words)) + word_ref_lens.append(len(ref_words)) + return float(sum(word_eds)) / sum(word_ref_lens) diff --git a/espnet/nets/lm_interface.py b/espnet/nets/lm_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1e751c4d8c945c8bae3fc4356a4d380fc1e023 --- /dev/null +++ b/espnet/nets/lm_interface.py @@ -0,0 +1,86 @@ +"""Language model interface.""" + +import argparse + +from espnet.nets.scorer_interface import ScorerInterface +from espnet.utils.dynamic_import import dynamic_import +from espnet.utils.fill_missing_args import fill_missing_args + + +class LMInterface(ScorerInterface): + """LM Interface for ESPnet model implementation.""" + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + return parser + + @classmethod + def build(cls, n_vocab: int, **kwargs): + """Initialize this class with python-level args. + + Args: + idim (int): The number of vocabulary. + + Returns: + LMinterface: A new instance of LMInterface. + + """ + # local import to avoid cyclic import in lm_train + from espnet.bin.lm_train import get_parser + + def wrap(parser): + return get_parser(parser, required=False) + + args = argparse.Namespace(**kwargs) + args = fill_missing_args(args, wrap) + args = fill_missing_args(args, cls.add_arguments) + return cls(n_vocab, args) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + raise NotImplementedError("forward method is not implemented") + + +predefined_lms = { + "pytorch": { + "default": "espnet.nets.pytorch_backend.lm.default:DefaultRNNLM", + "seq_rnn": "espnet.nets.pytorch_backend.lm.seq_rnn:SequentialRNNLM", + "transformer": "espnet.nets.pytorch_backend.lm.transformer:TransformerLM", + }, + "chainer": {"default": "espnet.lm.chainer_backend.lm:DefaultRNNLM"}, +} + + +def dynamic_import_lm(module, backend): + """Import LM class dynamically. + + Args: + module (str): module_name:class_name or alias in `predefined_lms` + backend (str): NN backend. e.g., pytorch, chainer + + Returns: + type: LM class + + """ + model_class = dynamic_import(module, predefined_lms.get(backend, dict())) + assert issubclass( + model_class, LMInterface + ), f"{module} does not implement LMInterface" + return model_class diff --git a/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py b/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py new file mode 100755 index 0000000000000000000000000000000000000000..7f456e0a6fbe832fa440bae07973475cd3170679 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/conv1d_extractor.py @@ -0,0 +1,25 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +import torch +from espnet.nets.pytorch_backend.backbones.modules.resnet1d import ResNet1D, BasicBlock1D + +class Conv1dResNet(torch.nn.Module): + def __init__(self, relu_type="swish", a_upsample_ratio=1): + super().__init__() + self.a_upsample_ratio = a_upsample_ratio + self.trunk = ResNet1D(BasicBlock1D, [2, 2, 2, 2], relu_type=relu_type, a_upsample_ratio=a_upsample_ratio) + + + def forward(self, xs_pad): + """forward. + + :param xs_pad: torch.Tensor, batch of padded input sequences (B, Tmax, idim) + """ + B, T, C = xs_pad.size() + xs_pad = xs_pad[:, :T // 640 * 640, :] + xs_pad = xs_pad.transpose(1, 2) + xs_pad = self.trunk(xs_pad) + return xs_pad.transpose(1, 2) diff --git a/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py b/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py new file mode 100755 index 0000000000000000000000000000000000000000..302bdfb643fcf9f99cbecc3603465e92e26fd0b9 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/conv3d_extractor.py @@ -0,0 +1,47 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torch.nn as nn +from espnet.nets.pytorch_backend.backbones.modules.resnet import ResNet, BasicBlock +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def threeD_to_2D_tensor(x): + n_batch, n_channels, s_time, sx, sy = x.shape + x = x.transpose(1, 2) + return x.reshape(n_batch * s_time, n_channels, sx, sy) + + + +class Conv3dResNet(torch.nn.Module): + """Conv3dResNet module + """ + + def __init__(self, backbone_type="resnet", relu_type="swish"): + """__init__. + + :param backbone_type: str, the type of a visual front-end. + :param relu_type: str, activation function used in an audio front-end. + """ + super(Conv3dResNet, self).__init__() + self.frontend_nout = 64 + self.trunk = ResNet(BasicBlock, [2, 2, 2, 2], relu_type=relu_type) + self.frontend3D = nn.Sequential( + nn.Conv3d(1, self.frontend_nout, (5, 7, 7), (1, 2, 2), (2, 3, 3), bias=False), + nn.BatchNorm3d(self.frontend_nout), + Swish(), + nn.MaxPool3d((1, 3, 3), (1, 2, 2), (0, 1, 1)) + ) + + + def forward(self, xs_pad): + B, C, T, H, W = xs_pad.size() + xs_pad = self.frontend3D(xs_pad) + Tnew = xs_pad.shape[2] + xs_pad = threeD_to_2D_tensor(xs_pad) + xs_pad = self.trunk(xs_pad) + return xs_pad.view(B, Tnew, xs_pad.size(1)) diff --git a/espnet/nets/pytorch_backend/backbones/modules/resnet.py b/espnet/nets/pytorch_backend/backbones/modules/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..18b14a59c3049ecb2bd2e3680c9bfdce91f309f7 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/resnet.py @@ -0,0 +1,178 @@ +import math +import torch.nn as nn +import pdb + +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def conv3x3(in_planes, out_planes, stride=1): + """conv3x3. + + :param in_planes: int, number of channels in the input sequence. + :param out_planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + + +def downsample_basic_block(inplanes, outplanes, stride): + """downsample_basic_block. 
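+
+    This is the 1x1-convolution + BatchNorm projection shortcut that
+    ``_make_layer`` attaches to a block whenever the stride is not 1 or the
+    number of channels changes.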
+ + :param inplanes: int, number of channels in the input sequence. + :param outplanes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Sequential( + nn.Conv2d( + inplanes, + outplanes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(outplanes), + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + relu_type="swish", + ): + """__init__. + + :param inplanes: int, number of channels in the input sequence. + :param planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + :param downsample: boolean, if True, the temporal resolution is downsampled. + :param relu_type: str, type of activation function. + """ + super(BasicBlock, self).__init__() + + assert relu_type in ["relu", "prelu", "swish"] + + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + + if relu_type == "relu": + self.relu1 = nn.ReLU(inplace=True) + self.relu2 = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu1 = nn.PReLU(num_parameters=planes) + self.relu2 = nn.PReLU(num_parameters=planes) + elif relu_type == "swish": + self.relu1 = Swish() + self.relu2 = Swish() + else: + raise NotImplementedError + # -------- + + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T, H, W). + """ + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu2(out) + + return out + + +class ResNet(nn.Module): + + def __init__( + self, + block, + layers, + relu_type="swish", + ): + super(ResNet, self).__init__() + self.inplanes = 64 + self.relu_type = relu_type + self.downsample_block = downsample_basic_block + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d(1) + + + def _make_layer(self, block, planes, blocks, stride=1): + """_make_layer. + + :param block: torch.nn.Module, class of blocks. + :param planes: int, number of channels produced by the convolution. + :param blocks: int, number of layers in a block. + :param stride: int, size of the convolving kernel. + """ + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = self.downsample_block( + inplanes=self.inplanes, + outplanes=planes*block.expansion, + stride=stride, + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride, + downsample, + relu_type=self.relu_type, + ) + ) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + relu_type=self.relu_type, + ) + ) + + return nn.Sequential(*layers) + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T, H, W). 
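+
+        Note: in this codebase the 2D trunk is applied frame-wise; the video
+        tensor is reshaped by ``threeD_to_2D_tensor`` before the call, so the
+        tensor arriving here is effectively of size (B*T, C, H, W).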
+ """ + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + return x diff --git a/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py b/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py new file mode 100644 index 0000000000000000000000000000000000000000..adfec4a010871e7ddfe657e3474b08b5527720c6 --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/resnet1d.py @@ -0,0 +1,213 @@ +import math +import torch.nn as nn +import pdb + +from espnet.nets.pytorch_backend.transformer.convolution import Swish + + +def conv3x3(in_planes, out_planes, stride=1): + """conv3x3. + + :param in_planes: int, number of channels in the input sequence. + :param out_planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Conv1d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + + +def downsample_basic_block(inplanes, outplanes, stride): + """downsample_basic_block. + + :param inplanes: int, number of channels in the input sequence. + :param outplanes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + """ + return nn.Sequential( + nn.Conv1d( + inplanes, + outplanes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm1d(outplanes), + ) + + +class BasicBlock1D(nn.Module): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + relu_type="relu", + ): + """__init__. + + :param inplanes: int, number of channels in the input sequence. + :param planes: int, number of channels produced by the convolution. + :param stride: int, size of the convolving kernel. + :param downsample: boolean, if True, the temporal resolution is downsampled. + :param relu_type: str, type of activation function. + """ + super(BasicBlock1D, self).__init__() + + assert relu_type in ["relu","prelu", "swish"] + + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm1d(planes) + + # type of ReLU is an input option + if relu_type == "relu": + self.relu1 = nn.ReLU(inplace=True) + self.relu2 = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu1 = nn.PReLU(num_parameters=planes) + self.relu2 = nn.PReLU(num_parameters=planes) + elif relu_type == "swish": + self.relu1 = Swish() + self.relu2 = Swish() + else: + raise NotImplementedError + # -------- + + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm1d(planes) + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + """forward. + + :param x: torch.Tensor, input tensor with input size (B, C, T) + """ + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu2(out) + + return out + + +class ResNet1D(nn.Module): + + def __init__(self, + block, + layers, + relu_type="swish", + a_upsample_ratio=1, + ): + """__init__. + + :param block: torch.nn.Module, class of blocks. + :param layers: List, customised layers in each block. + :param relu_type: str, type of activation function. + :param a_upsample_ratio: int, The ratio related to the \ + temporal resolution of output features of the frontend. \ + a_upsample_ratio=1 produce features with a fps of 25. 
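+
+        Note (assuming 16 kHz raw-waveform input): conv1 (stride 4) plus the
+        three stride-2 stages reduce time by a factor of 32, and the final
+        AvgPool1d with kernel/stride 20 // a_upsample_ratio brings the total
+        hop to 640 // a_upsample_ratio samples per output frame, i.e.
+        16000 / 640 = 25 fps when a_upsample_ratio=1.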
+ """ + super(ResNet1D, self).__init__() + self.inplanes = 64 + self.relu_type = relu_type + self.downsample_block = downsample_basic_block + self.a_upsample_ratio = a_upsample_ratio + + self.conv1 = nn.Conv1d( + in_channels=1, + out_channels=self.inplanes, + kernel_size=80, + stride=4, + padding=38, + bias=False, + ) + self.bn1 = nn.BatchNorm1d(self.inplanes) + + if relu_type == "relu": + self.relu = nn.ReLU(inplace=True) + elif relu_type == "prelu": + self.relu = nn.PReLU(num_parameters=self.inplanes) + elif relu_type == "swish": + self.relu = Swish() + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool1d( + kernel_size=20//self.a_upsample_ratio, + stride=20//self.a_upsample_ratio, + ) + + + def _make_layer(self, block, planes, blocks, stride=1): + """_make_layer. + + :param block: torch.nn.Module, class of blocks. + :param planes: int, number of channels produced by the convolution. + :param blocks: int, number of layers in a block. + :param stride: int, size of the convolving kernel. + """ + + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = self.downsample_block( + inplanes=self.inplanes, + outplanes=planes*block.expansion, + stride=stride, + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride, + downsample, + relu_type=self.relu_type, + ) + ) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + relu_type=self.relu_type, + ) + ) + + return nn.Sequential(*layers) + + def forward(self, x): + """forward. 
+ + :param x: torch.Tensor, input tensor with input size (B, C, T) + """ + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + return x diff --git a/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py b/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..53db7a8163ddb1d74d88e7d0a4d8824646918a6c --- /dev/null +++ b/espnet/nets/pytorch_backend/backbones/modules/shufflenetv2.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from collections import OrderedDict +from torch.nn import init +import math + +import pdb + +def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + +def channel_shuffle(x, groups): + batchsize, num_channels, height, width = x.data.size() + + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, benchmodel): + super(InvertedResidual, self).__init__() + self.benchmodel = benchmodel + self.stride = stride + assert stride in [1, 2] + + oup_inc = oup//2 + + if self.benchmodel == 1: + #assert inp == oup_inc + self.banch2 = nn.Sequential( + # pw + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False), + nn.BatchNorm2d(oup_inc), + # pw-linear + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + else: + self.banch1 = nn.Sequential( + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + + self.banch2 = nn.Sequential( + # pw + nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False), + nn.BatchNorm2d(oup_inc), + # pw-linear + nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup_inc), + nn.ReLU(inplace=True), + ) + + @staticmethod + def _concat(x, out): + # concatenate along channel axis + return torch.cat((x, out), 1) + + def forward(self, x): + if 1==self.benchmodel: + x1 = x[:, :(x.shape[1]//2), :, :] + x2 = x[:, (x.shape[1]//2):, :, :] + out = self._concat(x1, self.banch2(x2)) + elif 2==self.benchmodel: + out = self._concat(self.banch1(x), self.banch2(x)) + + return channel_shuffle(out, 2) + + +class ShuffleNetV2(nn.Module): + def __init__(self, n_class=1000, input_size=224, width_mult=2.): + super(ShuffleNetV2, self).__init__() + + assert input_size % 32 == 0, "Input size needs to be divisible by 32" + + self.stage_repeats = [4, 8, 4] + # index 0 is invalid and should never be called. + # only used for indexing convenience. 
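+        # Illustrative usage (hypothetical sizes; input_size must be divisible by 32):
+        #   net = ShuffleNetV2(n_class=1000, input_size=96, width_mult=1.0)
+        #   logits = net(torch.randn(2, 3, 96, 96))  # -> (2, 1000)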
+ if width_mult == 0.5: + self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif width_mult == 1.0: + self.stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif width_mult == 1.5: + self.stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif width_mult == 2.0: + self.stage_out_channels = [-1, 24, 244, 488, 976, 2048] + else: + raise ValueError( + """Width multiplier should be in [0.5, 1.0, 1.5, 2.0]. Current value: {}""".format(width_mult)) + + # building first layer + input_channel = self.stage_out_channels[1] + self.conv1 = conv_bn(3, input_channel, 2) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.features = [] + # building inverted residual blocks + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage+2] + for i in range(numrepeat): + if i == 0: + #inp, oup, stride, benchmodel): + self.features.append(InvertedResidual(input_channel, output_channel, 2, 2)) + else: + self.features.append(InvertedResidual(input_channel, output_channel, 1, 1)) + input_channel = output_channel + + + # make it nn.Sequential + self.features = nn.Sequential(*self.features) + + # building last several layers + self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1]) + self.globalpool = nn.Sequential(nn.AvgPool2d(int(input_size/32))) + + # building classifier + self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class)) + + def forward(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.features(x) + x = self.conv_last(x) + x = self.globalpool(x) + x = x.view(-1, self.stage_out_channels[-1]) + x = self.classifier(x) + return x diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..60e176df9268c83fd6251b64f823c6b1c512641d --- /dev/null +++ b/espnet/nets/pytorch_backend/ctc.py @@ -0,0 +1,283 @@ +from distutils.version import LooseVersion +import logging + +import numpy as np +import six +import torch +import torch.nn.functional as F + +from espnet.nets.pytorch_backend.nets_utils import to_device + + +class CTC(torch.nn.Module): + """CTC module + + :param int odim: dimension of outputs + :param int eprojs: number of encoder projection units + :param float dropout_rate: dropout rate (0.0 ~ 1.0) + :param str ctc_type: builtin or warpctc + :param bool reduce: reduce the CTC loss into a scalar + """ + + def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): + super().__init__() + self.dropout_rate = dropout_rate + self.loss = None + self.ctc_lo = torch.nn.Linear(eprojs, odim) + self.dropout = torch.nn.Dropout(dropout_rate) + self.probs = None # for visualization + + # In case of Pytorch >= 1.7.0, CTC will be always builtin + self.ctc_type = ( + ctc_type + if LooseVersion(torch.__version__) < LooseVersion("1.7.0") + else "builtin" + ) + + if self.ctc_type == "builtin": + reduction_type = "sum" if reduce else "none" + self.ctc_loss = torch.nn.CTCLoss( + reduction=reduction_type, zero_infinity=True + ) + elif self.ctc_type == "cudnnctc": + reduction_type = "sum" if reduce else "none" + self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type) + elif self.ctc_type == "warpctc": + import warpctc_pytorch as warp_ctc + + self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce) + elif self.ctc_type == "gtnctc": + from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction + + self.ctc_loss = 
GTNCTCLossFunction.apply + else: + raise ValueError( + 'ctc_type must be "builtin" or "warpctc": {}'.format(self.ctc_type) + ) + + self.ignore_id = -1 + self.reduce = reduce + + def loss_fn(self, th_pred, th_target, th_ilen, th_olen): + if self.ctc_type in ["builtin", "cudnnctc"]: + th_pred = th_pred.log_softmax(2) + # Use the deterministic CuDNN implementation of CTC loss to avoid + # [issue#17798](https://github.com/pytorch/pytorch/issues/17798) + with torch.backends.cudnn.flags(deterministic=True): + loss = self.ctc_loss(th_pred, th_target, th_ilen, th_olen) + # Batch-size average + loss = loss / th_pred.size(1) + return loss + elif self.ctc_type == "warpctc": + return self.ctc_loss(th_pred, th_target, th_ilen, th_olen) + elif self.ctc_type == "gtnctc": + targets = [t.tolist() for t in th_target] + log_probs = torch.nn.functional.log_softmax(th_pred, dim=2) + return self.ctc_loss(log_probs, targets, th_ilen, 0, "none") + else: + raise NotImplementedError + + def forward(self, hs_pad, hlens, ys_pad): + """CTC forward + + :param torch.Tensor hs_pad: batch of padded hidden state sequences (B, Tmax, D) + :param torch.Tensor hlens: batch of lengths of hidden state sequences (B) + :param torch.Tensor ys_pad: + batch of padded character id sequence tensor (B, Lmax) + :return: ctc loss value + :rtype: torch.Tensor + """ + # TODO(kan-bayashi): need to make more smart way + ys = [y[y != self.ignore_id] for y in ys_pad] # parse padded ys + + # zero padding for hs + ys_hat = self.ctc_lo(self.dropout(hs_pad)) + if self.ctc_type != "gtnctc": + ys_hat = ys_hat.transpose(0, 1) + + if self.ctc_type == "builtin": + olens = to_device(ys_hat, torch.LongTensor([len(s) for s in ys])) + hlens = hlens.long() + ys_pad = torch.cat(ys) # without this the code breaks for asr_mix + self.loss = self.loss_fn(ys_hat, ys_pad, hlens, olens) + else: + self.loss = None + hlens = torch.from_numpy(np.fromiter(hlens, dtype=np.int32)) + olens = torch.from_numpy( + np.fromiter((x.size(0) for x in ys), dtype=np.int32) + ) + # zero padding for ys + ys_true = torch.cat(ys).cpu().int() # batch x olen + # get ctc loss + # expected shape of seqLength x batchSize x alphabet_size + dtype = ys_hat.dtype + if self.ctc_type == "warpctc" or dtype == torch.float16: + # warpctc only supports float32 + # torch.ctc does not support float16 (#1751) + ys_hat = ys_hat.to(dtype=torch.float32) + if self.ctc_type == "cudnnctc": + # use GPU when using the cuDNN implementation + ys_true = to_device(hs_pad, ys_true) + if self.ctc_type == "gtnctc": + # keep as list for gtn + ys_true = ys + self.loss = to_device( + hs_pad, self.loss_fn(ys_hat, ys_true, hlens, olens) + ).to(dtype=dtype) + + # get length info + logging.info( + self.__class__.__name__ + + " input lengths: " + + "".join(str(hlens).split("\n")) + ) + logging.info( + self.__class__.__name__ + + " output lengths: " + + "".join(str(olens).split("\n")) + ) + + if self.reduce: + # NOTE: sum() is needed to keep consistency + # since warpctc return as tensor w/ shape (1,) + # but builtin return as tensor w/o shape (scalar). 
+ self.loss = self.loss.sum() + logging.info("ctc loss:" + str(float(self.loss))) + + return self.loss + + def softmax(self, hs_pad): + """softmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: log softmax applied 3d tensor (B, Tmax, odim) + :rtype: torch.Tensor + """ + self.probs = F.softmax(self.ctc_lo(hs_pad), dim=2) + return self.probs + + def log_softmax(self, hs_pad): + """log_softmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: log softmax applied 3d tensor (B, Tmax, odim) + :rtype: torch.Tensor + """ + return F.log_softmax(self.ctc_lo(hs_pad), dim=2) + + def argmax(self, hs_pad): + """argmax of frame activations + + :param torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs) + :return: argmax applied 2d tensor (B, Tmax) + :rtype: torch.Tensor + """ + return torch.argmax(self.ctc_lo(hs_pad), dim=2) + + def forced_align(self, h, y, blank_id=0): + """forced alignment. + + :param torch.Tensor h: hidden state sequence, 2d tensor (T, D) + :param torch.Tensor y: id sequence tensor 1d tensor (L) + :param int y: blank symbol index + :return: best alignment results + :rtype: list + """ + + def interpolate_blank(label, blank_id=0): + """Insert blank token between every two label token.""" + label = np.expand_dims(label, 1) + blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id + label = np.concatenate([blanks, label], axis=1) + label = label.reshape(-1) + label = np.append(label, label[0]) + return label + + lpz = self.log_softmax(h) + lpz = lpz.squeeze(0) + + y_int = interpolate_blank(y, blank_id) + + logdelta = np.zeros((lpz.size(0), len(y_int))) - 100000000000.0 # log of zero + state_path = ( + np.zeros((lpz.size(0), len(y_int)), dtype=np.int16) - 1 + ) # state path + + logdelta[0, 0] = lpz[0][y_int[0]] + logdelta[0, 1] = lpz[0][y_int[1]] + + for t in six.moves.range(1, lpz.size(0)): + for s in six.moves.range(len(y_int)): + if y_int[s] == blank_id or s < 2 or y_int[s] == y_int[s - 2]: + candidates = np.array([logdelta[t - 1, s], logdelta[t - 1, s - 1]]) + prev_state = [s, s - 1] + else: + candidates = np.array( + [ + logdelta[t - 1, s], + logdelta[t - 1, s - 1], + logdelta[t - 1, s - 2], + ] + ) + prev_state = [s, s - 1, s - 2] + logdelta[t, s] = np.max(candidates) + lpz[t][y_int[s]] + state_path[t, s] = prev_state[np.argmax(candidates)] + + state_seq = -1 * np.ones((lpz.size(0), 1), dtype=np.int16) + + candidates = np.array( + [logdelta[-1, len(y_int) - 1], logdelta[-1, len(y_int) - 2]] + ) + prev_state = [len(y_int) - 1, len(y_int) - 2] + state_seq[-1] = prev_state[np.argmax(candidates)] + for t in six.moves.range(lpz.size(0) - 2, -1, -1): + state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]] + + output_state_seq = [] + for t in six.moves.range(0, lpz.size(0)): + output_state_seq.append(y_int[state_seq[t, 0]]) + + return output_state_seq + + +def ctc_for(args, odim, reduce=True): + """Returns the CTC module for the given args and output dimension + + :param Namespace args: the program args + :param int odim : The output dimension + :param bool reduce : return the CTC loss in a scalar + :return: the corresponding CTC module + """ + num_encs = getattr(args, "num_encs", 1) # use getattr to keep compatibility + if num_encs == 1: + # compatible with single encoder asr mode + return CTC( + odim, args.eprojs, args.dropout_rate, ctc_type=args.ctc_type, reduce=reduce + ) + elif num_encs >= 1: + ctcs_list = torch.nn.ModuleList() + if args.share_ctc: + # use dropout_rate of the first encoder + 
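+            # a single CTC instance is appended once and shared by every encoder stream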
ctc = CTC( + odim, + args.eprojs, + args.dropout_rate[0], + ctc_type=args.ctc_type, + reduce=reduce, + ) + ctcs_list.append(ctc) + else: + for idx in range(num_encs): + ctc = CTC( + odim, + args.eprojs, + args.dropout_rate[idx], + ctc_type=args.ctc_type, + reduce=reduce, + ) + ctcs_list.append(ctc) + return ctcs_list + else: + raise ValueError( + "Number of encoders needs to be more than one. {}".format(num_encs) + ) diff --git a/espnet/nets/pytorch_backend/e2e_asr_transformer.py b/espnet/nets/pytorch_backend/e2e_asr_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ac89e0a9e3b80e3dd8afdabf10fe07eedd4b469b --- /dev/null +++ b/espnet/nets/pytorch_backend/e2e_asr_transformer.py @@ -0,0 +1,320 @@ +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Transformer speech recognition model (pytorch).""" + +from argparse import Namespace +from distutils.util import strtobool +import logging +import math + +import numpy +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.e2e_asr_common import ErrorCalculator +from espnet.nets.pytorch_backend.ctc import CTC +from espnet.nets.pytorch_backend.nets_utils import get_subsample +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask +from espnet.nets.pytorch_backend.nets_utils import th_accuracy +from espnet.nets.pytorch_backend.transformer.add_sos_eos import add_sos_eos +from espnet.nets.pytorch_backend.transformer.attention import ( + MultiHeadedAttention, # noqa: H301 + RelPositionMultiHeadedAttention, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.decoder import Decoder +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import ( + LabelSmoothingLoss, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.mask import target_mask +from espnet.nets.scorers.ctc import CTCPrefixScorer + + +class E2E(torch.nn.Module): + """E2E module. + + :param int idim: dimension of inputs + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments.""" + group = parser.add_argument_group("transformer model setting") + + group.add_argument( + "--transformer-init", + type=str, + default="pytorch", + choices=[ + "pytorch", + "xavier_uniform", + "xavier_normal", + "kaiming_uniform", + "kaiming_normal", + ], + help="how to initialize transformer parameters", + ) + group.add_argument( + "--transformer-input-layer", + type=str, + default="conv2d", + choices=["conv3d", "conv2d", "conv1d", "linear", "embed"], + help="transformer input layer type", + ) + group.add_argument( + "--transformer-encoder-attn-layer-type", + type=str, + default="mha", + choices=["mha", "rel_mha", "legacy_rel_mha"], + help="transformer encoder attention layer type", + ) + group.add_argument( + "--transformer-attn-dropout-rate", + default=None, + type=float, + help="dropout in transformer attention. 
use --dropout-rate if None is set", + ) + group.add_argument( + "--transformer-lr", + default=10.0, + type=float, + help="Initial value of learning rate", + ) + group.add_argument( + "--transformer-warmup-steps", + default=25000, + type=int, + help="optimizer warmup steps", + ) + group.add_argument( + "--transformer-length-normalized-loss", + default=True, + type=strtobool, + help="normalize loss by length", + ) + group.add_argument( + "--dropout-rate", + default=0.0, + type=float, + help="Dropout rate for the encoder", + ) + group.add_argument( + "--macaron-style", + default=False, + type=strtobool, + help="Whether to use macaron style for positionwise layer", + ) + # -- input + group.add_argument( + "--a-upsample-ratio", + default=1, + type=int, + help="Upsample rate for audio", + ) + group.add_argument( + "--relu-type", + default="swish", + type=str, + help="the type of activation layer", + ) + # Encoder + group.add_argument( + "--elayers", + default=4, + type=int, + help="Number of encoder layers (for shared recognition part " + "in multi-speaker asr mode)", + ) + group.add_argument( + "--eunits", + "-u", + default=300, + type=int, + help="Number of encoder hidden units", + ) + group.add_argument( + "--use-cnn-module", + default=False, + type=strtobool, + help="Use convolution module or not", + ) + group.add_argument( + "--cnn-module-kernel", + default=31, + type=int, + help="Kernel size of convolution module.", + ) + # Attention + group.add_argument( + "--adim", + default=320, + type=int, + help="Number of attention transformation dimensions", + ) + group.add_argument( + "--aheads", + default=4, + type=int, + help="Number of heads for multi head attention", + ) + group.add_argument( + "--zero-triu", + default=False, + type=strtobool, + help="If true, zero the uppper triangular part of attention matrix.", + ) + # Relative positional encoding + group.add_argument( + "--rel-pos-type", + type=str, + default="legacy", + choices=["legacy", "latest"], + help="Whether to use the latest relative positional encoding or the legacy one." + "The legacy relative positional encoding will be deprecated in the future." + "More Details can be found in https://github.com/espnet/espnet/pull/2816.", + ) + # Decoder + group.add_argument( + "--dlayers", default=1, type=int, help="Number of decoder layers" + ) + group.add_argument( + "--dunits", default=320, type=int, help="Number of decoder hidden units" + ) + # -- pretrain + group.add_argument("--pretrain-dataset", + default="", + type=str, + help='pre-trained dataset for encoder' + ) + # -- custom name + group.add_argument("--custom-pretrain-name", + default="", + type=str, + help='pre-trained model for encoder' + ) + return parser + + @property + def attention_plot_class(self): + """Return PlotAttentionReport.""" + return PlotAttentionReport + + def __init__(self, odim, args, ignore_id=-1): + """Construct an E2E object. + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + """ + torch.nn.Module.__init__(self) + if args.transformer_attn_dropout_rate is None: + args.transformer_attn_dropout_rate = args.dropout_rate + # Check the relative positional encoding type + self.rel_pos_type = getattr(args, "rel_pos_type", None) + if self.rel_pos_type is None and args.transformer_encoder_attn_layer_type == "rel_mha": + args.transformer_encoder_attn_layer_type = "legacy_rel_mha" + logging.warning( + "Using legacy_rel_pos and it will be deprecated in the future." 
+ ) + + idim = 80 + + self.encoder = Encoder( + idim=idim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.eunits, + num_blocks=args.elayers, + input_layer=args.transformer_input_layer, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + attention_dropout_rate=args.transformer_attn_dropout_rate, + encoder_attn_layer_type=args.transformer_encoder_attn_layer_type, + macaron_style=args.macaron_style, + use_cnn_module=args.use_cnn_module, + cnn_module_kernel=args.cnn_module_kernel, + zero_triu=getattr(args, "zero_triu", False), + a_upsample_ratio=args.a_upsample_ratio, + relu_type=getattr(args, "relu_type", "swish"), + ) + + self.transformer_input_layer = args.transformer_input_layer + self.a_upsample_ratio = args.a_upsample_ratio + + if args.mtlalpha < 1: + self.decoder = Decoder( + odim=odim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.dunits, + num_blocks=args.dlayers, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + self_attention_dropout_rate=args.transformer_attn_dropout_rate, + src_attention_dropout_rate=args.transformer_attn_dropout_rate, + ) + else: + self.decoder = None + self.blank = 0 + self.sos = odim - 1 + self.eos = odim - 1 + self.odim = odim + self.ignore_id = ignore_id + self.subsample = get_subsample(args, mode="asr", arch="transformer") + + # self.lsm_weight = a + self.criterion = LabelSmoothingLoss( + self.odim, + self.ignore_id, + args.lsm_weight, + args.transformer_length_normalized_loss, + ) + + self.adim = args.adim + self.mtlalpha = args.mtlalpha + if args.mtlalpha > 0.0: + self.ctc = CTC( + odim, args.adim, args.dropout_rate, ctc_type=args.ctc_type, reduce=True + ) + else: + self.ctc = None + + if args.report_cer or args.report_wer: + self.error_calculator = ErrorCalculator( + args.char_list, + args.sym_space, + args.sym_blank, + args.report_cer, + args.report_wer, + ) + else: + self.error_calculator = None + self.rnnlm = None + + def scorers(self): + """Scorers.""" + return dict(decoder=self.decoder, ctc=CTCPrefixScorer(self.ctc, self.eos)) + + def encode(self, x, extract_resnet_feats=False): + """Encode acoustic features. 
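+
+        Illustrative usage (output shapes depend on the configured front-end):
+
+            enc = model.encode(x)                                # (T', adim) encoder output
+            feats = model.encode(x, extract_resnet_feats=True)   # front-end (ResNet) features only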
+ + :param ndarray x: source acoustic feature (T, D) + :return: encoder outputs + :rtype: torch.Tensor + """ + self.eval() + x = torch.as_tensor(x).unsqueeze(0) + if extract_resnet_feats: + resnet_feats = self.encoder( + x, + None, + extract_resnet_feats=extract_resnet_feats, + ) + return resnet_feats.squeeze(0) + else: + enc_output, _ = self.encoder(x, None) + return enc_output.squeeze(0) diff --git a/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py b/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py new file mode 100644 index 0000000000000000000000000000000000000000..9f4cb81b933da330933807bda6c32735d0df0f25 --- /dev/null +++ b/espnet/nets/pytorch_backend/e2e_asr_transformer_av.py @@ -0,0 +1,352 @@ +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Transformer speech recognition model (pytorch).""" + +from argparse import Namespace +from distutils.util import strtobool +import logging +import math + +import numpy +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.e2e_asr_common import end_detect +from espnet.nets.e2e_asr_common import ErrorCalculator +from espnet.nets.pytorch_backend.ctc import CTC +from espnet.nets.pytorch_backend.nets_utils import get_subsample +from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask +from espnet.nets.pytorch_backend.nets_utils import th_accuracy +from espnet.nets.pytorch_backend.transformer.add_sos_eos import add_sos_eos +from espnet.nets.pytorch_backend.transformer.attention import ( + MultiHeadedAttention, # noqa: H301 + RelPositionMultiHeadedAttention, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.decoder import Decoder +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import ( + LabelSmoothingLoss, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.mask import target_mask +from espnet.nets.scorers.ctc import CTCPrefixScorer +from espnet.nets.pytorch_backend.nets_utils import MLPHead + + +class E2E(torch.nn.Module): + """E2E module. + + :param int idim: dimension of inputs + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments.""" + group = parser.add_argument_group("transformer model setting") + + group.add_argument( + "--transformer-init", + type=str, + default="pytorch", + choices=[ + "pytorch", + "xavier_uniform", + "xavier_normal", + "kaiming_uniform", + "kaiming_normal", + ], + help="how to initialize transformer parameters", + ) + group.add_argument( + "--transformer-input-layer", + type=str, + default="conv2d", + choices=["conv3d", "conv2d", "conv1d", "linear", "embed"], + help="transformer input layer type", + ) + group.add_argument( + "--transformer-encoder-attn-layer-type", + type=str, + default="mha", + choices=["mha", "rel_mha", "legacy_rel_mha"], + help="transformer encoder attention layer type", + ) + group.add_argument( + "--transformer-attn-dropout-rate", + default=None, + type=float, + help="dropout in transformer attention. 
use --dropout-rate if None is set", + ) + group.add_argument( + "--transformer-lr", + default=10.0, + type=float, + help="Initial value of learning rate", + ) + group.add_argument( + "--transformer-warmup-steps", + default=25000, + type=int, + help="optimizer warmup steps", + ) + group.add_argument( + "--transformer-length-normalized-loss", + default=True, + type=strtobool, + help="normalize loss by length", + ) + group.add_argument( + "--dropout-rate", + default=0.0, + type=float, + help="Dropout rate for the encoder", + ) + group.add_argument( + "--macaron-style", + default=False, + type=strtobool, + help="Whether to use macaron style for positionwise layer", + ) + # -- input + group.add_argument( + "--a-upsample-ratio", + default=1, + type=int, + help="Upsample rate for audio", + ) + group.add_argument( + "--relu-type", + default="swish", + type=str, + help="the type of activation layer", + ) + # Encoder + group.add_argument( + "--elayers", + default=4, + type=int, + help="Number of encoder layers (for shared recognition part " + "in multi-speaker asr mode)", + ) + group.add_argument( + "--eunits", + "-u", + default=300, + type=int, + help="Number of encoder hidden units", + ) + group.add_argument( + "--use-cnn-module", + default=False, + type=strtobool, + help="Use convolution module or not", + ) + group.add_argument( + "--cnn-module-kernel", + default=31, + type=int, + help="Kernel size of convolution module.", + ) + # Attention + group.add_argument( + "--adim", + default=320, + type=int, + help="Number of attention transformation dimensions", + ) + group.add_argument( + "--aheads", + default=4, + type=int, + help="Number of heads for multi head attention", + ) + group.add_argument( + "--zero-triu", + default=False, + type=strtobool, + help="If true, zero the uppper triangular part of attention matrix.", + ) + # Relative positional encoding + group.add_argument( + "--rel-pos-type", + type=str, + default="legacy", + choices=["legacy", "latest"], + help="Whether to use the latest relative positional encoding or the legacy one." + "The legacy relative positional encoding will be deprecated in the future." + "More Details can be found in https://github.com/espnet/espnet/pull/2816.", + ) + # Decoder + group.add_argument( + "--dlayers", default=1, type=int, help="Number of decoder layers" + ) + group.add_argument( + "--dunits", default=320, type=int, help="Number of decoder hidden units" + ) + # -- pretrain + group.add_argument("--pretrain-dataset", + default="", + type=str, + help='pre-trained dataset for encoder' + ) + # -- custom name + group.add_argument("--custom-pretrain-name", + default="", + type=str, + help='pre-trained model for encoder' + ) + return parser + + @property + def attention_plot_class(self): + """Return PlotAttentionReport.""" + return PlotAttentionReport + + def __init__(self, odim, args, ignore_id=-1): + """Construct an E2E object. + :param int odim: dimension of outputs + :param Namespace args: argument Namespace containing options + """ + torch.nn.Module.__init__(self) + if args.transformer_attn_dropout_rate is None: + args.transformer_attn_dropout_rate = args.dropout_rate + # Check the relative positional encoding type + self.rel_pos_type = getattr(args, "rel_pos_type", None) + if self.rel_pos_type is None and args.transformer_encoder_attn_layer_type == "rel_mha": + args.transformer_encoder_attn_layer_type = "legacy_rel_mha" + logging.warning( + "Using legacy_rel_pos and it will be deprecated in the future." 
+ ) + + idim = 80 + + self.encoder = Encoder( + idim=idim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.eunits, + num_blocks=args.elayers, + input_layer=args.transformer_input_layer, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + attention_dropout_rate=args.transformer_attn_dropout_rate, + encoder_attn_layer_type=args.transformer_encoder_attn_layer_type, + macaron_style=args.macaron_style, + use_cnn_module=args.use_cnn_module, + cnn_module_kernel=args.cnn_module_kernel, + zero_triu=getattr(args, "zero_triu", False), + a_upsample_ratio=args.a_upsample_ratio, + relu_type=getattr(args, "relu_type", "swish"), + ) + + self.transformer_input_layer = args.transformer_input_layer + self.a_upsample_ratio = args.a_upsample_ratio + + self.aux_encoder = Encoder( + idim=idim, + attention_dim=args.aux_adim, + attention_heads=args.aux_aheads, + linear_units=args.aux_eunits, + num_blocks=args.aux_elayers, + input_layer=args.aux_transformer_input_layer, + dropout_rate=args.aux_dropout_rate, + positional_dropout_rate=args.aux_dropout_rate, + attention_dropout_rate=args.aux_transformer_attn_dropout_rate, + encoder_attn_layer_type=args.aux_transformer_encoder_attn_layer_type, + macaron_style=args.aux_macaron_style, + use_cnn_module=args.aux_use_cnn_module, + cnn_module_kernel=args.aux_cnn_module_kernel, + zero_triu=getattr(args, "aux_zero_triu", False), + a_upsample_ratio=args.aux_a_upsample_ratio, + relu_type=getattr(args, "aux_relu_type", "swish"), + ) + self.aux_transformer_input_layer = args.aux_transformer_input_layer + + self.fusion = MLPHead( + idim=args.adim + args.aux_adim, + hdim=args.fusion_hdim, + odim=args.adim, + norm=args.fusion_norm, + ) + + if args.mtlalpha < 1: + self.decoder = Decoder( + odim=odim, + attention_dim=args.adim, + attention_heads=args.aheads, + linear_units=args.dunits, + num_blocks=args.dlayers, + dropout_rate=args.dropout_rate, + positional_dropout_rate=args.dropout_rate, + self_attention_dropout_rate=args.transformer_attn_dropout_rate, + src_attention_dropout_rate=args.transformer_attn_dropout_rate, + ) + else: + self.decoder = None + self.blank = 0 + self.sos = odim - 1 + self.eos = odim - 1 + self.odim = odim + self.ignore_id = ignore_id + self.subsample = get_subsample(args, mode="asr", arch="transformer") + + # self.lsm_weight = a + self.criterion = LabelSmoothingLoss( + self.odim, + self.ignore_id, + args.lsm_weight, + args.transformer_length_normalized_loss, + ) + + self.adim = args.adim + self.mtlalpha = args.mtlalpha + if args.mtlalpha > 0.0: + self.ctc = CTC( + odim, args.adim, args.dropout_rate, ctc_type=args.ctc_type, reduce=True + ) + else: + self.ctc = None + + if args.report_cer or args.report_wer: + self.error_calculator = ErrorCalculator( + args.char_list, + args.sym_space, + args.sym_blank, + args.report_cer, + args.report_wer, + ) + else: + self.error_calculator = None + self.rnnlm = None + + def scorers(self): + """Scorers.""" + return dict(decoder=self.decoder, ctc=CTCPrefixScorer(self.ctc, self.eos)) + + def encode(self, x, aux_x, extract_resnet_feats=False): + """Encode acoustic features. 
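
        Editorial note (not part of the original patch): `x` is the primary
        (e.g. video) feature stream and `aux_x` the auxiliary (e.g. audio)
        stream. Unless `extract_resnet_feats` is set, each stream is passed
        through its own encoder, the two outputs are concatenated along the
        feature axis, and the MLPHead fusion block projects the result back
        to `adim`, so the returned tensor has shape (T', adim).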
+ + :param ndarray x: source acoustic feature (T, D) + :return: encoder outputs + :rtype: torch.Tensor + """ + self.eval() + if extract_resnet_feats: + x = torch.as_tensor(x).unsqueeze(0) + resnet_feats = self.encoder( + x, + None, + extract_resnet_feats=extract_resnet_feats, + ) + return resnet_feats.squeeze(0) + else: + x = torch.as_tensor(x).unsqueeze(0) + aux_x = torch.as_tensor(aux_x).unsqueeze(0) + feat, _ = self.encoder(x, None) + aux_feat, _ = self.aux_encoder(aux_x, None) + fus_output = self.fusion(torch.cat((feat, aux_feat), dim=-1)) + return fus_output.squeeze(0) diff --git a/espnet/nets/pytorch_backend/lm/__init__.py b/espnet/nets/pytorch_backend/lm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/pytorch_backend/lm/default.py b/espnet/nets/pytorch_backend/lm/default.py new file mode 100644 index 0000000000000000000000000000000000000000..01bb26ea4a071e1672952ee0cfb754d16ad6d8e6 --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/default.py @@ -0,0 +1,431 @@ +"""Default Recurrent Neural Network Languge Model in `lm_train.py`.""" + +from typing import Any +from typing import List +from typing import Tuple + +import logging +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface +from espnet.nets.pytorch_backend.e2e_asr import to_device +from espnet.nets.scorer_interface import BatchScorerInterface +from espnet.utils.cli_utils import strtobool + + +class DefaultRNNLM(BatchScorerInterface, LMInterface, nn.Module): + """Default RNNLM for `LMInterface` Implementation. + + Note: + PyTorch seems to have memory leak when one GPU compute this after data parallel. + If parallel GPUs compute this, it seems to be fine. + See also https://github.com/espnet/espnet/issues/1075 + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--type", + type=str, + default="lstm", + nargs="?", + choices=["lstm", "gru"], + help="Which type of RNN to use", + ) + parser.add_argument( + "--layer", "-l", type=int, default=2, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", "-u", type=int, default=650, help="Number of hidden units" + ) + parser.add_argument( + "--embed-unit", + default=None, + type=int, + help="Number of hidden units in embedding layer, " + "if it is not specified, it keeps the same number with hidden units.", + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument( + "--emb-dropout-rate", + type=float, + default=0.0, + help="emb dropout probability", + ) + parser.add_argument( + "--tie-weights", + type=strtobool, + default=False, + help="Tie input and output embeddings", + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. 
see py:method:`add_arguments` + + """ + nn.Module.__init__(self) + # NOTE: for a compatibility with less than 0.5.0 version models + dropout_rate = getattr(args, "dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.6.1 version models + embed_unit = getattr(args, "embed_unit", None) + # NOTE: for a compatibility with less than 0.9.7 version models + emb_dropout_rate = getattr(args, "emb_dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.9.7 version models + tie_weights = getattr(args, "tie_weights", False) + + self.model = ClassifierWithState( + RNNLM( + n_vocab, + args.layer, + args.unit, + embed_unit, + args.type, + dropout_rate, + emb_dropout_rate, + tie_weights, + ) + ) + + def state_dict(self): + """Dump state dict.""" + return self.model.state_dict() + + def load_state_dict(self, d): + """Load state dict.""" + self.model.load_state_dict(d) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + loss = 0 + logp = 0 + count = torch.tensor(0).long() + state = None + batch_size, sequence_length = x.shape + for i in range(sequence_length): + # Compute the loss at this time step and accumulate it + state, loss_batch = self.model(state, x[:, i], t[:, i]) + non_zeros = torch.sum(x[:, i] != 0, dtype=loss_batch.dtype) + loss += loss_batch.mean() * non_zeros + logp += torch.sum(loss_batch * non_zeros) + count += int(non_zeros) + return loss / batch_size, loss, count.to(loss.device) + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + new_state, scores = self.model.predict(state, y[-1].unsqueeze(0)) + return scores.squeeze(0), new_state + + def final_score(self, state): + """Score eos. + + Args: + state: Scorer state for prefix tokens + + Returns: + float: final score + + """ + return self.model.final(state) + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch. + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. 
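
            Example (editorial sketch, not part of the original patch; the
            small sizes below are placeholders):

            >>> import argparse, torch
            >>> args = argparse.Namespace(type="lstm", layer=2, unit=16)
            >>> lm = DefaultRNNLM(n_vocab=50, args=args)
            >>> ys = torch.tensor([[1, 2], [3, 4]])      # (n_batch, ylen)
            >>> logp, states = lm.batch_score(ys, [None, None], None)
            >>> logp.shape
            torch.Size([2, 50])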
+ + """ + # merge states + n_batch = len(ys) + n_layers = self.model.predictor.n_layers + if self.model.predictor.typ == "lstm": + keys = ("c", "h") + else: + keys = ("h",) + + if states[0] is None: + states = None + else: + # transpose state of [batch, key, layer] into [key, layer, batch] + states = { + k: [ + torch.stack([states[b][k][i] for b in range(n_batch)]) + for i in range(n_layers) + ] + for k in keys + } + states, logp = self.model.predict(states, ys[:, -1]) + + # transpose state of [key, layer, batch] into [batch, key, layer] + return ( + logp, + [ + {k: [states[k][i][b] for i in range(n_layers)] for k in keys} + for b in range(n_batch) + ], + ) + + +class ClassifierWithState(nn.Module): + """A wrapper for pytorch RNNLM.""" + + def __init__( + self, predictor, lossfun=nn.CrossEntropyLoss(reduction="none"), label_key=-1 + ): + """Initialize class. + + :param torch.nn.Module predictor : The RNNLM + :param function lossfun : The loss function to use + :param int/str label_key : + + """ + if not (isinstance(label_key, (int, str))): + raise TypeError("label_key must be int or str, but is %s" % type(label_key)) + super(ClassifierWithState, self).__init__() + self.lossfun = lossfun + self.y = None + self.loss = None + self.label_key = label_key + self.predictor = predictor + + def forward(self, state, *args, **kwargs): + """Compute the loss value for an input and label pair. + + Notes: + It also computes accuracy and stores it to the attribute. + When ``label_key`` is ``int``, the corresponding element in ``args`` + is treated as ground truth labels. And when it is ``str``, the + element in ``kwargs`` is used. + The all elements of ``args`` and ``kwargs`` except the groundtruth + labels are features. + It feeds features to the predictor and compare the result + with ground truth labels. + + :param torch.Tensor state : the LM state + :param list[torch.Tensor] args : Input minibatch + :param dict[torch.Tensor] kwargs : Input minibatch + :return loss value + :rtype torch.Tensor + + """ + if isinstance(self.label_key, int): + if not (-len(args) <= self.label_key < len(args)): + msg = "Label key %d is out of bounds" % self.label_key + raise ValueError(msg) + t = args[self.label_key] + if self.label_key == -1: + args = args[:-1] + else: + args = args[: self.label_key] + args[self.label_key + 1 :] + elif isinstance(self.label_key, str): + if self.label_key not in kwargs: + msg = 'Label key "%s" is not found' % self.label_key + raise ValueError(msg) + t = kwargs[self.label_key] + del kwargs[self.label_key] + + self.y = None + self.loss = None + state, self.y = self.predictor(state, *args, **kwargs) + self.loss = self.lossfun(self.y, t) + return state, self.loss + + def predict(self, state, x): + """Predict log probabilities for given state and input x using the predictor. 
+ + :param torch.Tensor state : The current state + :param torch.Tensor x : The input + :return a tuple (new state, log prob vector) + :rtype (torch.Tensor, torch.Tensor) + """ + if hasattr(self.predictor, "normalized") and self.predictor.normalized: + return self.predictor(state, x) + else: + state, z = self.predictor(state, x) + return state, F.log_softmax(z, dim=1) + + def buff_predict(self, state, x, n): + """Predict new tokens from buffered inputs.""" + if self.predictor.__class__.__name__ == "RNNLM": + return self.predict(state, x) + + new_state = [] + new_log_y = [] + for i in range(n): + state_i = None if state is None else state[i] + state_i, log_y = self.predict(state_i, x[i].unsqueeze(0)) + new_state.append(state_i) + new_log_y.append(log_y) + + return new_state, torch.cat(new_log_y) + + def final(self, state, index=None): + """Predict final log probabilities for given state using the predictor. + + :param state: The state + :return The final log probabilities + :rtype torch.Tensor + """ + if hasattr(self.predictor, "final"): + if index is not None: + return self.predictor.final(state[index]) + else: + return self.predictor.final(state) + else: + return 0.0 + + +# Definition of a recurrent net for language modeling +class RNNLM(nn.Module): + """A pytorch RNNLM.""" + + def __init__( + self, + n_vocab, + n_layers, + n_units, + n_embed=None, + typ="lstm", + dropout_rate=0.5, + emb_dropout_rate=0.0, + tie_weights=False, + ): + """Initialize class. + + :param int n_vocab: The size of the vocabulary + :param int n_layers: The number of layers to create + :param int n_units: The number of units per layer + :param str typ: The RNN type + """ + super(RNNLM, self).__init__() + if n_embed is None: + n_embed = n_units + + self.embed = nn.Embedding(n_vocab, n_embed) + + if emb_dropout_rate == 0.0: + self.embed_drop = None + else: + self.embed_drop = nn.Dropout(emb_dropout_rate) + + if typ == "lstm": + self.rnn = nn.ModuleList( + [nn.LSTMCell(n_embed, n_units)] + + [nn.LSTMCell(n_units, n_units) for _ in range(n_layers - 1)] + ) + else: + self.rnn = nn.ModuleList( + [nn.GRUCell(n_embed, n_units)] + + [nn.GRUCell(n_units, n_units) for _ in range(n_layers - 1)] + ) + + self.dropout = nn.ModuleList( + [nn.Dropout(dropout_rate) for _ in range(n_layers + 1)] + ) + self.lo = nn.Linear(n_units, n_vocab) + self.n_layers = n_layers + self.n_units = n_units + self.typ = typ + + logging.info("Tie weights set to {}".format(tie_weights)) + logging.info("Dropout set to {}".format(dropout_rate)) + logging.info("Emb Dropout set to {}".format(emb_dropout_rate)) + + if tie_weights: + assert ( + n_embed == n_units + ), "Tie Weights: True need embedding and final dimensions to match" + self.lo.weight = self.embed.weight + + # initialize parameters from uniform distribution + for param in self.parameters(): + param.data.uniform_(-0.1, 0.1) + + def zero_state(self, batchsize): + """Initialize state.""" + p = next(self.parameters()) + return torch.zeros(batchsize, self.n_units).to(device=p.device, dtype=p.dtype) + + def forward(self, state, x): + """Forward neural networks.""" + if state is None: + h = [to_device(x, self.zero_state(x.size(0))) for n in range(self.n_layers)] + state = {"h": h} + if self.typ == "lstm": + c = [ + to_device(x, self.zero_state(x.size(0))) + for n in range(self.n_layers) + ] + state = {"c": c, "h": h} + + h = [None] * self.n_layers + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(x)) + else: + emb = self.embed(x) + if self.typ == "lstm": + c = [None] * self.n_layers + 
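            # Editorial annotation (not in the original patch): layer 0 of the
            # stacked LSTMCells consumes the (dropout-regularised) embedding,
            # and every higher layer consumes the hidden state of the layer
            # below; the new hidden/cell states are gathered into `state` for
            # the next time step.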
h[0], c[0] = self.rnn[0]( + self.dropout[0](emb), (state["h"][0], state["c"][0]) + ) + for n in range(1, self.n_layers): + h[n], c[n] = self.rnn[n]( + self.dropout[n](h[n - 1]), (state["h"][n], state["c"][n]) + ) + state = {"c": c, "h": h} + else: + h[0] = self.rnn[0](self.dropout[0](emb), state["h"][0]) + for n in range(1, self.n_layers): + h[n] = self.rnn[n](self.dropout[n](h[n - 1]), state["h"][n]) + state = {"h": h} + y = self.lo(self.dropout[-1](h[-1])) + return state, y diff --git a/espnet/nets/pytorch_backend/lm/seq_rnn.py b/espnet/nets/pytorch_backend/lm/seq_rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..ee5f026e3811c790f283dc9298e1221d783c0e4f --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/seq_rnn.py @@ -0,0 +1,178 @@ +"""Sequential implementation of Recurrent Neural Network Language Model.""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface + + +class SequentialRNNLM(LMInterface, torch.nn.Module): + """Sequential RNNLM. + + See also: + https://github.com/pytorch/examples/blob/4581968193699de14b56527296262dd76ab43557/word_language_model/model.py + + """ + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--type", + type=str, + default="lstm", + nargs="?", + choices=["lstm", "gru"], + help="Which type of RNN to use", + ) + parser.add_argument( + "--layer", "-l", type=int, default=2, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", "-u", type=int, default=650, help="Number of hidden units" + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. see py:method:`add_arguments` + + """ + torch.nn.Module.__init__(self) + self._setup( + rnn_type=args.type.upper(), + ntoken=n_vocab, + ninp=args.unit, + nhid=args.unit, + nlayers=args.layer, + dropout=args.dropout_rate, + ) + + def _setup( + self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False + ): + self.drop = nn.Dropout(dropout) + self.encoder = nn.Embedding(ntoken, ninp) + if rnn_type in ["LSTM", "GRU"]: + self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) + else: + try: + nonlinearity = {"RNN_TANH": "tanh", "RNN_RELU": "relu"}[rnn_type] + except KeyError: + raise ValueError( + "An invalid option for `--model` was supplied, " + "options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']" + ) + self.rnn = nn.RNN( + ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout + ) + self.decoder = nn.Linear(nhid, ntoken) + + # Optionally tie weights as in: + # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) + # https://arxiv.org/abs/1608.05859 + # and + # "Tying Word Vectors and Word Classifiers: + # A Loss Framework for Language Modeling" (Inan et al. 
2016) + # https://arxiv.org/abs/1611.01462 + if tie_weights: + if nhid != ninp: + raise ValueError( + "When using the tied flag, nhid must be equal to emsize" + ) + self.decoder.weight = self.encoder.weight + + self._init_weights() + + self.rnn_type = rnn_type + self.nhid = nhid + self.nlayers = nlayers + + def _init_weights(self): + # NOTE: original init in pytorch/examples + # initrange = 0.1 + # self.encoder.weight.data.uniform_(-initrange, initrange) + # self.decoder.bias.data.zero_() + # self.decoder.weight.data.uniform_(-initrange, initrange) + # NOTE: our default.py:RNNLM init + for param in self.parameters(): + param.data.uniform_(-0.1, 0.1) + + def forward(self, x, t): + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + y = self._before_loss(x, None)[0] + mask = (x != 0).to(y.dtype) + loss = F.cross_entropy(y.view(-1, y.shape[-1]), t.view(-1), reduction="none") + logp = loss * mask.view(-1) + logp = logp.sum() + count = mask.sum() + return logp / count, logp, count + + def _before_loss(self, input, hidden): + emb = self.drop(self.encoder(input)) + output, hidden = self.rnn(emb, hidden) + output = self.drop(output) + decoded = self.decoder( + output.view(output.size(0) * output.size(1), output.size(2)) + ) + return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden + + def init_state(self, x): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + bsz = 1 + weight = next(self.parameters()) + if self.rnn_type == "LSTM": + return ( + weight.new_zeros(self.nlayers, bsz, self.nhid), + weight.new_zeros(self.nlayers, bsz, self.nhid), + ) + else: + return weight.new_zeros(self.nlayers, bsz, self.nhid) + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. 
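
            Example (editorial sketch, not part of the original patch):

            >>> import argparse, torch
            >>> args = argparse.Namespace(type="lstm", layer=2, unit=16,
            ...                           dropout_rate=0.0)
            >>> lm = SequentialRNNLM(50, args)
            >>> state = lm.init_state(None)
            >>> logp, state = lm.score(torch.tensor([1, 5, 7]), state, None)
            >>> logp.shape
            torch.Size([50])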
+ + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + y, new_state = self._before_loss(y[-1].view(1, 1), state) + logp = y.log_softmax(dim=-1).view(-1) + return logp, new_state diff --git a/espnet/nets/pytorch_backend/lm/transformer.py b/espnet/nets/pytorch_backend/lm/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..42c2f86d461b5d6125f4b5455b7b31cd6944f75d --- /dev/null +++ b/espnet/nets/pytorch_backend/lm/transformer.py @@ -0,0 +1,252 @@ +"""Transformer language model.""" + +from typing import Any +from typing import List +from typing import Tuple + +import logging +import torch +import torch.nn as nn +import torch.nn.functional as F + +from espnet.nets.lm_interface import LMInterface +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.encoder import Encoder +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.scorer_interface import BatchScorerInterface +from espnet.utils.cli_utils import strtobool + + +class TransformerLM(nn.Module, LMInterface, BatchScorerInterface): + """Transformer language model.""" + + @staticmethod + def add_arguments(parser): + """Add arguments to command line argument parser.""" + parser.add_argument( + "--layer", type=int, default=4, help="Number of hidden layers" + ) + parser.add_argument( + "--unit", + type=int, + default=1024, + help="Number of hidden units in feedforward layer", + ) + parser.add_argument( + "--att-unit", + type=int, + default=256, + help="Number of hidden units in attention layer", + ) + parser.add_argument( + "--embed-unit", + type=int, + default=128, + help="Number of hidden units in embedding layer", + ) + parser.add_argument( + "--head", type=int, default=2, help="Number of multi head attention" + ) + parser.add_argument( + "--dropout-rate", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument( + "--att-dropout-rate", + type=float, + default=0.0, + help="att dropout probability", + ) + parser.add_argument( + "--emb-dropout-rate", + type=float, + default=0.0, + help="emb dropout probability", + ) + parser.add_argument( + "--tie-weights", + type=strtobool, + default=False, + help="Tie input and output embeddings", + ) + parser.add_argument( + "--pos-enc", + default="sinusoidal", + choices=["sinusoidal", "none"], + help="positional encoding", + ) + return parser + + def __init__(self, n_vocab, args): + """Initialize class. + + Args: + n_vocab (int): The size of the vocabulary + args (argparse.Namespace): configurations. 
see py:method:`add_arguments` + + """ + nn.Module.__init__(self) + + # NOTE: for a compatibility with less than 0.9.7 version models + emb_dropout_rate = getattr(args, "emb_dropout_rate", 0.0) + # NOTE: for a compatibility with less than 0.9.7 version models + tie_weights = getattr(args, "tie_weights", False) + # NOTE: for a compatibility with less than 0.9.7 version models + att_dropout_rate = getattr(args, "att_dropout_rate", 0.0) + + if args.pos_enc == "sinusoidal": + pos_enc_class = PositionalEncoding + elif args.pos_enc == "none": + + def pos_enc_class(*args, **kwargs): + return nn.Sequential() # indentity + + else: + raise ValueError(f"unknown pos-enc option: {args.pos_enc}") + + self.embed = nn.Embedding(n_vocab, args.embed_unit) + + if emb_dropout_rate == 0.0: + self.embed_drop = None + else: + self.embed_drop = nn.Dropout(emb_dropout_rate) + + self.encoder = Encoder( + idim=args.embed_unit, + attention_dim=args.att_unit, + attention_heads=args.head, + linear_units=args.unit, + num_blocks=args.layer, + dropout_rate=args.dropout_rate, + attention_dropout_rate=att_dropout_rate, + input_layer="linear", + pos_enc_class=pos_enc_class, + ) + self.decoder = nn.Linear(args.att_unit, n_vocab) + + logging.info("Tie weights set to {}".format(tie_weights)) + logging.info("Dropout set to {}".format(args.dropout_rate)) + logging.info("Emb Dropout set to {}".format(emb_dropout_rate)) + logging.info("Att Dropout set to {}".format(att_dropout_rate)) + + if tie_weights: + assert ( + args.att_unit == args.embed_unit + ), "Tie Weights: True need embedding and final dimensions to match" + self.decoder.weight = self.embed.weight + + def _target_mask(self, ys_in_pad): + ys_mask = ys_in_pad != 0 + m = subsequent_mask(ys_mask.size(-1), device=ys_mask.device).unsqueeze(0) + return ys_mask.unsqueeze(-2) & m + + def forward( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute LM loss value from buffer sequences. + + Args: + x (torch.Tensor): Input ids. (batch, len) + t (torch.Tensor): Target ids. (batch, len) + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Tuple of + loss to backward (scalar), + negative log-likelihood of t: -log p(t) (scalar) and + the number of elements in x (scalar) + + Notes: + The last two return values are used + in perplexity: p(t)^{-n} = exp(-log p(t) / n) + + """ + xm = x != 0 + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(x)) + else: + emb = self.embed(x) + + h, _ = self.encoder(emb, self._target_mask(x)) + y = self.decoder(h) + loss = F.cross_entropy(y.view(-1, y.shape[-1]), t.view(-1), reduction="none") + mask = xm.to(dtype=loss.dtype) + logp = loss * mask.view(-1) + logp = logp.sum() + count = mask.sum() + return logp / count, logp, count + + def score( + self, y: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): encoder feature that generates ys. 
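
            Example (editorial sketch, not part of the original patch):

            >>> import argparse, torch
            >>> args = argparse.Namespace(layer=2, unit=256, att_unit=128,
            ...                           embed_unit=64, head=2,
            ...                           dropout_rate=0.0, pos_enc="sinusoidal")
            >>> lm = TransformerLM(50, args)
            >>> logp, state = lm.score(torch.tensor([1, 5, 7]), None, None)
            >>> logp.shape
            torch.Size([50])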
+ + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and next state for ys + + """ + y = y.unsqueeze(0) + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(y)) + else: + emb = self.embed(y) + + h, _, cache = self.encoder.forward_one_step( + emb, self._target_mask(y), cache=state + ) + h = self.decoder(h[:, -1]) + logp = h.log_softmax(dim=-1).squeeze(0) + return logp, cache + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + # merge states + n_batch = len(ys) + n_layers = len(self.encoder.encoders) + if states[0] is None: + batch_state = None + else: + # transpose state of [batch, layer] into [layer, batch] + batch_state = [ + torch.stack([states[b][i] for b in range(n_batch)]) + for i in range(n_layers) + ] + + if self.embed_drop is not None: + emb = self.embed_drop(self.embed(ys)) + else: + emb = self.embed(ys) + + # batch decoding + h, _, states = self.encoder.forward_one_step( + emb, self._target_mask(ys), cache=batch_state + ) + h = self.decoder(h[:, -1]) + logp = h.log_softmax(dim=-1) + + # transpose state of [layer, batch] into [batch, layer] + state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)] + return logp, state_list diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..02dcbfa445a3212fac7316e6af666df328b6fcf3 --- /dev/null +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- + +"""Network related utility tools.""" + +import logging +from typing import Dict + +import numpy as np +import torch + + +def to_device(m, x): + """Send tensor into the device of the module. + + Args: + m (torch.nn.Module): Torch module. + x (Tensor): Torch tensor. + + Returns: + Tensor: Torch tensor located in the same place as torch module. + + """ + if isinstance(m, torch.nn.Module): + device = next(m.parameters()).device + elif isinstance(m, torch.Tensor): + device = m.device + else: + raise TypeError( + "Expected torch.nn.Module or torch.tensor, " f"bot got: {type(m)}" + ) + return x.to(device) + + +def pad_list(xs, pad_value): + """Perform padding for the list of tensors. + + Args: + xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)]. + pad_value (float): Value for padding. + + Returns: + Tensor: Padded tensor (B, Tmax, `*`). + + Examples: + >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)] + >>> x + [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])] + >>> pad_list(x, 0) + tensor([[1., 1., 1., 1.], + [1., 1., 0., 0.], + [1., 0., 0., 0.]]) + + """ + n_batch = len(xs) + max_len = max(x.size(0) for x in xs) + pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) + + for i in range(n_batch): + pad[i, : xs[i].size(0)] = xs[i] + + return pad + + +def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): + """Make mask tensor containing indices of padded part. 
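
    Editorial note (not part of the original docstring): the optional
    `maxlen` argument fixes the time dimension of the returned mask; it can
    only be used when `xs` is None and must satisfy `maxlen >= max(lengths)`.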
+ + Args: + lengths (LongTensor or List): Batch of lengths (B,). + xs (Tensor, optional): The reference tensor. + If set, masks will be the same shape as this tensor. + length_dim (int, optional): Dimension indicator of the above tensor. + See the example. + + Returns: + Tensor: Mask tensor containing indices of padded part. + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (including 1.2) + + Examples: + With only lengths. + + >>> lengths = [5, 3, 2] + >>> make_pad_mask(lengths) + masks = [[0, 0, 0, 0 ,0], + [0, 0, 0, 1, 1], + [0, 0, 1, 1, 1]] + + With the reference tensor. + + >>> xs = torch.zeros((3, 2, 4)) + >>> make_pad_mask(lengths, xs) + tensor([[[0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 0, 0, 1], + [0, 0, 0, 1]], + [[0, 0, 1, 1], + [0, 0, 1, 1]]], dtype=torch.uint8) + >>> xs = torch.zeros((3, 2, 6)) + >>> make_pad_mask(lengths, xs) + tensor([[[0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1]], + [[0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1]], + [[0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1]]], dtype=torch.uint8) + + With the reference tensor and dimension indicator. + + >>> xs = torch.zeros((3, 6, 6)) + >>> make_pad_mask(lengths, xs, 1) + tensor([[[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1]], + [[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1]], + [[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1]]], dtype=torch.uint8) + >>> make_pad_mask(lengths, xs, 2) + tensor([[[0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 1]], + [[0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1]], + [[0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1]]], dtype=torch.uint8) + + """ + if length_dim == 0: + raise ValueError("length_dim cannot be 0: {}".format(length_dim)) + + if not isinstance(lengths, list): + lengths = lengths.tolist() + bs = int(len(lengths)) + if maxlen is None: + if xs is None: + maxlen = int(max(lengths)) + else: + maxlen = xs.size(length_dim) + else: + assert xs is None + assert maxlen >= int(max(lengths)) + + seq_range = torch.arange(0, maxlen, dtype=torch.int64) + seq_range_expand = seq_range.unsqueeze(0).expand(bs, maxlen) + seq_length_expand = seq_range_expand.new(lengths).unsqueeze(-1) + mask = seq_range_expand >= seq_length_expand + + if xs is not None: + assert xs.size(0) == bs, (xs.size(0), bs) + + if length_dim < 0: + length_dim = xs.dim() + length_dim + # ind = (:, None, ..., None, :, , None, ..., None) + ind = tuple( + slice(None) if i in (0, length_dim) else None for i in range(xs.dim()) + ) + mask = mask[ind].expand_as(xs).to(xs.device) + return mask + + +def make_non_pad_mask(lengths, xs=None, length_dim=-1): + """Make mask tensor containing indices of non-padded part. + + Args: + lengths (LongTensor or List): Batch of lengths (B,). + xs (Tensor, optional): The reference tensor. + If set, masks will be the same shape as this tensor. + length_dim (int, optional): Dimension indicator of the above tensor. + See the example. + + Returns: + ByteTensor: mask tensor containing indices of padded part. 
+ dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (including 1.2) + + Examples: + With only lengths. + + >>> lengths = [5, 3, 2] + >>> make_non_pad_mask(lengths) + masks = [[1, 1, 1, 1 ,1], + [1, 1, 1, 0, 0], + [1, 1, 0, 0, 0]] + + With the reference tensor. + + >>> xs = torch.zeros((3, 2, 4)) + >>> make_non_pad_mask(lengths, xs) + tensor([[[1, 1, 1, 1], + [1, 1, 1, 1]], + [[1, 1, 1, 0], + [1, 1, 1, 0]], + [[1, 1, 0, 0], + [1, 1, 0, 0]]], dtype=torch.uint8) + >>> xs = torch.zeros((3, 2, 6)) + >>> make_non_pad_mask(lengths, xs) + tensor([[[1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0]], + [[1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0]], + [[1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0]]], dtype=torch.uint8) + + With the reference tensor and dimension indicator. + + >>> xs = torch.zeros((3, 6, 6)) + >>> make_non_pad_mask(lengths, xs, 1) + tensor([[[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]], + [[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]], + [[1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]], dtype=torch.uint8) + >>> make_non_pad_mask(lengths, xs, 2) + tensor([[[1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 0]], + [[1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0]], + [[1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0]]], dtype=torch.uint8) + + """ + return ~make_pad_mask(lengths, xs, length_dim) + + +def mask_by_length(xs, lengths, fill=0): + """Mask tensor according to length. + + Args: + xs (Tensor): Batch of input tensor (B, `*`). + lengths (LongTensor or List): Batch of lengths (B,). + fill (int or float): Value to fill masked part. + + Returns: + Tensor: Batch of masked input tensor (B, `*`). + + Examples: + >>> x = torch.arange(5).repeat(3, 1) + 1 + >>> x + tensor([[1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5]]) + >>> lengths = [5, 3, 2] + >>> mask_by_length(x, lengths) + tensor([[1, 2, 3, 4, 5], + [1, 2, 3, 0, 0], + [1, 2, 0, 0, 0]]) + + """ + assert xs.size(0) == len(lengths) + ret = xs.data.new(*xs.size()).fill_(fill) + for i, l in enumerate(lengths): + ret[i, :l] = xs[i, :l] + return ret + + +def th_accuracy(pad_outputs, pad_targets, ignore_label): + """Calculate accuracy. + + Args: + pad_outputs (Tensor): Prediction tensors (B * Lmax, D). + pad_targets (LongTensor): Target label tensors (B, Lmax, D). + ignore_label (int): Ignore label id. + + Returns: + float: Accuracy value (0.0 - 1.0). + + """ + pad_pred = pad_outputs.view( + pad_targets.size(0), pad_targets.size(1), pad_outputs.size(1) + ).argmax(2) + mask = pad_targets != ignore_label + numerator = torch.sum( + pad_pred.masked_select(mask) == pad_targets.masked_select(mask) + ) + denominator = torch.sum(mask) + return float(numerator) / float(denominator) + + +def to_torch_tensor(x): + """Change to torch.Tensor or ComplexTensor from numpy.ndarray. + + Args: + x: Inputs. It should be one of numpy.ndarray, Tensor, ComplexTensor, and dict. + + Returns: + Tensor or ComplexTensor: Type converted inputs. 
+ + Examples: + >>> xs = np.ones(3, dtype=np.float32) + >>> xs = to_torch_tensor(xs) + tensor([1., 1., 1.]) + >>> xs = torch.ones(3, 4, 5) + >>> assert to_torch_tensor(xs) is xs + >>> xs = {'real': xs, 'imag': xs} + >>> to_torch_tensor(xs) + ComplexTensor( + Real: + tensor([1., 1., 1.]) + Imag; + tensor([1., 1., 1.]) + ) + + """ + # If numpy, change to torch tensor + if isinstance(x, np.ndarray): + if x.dtype.kind == "c": + # Dynamically importing because torch_complex requires python3 + from torch_complex.tensor import ComplexTensor + + return ComplexTensor(x) + else: + return torch.from_numpy(x) + + # If {'real': ..., 'imag': ...}, convert to ComplexTensor + elif isinstance(x, dict): + # Dynamically importing because torch_complex requires python3 + from torch_complex.tensor import ComplexTensor + + if "real" not in x or "imag" not in x: + raise ValueError("has 'real' and 'imag' keys: {}".format(list(x))) + # Relative importing because of using python3 syntax + return ComplexTensor(x["real"], x["imag"]) + + # If torch.Tensor, as it is + elif isinstance(x, torch.Tensor): + return x + + else: + error = ( + "x must be numpy.ndarray, torch.Tensor or a dict like " + "{{'real': torch.Tensor, 'imag': torch.Tensor}}, " + "but got {}".format(type(x)) + ) + try: + from torch_complex.tensor import ComplexTensor + except Exception: + # If PY2 + raise ValueError(error) + else: + # If PY3 + if isinstance(x, ComplexTensor): + return x + else: + raise ValueError(error) + + +def get_subsample(train_args, mode, arch): + """Parse the subsampling factors from the args for the specified `mode` and `arch`. + + Args: + train_args: argument Namespace containing options. + mode: one of ('asr', 'mt', 'st') + arch: one of ('rnn', 'rnn-t', 'rnn_mix', 'rnn_mulenc', 'transformer') + + Returns: + np.ndarray / List[np.ndarray]: subsampling factors. + """ + if arch == "transformer": + return np.array([1]) + + elif mode == "mt" and arch == "rnn": + # +1 means input (+1) and layers outputs (train_args.elayer) + subsample = np.ones(train_args.elayers + 1, dtype=np.int) + logging.warning("Subsampling is not performed for machine translation.") + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif ( + (mode == "asr" and arch in ("rnn", "rnn-t")) + or (mode == "mt" and arch == "rnn") + or (mode == "st" and arch == "rnn") + ): + subsample = np.ones(train_args.elayers + 1, dtype=np.int) + if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): + ss = train_args.subsample.split("_") + for j in range(min(train_args.elayers + 1, len(ss))): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN." + ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif mode == "asr" and arch == "rnn_mix": + subsample = np.ones( + train_args.elayers_sd + train_args.elayers + 1, dtype=np.int + ) + if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): + ss = train_args.subsample.split("_") + for j in range( + min(train_args.elayers_sd + train_args.elayers + 1, len(ss)) + ): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN." 
+ ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + return subsample + + elif mode == "asr" and arch == "rnn_mulenc": + subsample_list = [] + for idx in range(train_args.num_encs): + subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int) + if train_args.etype[idx].endswith("p") and not train_args.etype[ + idx + ].startswith("vgg"): + ss = train_args.subsample[idx].split("_") + for j in range(min(train_args.elayers[idx] + 1, len(ss))): + subsample[j] = int(ss[j]) + else: + logging.warning( + "Encoder %d: Subsampling is not performed for vgg*. " + "It is performed in max pooling layers at CNN.", + idx + 1, + ) + logging.info("subsample: " + " ".join([str(x) for x in subsample])) + subsample_list.append(subsample) + return subsample_list + + else: + raise ValueError("Invalid options: mode={}, arch={}".format(mode, arch)) + + +def rename_state_dict( + old_prefix: str, new_prefix: str, state_dict: Dict[str, torch.Tensor] +): + """Replace keys of old prefix with new prefix in state dict.""" + # need this list not to break the dict iterator + old_keys = [k for k in state_dict if k.startswith(old_prefix)] + if len(old_keys) > 0: + logging.warning(f"Rename: {old_prefix} -> {new_prefix}") + for k in old_keys: + v = state_dict.pop(k) + new_k = k.replace(old_prefix, new_prefix) + state_dict[new_k] = v + + +def get_activation(act): + """Return activation function.""" + # Lazy load to avoid unused import + from espnet.nets.pytorch_backend.conformer.swish import Swish + + activation_funcs = { + "hardtanh": torch.nn.Hardtanh, + "tanh": torch.nn.Tanh, + "relu": torch.nn.ReLU, + "selu": torch.nn.SELU, + "swish": Swish, + } + + return activation_funcs[act]() + + +class MLPHead(torch.nn.Module): + def __init__(self, idim, hdim, odim, norm="batchnorm"): + super(MLPHead, self).__init__() + self.norm = norm + + self.fc1 = torch.nn.Linear(idim, hdim) + if norm == "batchnorm": + self.bn1 = torch.nn.BatchNorm1d(hdim) + elif norm == "layernorm": + self.norm1 = torch.nn.LayerNorm(hdim) + self.nonlin1 = torch.nn.ReLU(inplace=True) + self.fc2 = torch.nn.Linear( hdim, odim) + + def forward(self, x): + x = self.fc1(x) + if self.norm == "batchnorm": + x = self.bn1(x.transpose(1,2)).transpose(1,2) + elif self.norm == "layernorm": + x = self.norm1(x) + x = self.nonlin1(x) + x = self.fc2(x) + return x diff --git a/espnet/nets/pytorch_backend/transformer/__init__.py b/espnet/nets/pytorch_backend/transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/pytorch_backend/transformer/add_sos_eos.py b/espnet/nets/pytorch_backend/transformer/add_sos_eos.py new file mode 100644 index 0000000000000000000000000000000000000000..c550c5e58bc4525d7890b63b2b723e9495329016 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/add_sos_eos.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Unility funcitons for Transformer.""" + +import torch + + +def add_sos_eos(ys_pad, sos, eos, ignore_id): + """Add and labels. 
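
    Example (editorial addition, not part of the original patch; the summary
    above refers to the <sos> and <eos> labels):

    >>> import torch
    >>> ys_pad = torch.tensor([[1, 2, 3], [4, 5, -1]])
    >>> ys_in, ys_out = add_sos_eos(ys_pad, sos=10, eos=11, ignore_id=-1)
    >>> ys_in.tolist()
    [[10, 1, 2, 3], [10, 4, 5, 11]]
    >>> ys_out.tolist()
    [[1, 2, 3, 11], [4, 5, 11, -1]]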
+ + :param torch.Tensor ys_pad: batch of padded target sequences (B, Lmax) + :param int sos: index of + :param int eos: index of + :param int ignore_id: index of padding + :return: padded tensor (B, Lmax) + :rtype: torch.Tensor + :return: padded tensor (B, Lmax) + :rtype: torch.Tensor + """ + from espnet.nets.pytorch_backend.nets_utils import pad_list + + _sos = ys_pad.new([sos]) + _eos = ys_pad.new([eos]) + ys = [y[y != ignore_id] for y in ys_pad] # parse padded ys + ys_in = [torch.cat([_sos, y], dim=0) for y in ys] + ys_out = [torch.cat([y, _eos], dim=0) for y in ys] + return pad_list(ys_in, eos), pad_list(ys_out, ignore_id) diff --git a/espnet/nets/pytorch_backend/transformer/attention.py b/espnet/nets/pytorch_backend/transformer/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..888ce2bff2aa669af5f1f1ac9fc314d41a79d4e0 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/attention.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Multi-Head Attention layer definition.""" + +import math + +import numpy +import torch +from torch import nn + + +class MultiHeadedAttention(nn.Module): + """Multi-Head Attention layer. + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + """ + + def __init__(self, n_head, n_feat, dropout_rate): + """Construct an MultiHeadedAttention object.""" + super(MultiHeadedAttention, self).__init__() + assert n_feat % n_head == 0 + # We assume d_v always equals d_k + self.d_k = n_feat // n_head + self.h = n_head + self.linear_q = nn.Linear(n_feat, n_feat) + self.linear_k = nn.Linear(n_feat, n_feat) + self.linear_v = nn.Linear(n_feat, n_feat) + self.linear_out = nn.Linear(n_feat, n_feat) + self.attn = None + self.dropout = nn.Dropout(p=dropout_rate) + + def forward_qkv(self, query, key, value): + """Transform query, key and value. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + Returns: + torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k). + torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k). + torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k). + """ + n_batch = query.size(0) + q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k) + k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k) + v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k) + q = q.transpose(1, 2) # (batch, head, time1, d_k) + k = k.transpose(1, 2) # (batch, head, time2, d_k) + v = v.transpose(1, 2) # (batch, head, time2, d_k) + + return q, k, v + + def forward_attention(self, value, scores, mask, rtn_attn=False): + """Compute attention context vector. + Args: + value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k). + scores (torch.Tensor): Attention score (#batch, n_head, time1, time2). + mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2). + rtn_attn (boolean): Flag of return attention score + Returns: + torch.Tensor: Transformed value (#batch, time1, d_model) + weighted by the attention score (#batch, time1, time2). 
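
        Example (editorial sketch, not part of the original patch):

        >>> import torch
        >>> mha = MultiHeadedAttention(n_head=4, n_feat=256, dropout_rate=0.0)
        >>> q = k = v = torch.randn(2, 10, 256)
        >>> mha(q, k, v, mask=None).shape
        torch.Size([2, 10, 256])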
+ """ + n_batch = value.size(0) + if mask is not None: + mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) + min_value = float( + numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min + ) + scores = scores.masked_fill(mask, min_value) + self.attn = torch.softmax(scores, dim=-1).masked_fill( + mask, 0.0 + ) # (batch, head, time1, time2) + else: + self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) + + p_attn = self.dropout(self.attn) + x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) + x = ( + x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) + ) # (batch, time1, d_model) + if rtn_attn: + return self.linear_out(x), self.attn + return self.linear_out(x) # (batch, time1, d_model) + + def forward(self, query, key, value, mask, rtn_attn=False): + """Compute scaled dot product attention. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + rtn_attn (boolean): Flag of return attention score + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + """ + q, k, v = self.forward_qkv(query, key, value) + scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) + return self.forward_attention(v, scores, mask, rtn_attn) + + +class LegacyRelPositionMultiHeadedAttention(MultiHeadedAttention): + """Multi-Head Attention layer with relative position encoding (old version). + Details can be found in https://github.com/espnet/espnet/pull/2816. + Paper: https://arxiv.org/abs/1901.02860 + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + zero_triu (bool): Whether to zero the upper triangular part of attention matrix. + """ + + def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False): + """Construct an RelPositionMultiHeadedAttention object.""" + super().__init__(n_head, n_feat, dropout_rate) + self.zero_triu = zero_triu + # linear transformation for positional encoding + self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) + # these two learnable bias are used in matrix c and matrix d + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) + self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) + torch.nn.init.xavier_uniform_(self.pos_bias_u) + torch.nn.init.xavier_uniform_(self.pos_bias_v) + + def rel_shift(self, x): + """Compute relative positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, head, time1, time2). + Returns: + torch.Tensor: Output tensor. + """ + zero_pad = torch.zeros((*x.size()[:3], 1), device=x.device, dtype=x.dtype) + x_padded = torch.cat([zero_pad, x], dim=-1) + + x_padded = x_padded.view(*x.size()[:2], x.size(3) + 1, x.size(2)) + x = x_padded[:, :, 1:].view_as(x) + + if self.zero_triu: + ones = torch.ones((x.size(2), x.size(3))) + x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] + + return x + + def forward(self, query, key, value, pos_emb, mask): + """Compute 'Scaled Dot Product Attention' with rel. positional encoding. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + pos_emb (torch.Tensor): Positional embedding tensor (#batch, time1, size). 
+ mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). + """ + q, k, v = self.forward_qkv(query, key, value) + q = q.transpose(1, 2) # (batch, time1, head, d_k) + + n_batch_pos = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) + p = p.transpose(1, 2) # (batch, head, time1, d_k) + + # (batch, head, time1, d_k) + q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) + # (batch, head, time1, d_k) + q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) + + # compute attention score + # first compute matrix a and matrix c + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + # (batch, head, time1, time2) + matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) + + # compute matrix b and matrix d + # (batch, head, time1, time1) + matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) + matrix_bd = self.rel_shift(matrix_bd) + + scores = (matrix_ac + matrix_bd) / math.sqrt( + self.d_k + ) # (batch, head, time1, time2) + + return self.forward_attention(v, scores, mask) + + +class RelPositionMultiHeadedAttention(MultiHeadedAttention): + """Multi-Head Attention layer with relative position encoding (new implementation). + Details can be found in https://github.com/espnet/espnet/pull/2816. + Paper: https://arxiv.org/abs/1901.02860 + Args: + n_head (int): The number of heads. + n_feat (int): The number of features. + dropout_rate (float): Dropout rate. + zero_triu (bool): Whether to zero the upper triangular part of attention matrix. + """ + + def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False): + """Construct an RelPositionMultiHeadedAttention object.""" + super().__init__(n_head, n_feat, dropout_rate) + self.zero_triu = zero_triu + # linear transformation for positional encoding + self.linear_pos = nn.Linear(n_feat, n_feat, bias=False) + # these two learnable bias are used in matrix c and matrix d + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k)) + self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k)) + torch.nn.init.xavier_uniform_(self.pos_bias_u) + torch.nn.init.xavier_uniform_(self.pos_bias_v) + + def rel_shift(self, x): + """Compute relative positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, head, time1, 2*time1-1). + time1 means the length of query vector. + Returns: + torch.Tensor: Output tensor. + """ + zero_pad = torch.zeros((*x.size()[:3], 1), device=x.device, dtype=x.dtype) + x_padded = torch.cat([zero_pad, x], dim=-1) + + x_padded = x_padded.view(*x.size()[:2], x.size(3) + 1, x.size(2)) + x = x_padded[:, :, 1:].view_as(x)[ + :, :, :, : x.size(-1) // 2 + 1 + ] # only keep the positions from 0 to time2 + + if self.zero_triu: + ones = torch.ones((x.size(2), x.size(3)), device=x.device) + x = x * torch.tril(ones, x.size(3) - x.size(2))[None, None, :, :] + + return x + + def forward(self, query, key, value, pos_emb, mask): + """Compute 'Scaled Dot Product Attention' with rel. positional encoding. + Args: + query (torch.Tensor): Query tensor (#batch, time1, size). + key (torch.Tensor): Key tensor (#batch, time2, size). + value (torch.Tensor): Value tensor (#batch, time2, size). + pos_emb (torch.Tensor): Positional embedding tensor + (#batch, 2*time1-1, size). + mask (torch.Tensor): Mask tensor (#batch, 1, time2) or + (#batch, time1, time2). + Returns: + torch.Tensor: Output tensor (#batch, time1, d_model). 
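
        Example (editorial sketch, not part of the original patch; the
        positional embeddings below are random placeholders):

        >>> import torch
        >>> attn = RelPositionMultiHeadedAttention(4, 256, dropout_rate=0.0)
        >>> x = torch.randn(2, 10, 256)
        >>> pos_emb = torch.randn(1, 2 * 10 - 1, 256)  # (1, 2*time1-1, size)
        >>> attn(x, x, x, pos_emb, mask=None).shape
        torch.Size([2, 10, 256])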
+ """ + q, k, v = self.forward_qkv(query, key, value) + q = q.transpose(1, 2) # (batch, time1, head, d_k) + + n_batch_pos = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.d_k) + p = p.transpose(1, 2) # (batch, head, 2*time1-1, d_k) + + # (batch, head, time1, d_k) + q_with_bias_u = (q + self.pos_bias_u).transpose(1, 2) + # (batch, head, time1, d_k) + q_with_bias_v = (q + self.pos_bias_v).transpose(1, 2) + + # compute attention score + # first compute matrix a and matrix c + # as described in https://arxiv.org/abs/1901.02860 Section 3.3 + # (batch, head, time1, time2) + matrix_ac = torch.matmul(q_with_bias_u, k.transpose(-2, -1)) + + # compute matrix b and matrix d + # (batch, head, time1, 2*time1-1) + matrix_bd = torch.matmul(q_with_bias_v, p.transpose(-2, -1)) + matrix_bd = self.rel_shift(matrix_bd) + + scores = (matrix_ac + matrix_bd) / math.sqrt( + self.d_k + ) # (batch, head, time1, time2) + + return self.forward_attention(v, scores, mask) diff --git a/espnet/nets/pytorch_backend/transformer/convolution.py b/espnet/nets/pytorch_backend/transformer/convolution.py new file mode 100644 index 0000000000000000000000000000000000000000..d659b0307b00521977af2b7df0abad4feaef2376 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/convolution.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2020 Johns Hopkins University (Shinji Watanabe) +# Northwestern Polytechnical University (Pengcheng Guo) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""ConvolutionModule definition.""" + +import torch +from torch import nn + + +class ConvolutionModule(nn.Module): + """ConvolutionModule in Conformer model. + + :param int channels: channels of cnn + :param int kernel_size: kernerl size of cnn + + """ + + def __init__(self, channels, kernel_size, bias=True): + """Construct an ConvolutionModule object.""" + super(ConvolutionModule, self).__init__() + # kernerl_size should be a odd number for 'SAME' padding + assert (kernel_size - 1) % 2 == 0 + + self.pointwise_cov1 = nn.Conv1d( + channels, 2 * channels, kernel_size=1, stride=1, padding=0, bias=bias, + ) + self.depthwise_conv = nn.Conv1d( + channels, + channels, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + groups=channels, + bias=bias, + ) + self.norm = nn.BatchNorm1d(channels) + self.pointwise_cov2 = nn.Conv1d( + channels, channels, kernel_size=1, stride=1, padding=0, bias=bias, + ) + self.activation = Swish() + + def forward(self, x): + """Compute covolution module. 
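
        Example (editorial sketch, not part of the original patch):

        >>> import torch
        >>> conv = ConvolutionModule(channels=256, kernel_size=31)
        >>> x = torch.randn(2, 50, 256)
        >>> conv(x).shape
        torch.Size([2, 50, 256])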
+ + :param torch.Tensor x: (batch, time, size) + :return torch.Tensor: convoluted `value` (batch, time, d_model) + """ + # exchange the temporal dimension and the feature dimension + x = x.transpose(1, 2) + + # GLU mechanism + x = self.pointwise_cov1(x) # (batch, 2*channel, dim) + x = nn.functional.glu(x, dim=1) # (batch, channel, dim) + + # 1D Depthwise Conv + x = self.depthwise_conv(x) + x = self.activation(self.norm(x)) + + x = self.pointwise_cov2(x) + + return x.transpose(1, 2) + + +class Swish(nn.Module): + """Construct an Swish object.""" + + def forward(self, x): + """Return Swich activation function.""" + return x * torch.sigmoid(x) diff --git a/espnet/nets/pytorch_backend/transformer/decoder.py b/espnet/nets/pytorch_backend/transformer/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a0cd052ef97dc13dcf560ac3dd8d2e6763c9cb51 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/decoder.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Decoder definition.""" + +from typing import Any +from typing import List +from typing import Tuple + +import torch + +from espnet.nets.pytorch_backend.nets_utils import rename_state_dict +from espnet.nets.pytorch_backend.transformer.attention import MultiHeadedAttention +from espnet.nets.pytorch_backend.transformer.decoder_layer import DecoderLayer +from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm +from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask +from espnet.nets.pytorch_backend.transformer.positionwise_feed_forward import ( + PositionwiseFeedForward, # noqa: H301 +) +from espnet.nets.pytorch_backend.transformer.repeat import repeat +from espnet.nets.scorer_interface import BatchScorerInterface + + +def _pre_hook( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + # https://github.com/espnet/espnet/commit/3d422f6de8d4f03673b89e1caef698745ec749ea#diff-bffb1396f038b317b2b64dd96e6d3563 + rename_state_dict(prefix + "output_norm.", prefix + "after_norm.", state_dict) + + +class Decoder(BatchScorerInterface, torch.nn.Module): + """Transfomer decoder module. + + :param int odim: output dim + :param int attention_dim: dimention of attention + :param int attention_heads: the number of heads of multi head attention + :param int linear_units: the number of units of position-wise feed forward + :param int num_blocks: the number of decoder blocks + :param float dropout_rate: dropout rate + :param float attention_dropout_rate: dropout rate for attention + :param str or torch.nn.Module input_layer: input layer type + :param bool use_output_layer: whether to use output layer + :param class pos_enc_class: PositionalEncoding or ScaledPositionalEncoding + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. 
x -> x + att(x) + """ + + def __init__( + self, + odim, + attention_dim=256, + attention_heads=4, + linear_units=2048, + num_blocks=6, + dropout_rate=0.1, + positional_dropout_rate=0.1, + self_attention_dropout_rate=0.0, + src_attention_dropout_rate=0.0, + input_layer="embed", + use_output_layer=True, + pos_enc_class=PositionalEncoding, + normalize_before=True, + concat_after=False, + ): + """Construct an Decoder object.""" + torch.nn.Module.__init__(self) + self._register_load_state_dict_pre_hook(_pre_hook) + if input_layer == "embed": + self.embed = torch.nn.Sequential( + torch.nn.Embedding(odim, attention_dim), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer == "linear": + self.embed = torch.nn.Sequential( + torch.nn.Linear(odim, attention_dim), + torch.nn.LayerNorm(attention_dim), + torch.nn.Dropout(dropout_rate), + torch.nn.ReLU(), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif isinstance(input_layer, torch.nn.Module): + self.embed = torch.nn.Sequential( + input_layer, pos_enc_class(attention_dim, positional_dropout_rate) + ) + else: + raise NotImplementedError("only `embed` or torch.nn.Module is supported.") + self.normalize_before = normalize_before + self.decoders = repeat( + num_blocks, + lambda: DecoderLayer( + attention_dim, + MultiHeadedAttention( + attention_heads, attention_dim, self_attention_dropout_rate + ), + MultiHeadedAttention( + attention_heads, attention_dim, src_attention_dropout_rate + ), + PositionwiseFeedForward(attention_dim, linear_units, dropout_rate), + dropout_rate, + normalize_before, + concat_after, + ), + ) + if self.normalize_before: + self.after_norm = LayerNorm(attention_dim) + if use_output_layer: + self.output_layer = torch.nn.Linear(attention_dim, odim) + else: + self.output_layer = None + + def forward(self, tgt, tgt_mask, memory, memory_mask): + """Forward decoder. + :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out) + if input_layer == "embed" + input tensor (batch, maxlen_out, #mels) + in the other cases + :param torch.Tensor tgt_mask: input token mask, (batch, maxlen_out) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :param torch.Tensor memory: encoded memory, float32 (batch, maxlen_in, feat) + :param torch.Tensor memory_mask: encoded memory mask, (batch, maxlen_in) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :return x: decoded token score before softmax (batch, maxlen_out, token) + if use_output_layer is True, + final block outputs (batch, maxlen_out, attention_dim) + in the other cases + :rtype: torch.Tensor + :return tgt_mask: score mask before softmax (batch, maxlen_out) + :rtype: torch.Tensor + """ + x = self.embed(tgt) + x, tgt_mask, memory, memory_mask = self.decoders( + x, tgt_mask, memory, memory_mask + ) + if self.normalize_before: + x = self.after_norm(x) + if self.output_layer is not None: + x = self.output_layer(x) + return x, tgt_mask + + def forward_one_step(self, tgt, tgt_mask, memory, memory_mask=None, cache=None): + """Forward one step. 
+ :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out) + :param torch.Tensor tgt_mask: input token mask, (batch, maxlen_out) + dtype=torch.uint8 in PyTorch 1.2- + dtype=torch.bool in PyTorch 1.2+ (include 1.2) + :param torch.Tensor memory: encoded memory, float32 (batch, maxlen_in, feat) + :param List[torch.Tensor] cache: + cached output list of (batch, max_time_out-1, size) + :return y, cache: NN output value and cache per `self.decoders`. + `y.shape` is (batch, maxlen_out, token) + :rtype: Tuple[torch.Tensor, List[torch.Tensor]] + """ + x = self.embed(tgt) + if cache is None: + cache = [None] * len(self.decoders) + new_cache = [] + for c, decoder in zip(cache, self.decoders): + x, tgt_mask, memory, memory_mask = decoder( + x, tgt_mask, memory, memory_mask, cache=c + ) + new_cache.append(x) + + if self.normalize_before: + y = self.after_norm(x[:, -1]) + else: + y = x[:, -1] + if self.output_layer is not None: + y = torch.log_softmax(self.output_layer(y), dim=-1) + + return y, new_cache + + # beam search API (see ScorerInterface) + def score(self, ys, state, x): + """Score.""" + ys_mask = subsequent_mask(len(ys), device=x.device).unsqueeze(0) + logp, state = self.forward_one_step( + ys.unsqueeze(0), ys_mask, x.unsqueeze(0), cache=state + ) + return logp.squeeze(0), state + + # batch beam search API (see BatchScorerInterface) + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + """ + # merge states + n_batch = len(ys) + n_layers = len(self.decoders) + if states[0] is None: + batch_state = None + else: + # transpose state of [batch, layer] into [layer, batch] + batch_state = [ + torch.stack([states[b][l] for b in range(n_batch)]) + for l in range(n_layers) + ] + + # batch decoding + ys_mask = subsequent_mask(ys.size(-1), device=xs.device).unsqueeze(0) + logp, states = self.forward_one_step(ys, ys_mask, xs, cache=batch_state) + + # transpose state of [layer, batch] into [batch, layer] + state_list = [[states[l][b] for l in range(n_layers)] for b in range(n_batch)] + return logp, state_list diff --git a/espnet/nets/pytorch_backend/transformer/decoder_layer.py b/espnet/nets/pytorch_backend/transformer/decoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..c9658d9c603d3c8bc8a8a68d9d6c9904c03886ba --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/decoder_layer.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Decoder self-attention layer definition.""" + +import torch +from torch import nn + +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm + + +class DecoderLayer(nn.Module): + """Single decoder layer module. 
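# Editor's illustration (not part of the committed files): greedy incremental decoding
# with Decoder.forward_one_step() and its per-layer cache, as used by the beam-search
# API above. The vocabulary size, <sos>/<eos> ids and the encoder memory are made-up
# placeholders; it assumes the espnet package from this PR is importable.
import torch
from espnet.nets.pytorch_backend.transformer.decoder import Decoder
from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask

odim, sos, eos = 500, 499, 499                     # assumed token inventory
decoder = Decoder(odim=odim, attention_dim=256).eval()
memory = torch.randn(1, 50, 256)                   # (batch, maxlen_in, attention_dim)

ys = torch.tensor([[sos]])                         # running hypothesis (1, length)
cache = None
with torch.no_grad():
    for _ in range(10):
        ys_mask = subsequent_mask(ys.size(1)).unsqueeze(0)
        logp, cache = decoder.forward_one_step(ys, ys_mask, memory, cache=cache)
        next_id = logp.argmax(dim=-1, keepdim=True)  # (1, 1) most likely next token
        ys = torch.cat([ys, next_id], dim=1)
        if next_id.item() == eos:
            break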
+ :param int size: input dim + :param espnet.nets.pytorch_backend.transformer.attention.MultiHeadedAttention + self_attn: self attention module + :param espnet.nets.pytorch_backend.transformer.attention.MultiHeadedAttention + src_attn: source attention module + :param espnet.nets.pytorch_backend.transformer.positionwise_feed_forward. + PositionwiseFeedForward feed_forward: feed forward layer module + :param float dropout_rate: dropout rate + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. x -> x + att(x) + """ + + def __init__( + self, + size, + self_attn, + src_attn, + feed_forward, + dropout_rate, + normalize_before=True, + concat_after=False, + ): + """Construct an DecoderLayer object.""" + super(DecoderLayer, self).__init__() + self.size = size + self.self_attn = self_attn + self.src_attn = src_attn + self.feed_forward = feed_forward + self.norm1 = LayerNorm(size) + self.norm2 = LayerNorm(size) + self.norm3 = LayerNorm(size) + self.dropout = nn.Dropout(dropout_rate) + self.normalize_before = normalize_before + self.concat_after = concat_after + if self.concat_after: + self.concat_linear1 = nn.Linear(size + size, size) + self.concat_linear2 = nn.Linear(size + size, size) + + def forward(self, tgt, tgt_mask, memory, memory_mask, cache=None): + """Compute decoded features. + Args: + tgt (torch.Tensor): + decoded previous target features (batch, max_time_out, size) + tgt_mask (torch.Tensor): mask for x (batch, max_time_out) + memory (torch.Tensor): encoded source features (batch, max_time_in, size) + memory_mask (torch.Tensor): mask for memory (batch, max_time_in) + cache (torch.Tensor): cached output (batch, max_time_out-1, size) + """ + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + + if cache is None: + tgt_q = tgt + tgt_q_mask = tgt_mask + else: + # compute only the last frame query keeping dim: max_time_out -> 1 + assert cache.shape == ( + tgt.shape[0], + tgt.shape[1] - 1, + self.size, + ), f"{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}" + tgt_q = tgt[:, -1:, :] + residual = residual[:, -1:, :] + tgt_q_mask = None + if tgt_mask is not None: + tgt_q_mask = tgt_mask[:, -1:, :] + + if self.concat_after: + tgt_concat = torch.cat( + (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)), dim=-1 + ) + x = residual + self.concat_linear1(tgt_concat) + else: + x = residual + self.dropout(self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)) + if not self.normalize_before: + x = self.norm1(x) + + residual = x + if self.normalize_before: + x = self.norm2(x) + if self.concat_after: + x_concat = torch.cat( + (x, self.src_attn(x, memory, memory, memory_mask)), dim=-1 + ) + x = residual + self.concat_linear2(x_concat) + else: + x = residual + self.dropout(self.src_attn(x, memory, memory, memory_mask)) + if not self.normalize_before: + x = self.norm2(x) + + residual = x + if self.normalize_before: + x = self.norm3(x) + x = residual + self.dropout(self.feed_forward(x)) + if not self.normalize_before: + x = self.norm3(x) + + if cache is not None: + x = torch.cat([cache, x], dim=1) + + return x, tgt_mask, memory, memory_mask diff --git a/espnet/nets/pytorch_backend/transformer/embedding.py b/espnet/nets/pytorch_backend/transformer/embedding.py new file mode 100644 index 
0000000000000000000000000000000000000000..82f59bbb75efbcc4f9c11448a2b0598e2b707993 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/embedding.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Positional Encoding Module.""" + +import math + +import torch + + +def _pre_hook( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + """Perform pre-hook in load_state_dict for backward compatibility. + Note: + We saved self.pe until v.0.5.2 but we have omitted it later. + Therefore, we remove the item "pe" from `state_dict` for backward compatibility. + """ + k = prefix + "pe" + if k in state_dict: + state_dict.pop(k) + + +class PositionalEncoding(torch.nn.Module): + """Positional encoding. + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + reverse (bool): Whether to reverse the input position. Only for + the class LegacyRelPositionalEncoding. We remove it in the current + class RelPositionalEncoding. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False): + """Construct an PositionalEncoding object.""" + super(PositionalEncoding, self).__init__() + self.d_model = d_model + self.reverse = reverse + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + self._register_load_state_dict_pre_hook(_pre_hook) + + def extend_pe(self, x): + """Reset the positional encodings.""" + if self.pe is not None: + if self.pe.size(1) >= x.size(1): + if self.pe.dtype != x.dtype or self.pe.device != x.device: + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + pe = torch.zeros(x.size(1), self.d_model) + if self.reverse: + position = torch.arange( + x.size(1) - 1, -1, -1.0, dtype=torch.float32 + ).unsqueeze(1) + else: + position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, self.d_model, 2, dtype=torch.float32) + * -(math.log(10000.0) / self.d_model) + ) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + self.pe = pe.to(device=x.device, dtype=x.dtype) + + def forward(self, x: torch.Tensor): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + self.pe[:, : x.size(1)] + return self.dropout(x) + + +class ScaledPositionalEncoding(PositionalEncoding): + """Scaled positional encoding module. + See Sec. 3.2 https://arxiv.org/abs/1809.08895 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__(d_model=d_model, dropout_rate=dropout_rate, max_len=max_len) + self.alpha = torch.nn.Parameter(torch.tensor(1.0)) + + def reset_parameters(self): + """Reset parameters.""" + self.alpha.data = torch.tensor(1.0) + + def forward(self, x): + """Add positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). 
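# Editor's illustration (not part of the committed files): the sinusoidal table that
# extend_pe() above precomputes, written out for a toy d_model and length. The table
# is then combined with the input as x * sqrt(d_model) + pe (PositionalEncoding) or
# x + alpha * pe (ScaledPositionalEncoding), followed by dropout.
import math
import torch

d_model, length = 8, 4                         # toy sizes for inspection
position = torch.arange(0, length, dtype=torch.float32).unsqueeze(1)
div_term = torch.exp(
    torch.arange(0, d_model, 2, dtype=torch.float32)
    * -(math.log(10000.0) / d_model)
)
pe = torch.zeros(length, d_model)
pe[:, 0::2] = torch.sin(position * div_term)   # even dimensions
pe[:, 1::2] = torch.cos(position * div_term)   # odd dimensions
print(pe.shape)                                # torch.Size([4, 8])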
+ """ + self.extend_pe(x) + x = x + self.alpha * self.pe[:, : x.size(1)] + return self.dropout(x) + + +class LegacyRelPositionalEncoding(PositionalEncoding): + """Relative positional encoding module (old version). + Details can be found in https://github.com/espnet/espnet/pull/2816. + See : Appendix B in https://arxiv.org/abs/1901.02860 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Initialize class.""" + super().__init__( + d_model=d_model, + dropout_rate=dropout_rate, + max_len=max_len, + reverse=True, + ) + + def forward(self, x): + """Compute positional encoding. + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + torch.Tensor: Positional embedding tensor (1, time, `*`). + """ + self.extend_pe(x) + x = x * self.xscale + pos_emb = self.pe[:, : x.size(1)] + return self.dropout(x), self.dropout(pos_emb) + + +class RelPositionalEncoding(torch.nn.Module): + """Relative positional encoding module (new implementation). + Details can be found in https://github.com/espnet/espnet/pull/2816. + See : Appendix B in https://arxiv.org/abs/1901.02860 + Args: + d_model (int): Embedding dimension. + dropout_rate (float): Dropout rate. + max_len (int): Maximum input length. + """ + + def __init__(self, d_model, dropout_rate, max_len=5000): + """Construct an PositionalEncoding object.""" + super(RelPositionalEncoding, self).__init__() + self.d_model = d_model + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + + def extend_pe(self, x): + """Reset the positional encodings.""" + if self.pe is not None: + # self.pe contains both positive and negative parts + # the length of self.pe is 2 * input_len - 1 + if self.pe.size(1) >= x.size(1) * 2 - 1: + if self.pe.dtype != x.dtype or self.pe.device != x.device: + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + # Suppose `i` means to the position of query vecotr and `j` means the + # position of key vector. We use position relative positions when keys + # are to the left (i>j) and negative relative positions otherwise (i x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. 
x -> x + att(x) + :param str positionwise_layer_type: linear of conv1d + :param int positionwise_conv_kernel_size: kernel size of positionwise conv1d layer + :param str encoder_attn_layer_type: encoder attention layer type + :param bool macaron_style: whether to use macaron style for positionwise layer + :param bool use_cnn_module: whether to use convolution module + :param bool zero_triu: whether to zero the upper triangular part of attention matrix + :param int cnn_module_kernel: kernerl size of convolution module + :param int padding_idx: padding_idx for input_layer=embed + """ + + def __init__( + self, + idim, + attention_dim=256, + attention_heads=4, + linear_units=2048, + num_blocks=6, + dropout_rate=0.1, + positional_dropout_rate=0.1, + attention_dropout_rate=0.0, + input_layer="conv2d", + pos_enc_class=PositionalEncoding, + normalize_before=True, + concat_after=False, + positionwise_layer_type="linear", + positionwise_conv_kernel_size=1, + macaron_style=False, + encoder_attn_layer_type="mha", + use_cnn_module=False, + zero_triu=False, + cnn_module_kernel=31, + padding_idx=-1, + relu_type="prelu", + a_upsample_ratio=1, + ): + """Construct an Encoder object.""" + super(Encoder, self).__init__() + self._register_load_state_dict_pre_hook(_pre_hook) + + if encoder_attn_layer_type == "rel_mha": + pos_enc_class = RelPositionalEncoding + elif encoder_attn_layer_type == "legacy_rel_mha": + pos_enc_class = LegacyRelPositionalEncoding + # -- frontend module. + if input_layer == "conv1d": + self.frontend = Conv1dResNet( + relu_type=relu_type, + a_upsample_ratio=a_upsample_ratio, + ) + elif input_layer == "conv3d": + self.frontend = Conv3dResNet(relu_type=relu_type) + else: + self.frontend = None + # -- backend module. + if input_layer == "linear": + self.embed = torch.nn.Sequential( + torch.nn.Linear(idim, attention_dim), + torch.nn.LayerNorm(attention_dim), + torch.nn.Dropout(dropout_rate), + torch.nn.ReLU(), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer == "conv2d": + self.embed = Conv2dSubsampling( + idim, + attention_dim, + dropout_rate, + pos_enc_class(attention_dim, dropout_rate), + ) + elif input_layer == "vgg2l": + self.embed = VGG2L(idim, attention_dim) + elif input_layer == "embed": + self.embed = torch.nn.Sequential( + torch.nn.Embedding(idim, attention_dim, padding_idx=padding_idx), + pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif isinstance(input_layer, torch.nn.Module): + self.embed = torch.nn.Sequential( + input_layer, pos_enc_class(attention_dim, positional_dropout_rate), + ) + elif input_layer in ["conv1d", "conv3d"]: + self.embed = torch.nn.Sequential( + torch.nn.Linear(512, attention_dim), + pos_enc_class(attention_dim, positional_dropout_rate) + ) + elif input_layer is None: + self.embed = torch.nn.Sequential( + pos_enc_class(attention_dim, positional_dropout_rate) + ) + else: + raise ValueError("unknown input_layer: " + input_layer) + self.normalize_before = normalize_before + if positionwise_layer_type == "linear": + positionwise_layer = PositionwiseFeedForward + positionwise_layer_args = (attention_dim, linear_units, dropout_rate) + elif positionwise_layer_type == "conv1d": + positionwise_layer = MultiLayeredConv1d + positionwise_layer_args = ( + attention_dim, + linear_units, + positionwise_conv_kernel_size, + dropout_rate, + ) + elif positionwise_layer_type == "conv1d-linear": + positionwise_layer = Conv1dLinear + positionwise_layer_args = ( + attention_dim, + linear_units, + positionwise_conv_kernel_size, + 
dropout_rate, + ) + else: + raise NotImplementedError("Support only linear or conv1d.") + + if encoder_attn_layer_type == "mha": + encoder_attn_layer = MultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + ) + elif encoder_attn_layer_type == "legacy_rel_mha": + encoder_attn_layer = LegacyRelPositionMultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + ) + elif encoder_attn_layer_type == "rel_mha": + encoder_attn_layer = RelPositionMultiHeadedAttention + encoder_attn_layer_args = ( + attention_heads, + attention_dim, + attention_dropout_rate, + zero_triu, + ) + else: + raise ValueError("unknown encoder_attn_layer: " + encoder_attn_layer) + + convolution_layer = ConvolutionModule + convolution_layer_args = (attention_dim, cnn_module_kernel) + + self.encoders = repeat( + num_blocks, + lambda: EncoderLayer( + attention_dim, + encoder_attn_layer(*encoder_attn_layer_args), + positionwise_layer(*positionwise_layer_args), + convolution_layer(*convolution_layer_args) if use_cnn_module else None, + dropout_rate, + normalize_before, + concat_after, + macaron_style, + ), + ) + if self.normalize_before: + self.after_norm = LayerNorm(attention_dim) + + def forward(self, xs, masks, extract_resnet_feats=False): + """Encode input sequence. + + :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :param str extract_features: the position for feature extraction + :return: position embedded tensor and mask + :rtype Tuple[torch.Tensor, torch.Tensor]: + """ + if isinstance(self.frontend, (Conv1dResNet, Conv3dResNet)): + xs = self.frontend(xs) + if extract_resnet_feats: + return xs + + if isinstance(self.embed, Conv2dSubsampling): + xs, masks = self.embed(xs, masks) + else: + xs = self.embed(xs) + + xs, masks = self.encoders(xs, masks) + + if isinstance(xs, tuple): + xs = xs[0] + + if self.normalize_before: + xs = self.after_norm(xs) + + return xs, masks + + def forward_one_step(self, xs, masks, cache=None): + """Encode input frame. + + :param torch.Tensor xs: input tensor + :param torch.Tensor masks: input mask + :param List[torch.Tensor] cache: cache tensors + :return: position embedded tensor, mask and new cache + :rtype Tuple[torch.Tensor, torch.Tensor, List[torch.Tensor]]: + """ + if isinstance(self.frontend, (Conv1dResNet, Conv3dResNet)): + xs = self.frontend(xs) + + if isinstance(self.embed, Conv2dSubsampling): + xs, masks = self.embed(xs, masks) + else: + xs = self.embed(xs) + if cache is None: + cache = [None for _ in range(len(self.encoders))] + new_cache = [] + for c, e in zip(cache, self.encoders): + xs, masks = e(xs, masks, cache=c) + new_cache.append(xs) + if self.normalize_before: + xs = self.after_norm(xs) + return xs, masks, new_cache diff --git a/espnet/nets/pytorch_backend/transformer/encoder_layer.py b/espnet/nets/pytorch_backend/transformer/encoder_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..528dd8cc92a7273fa57013d7f935c6f22ab0ca58 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/encoder_layer.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Encoder self-attention layer definition.""" + +import copy +import torch + +from torch import nn + +from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm + + +class EncoderLayer(nn.Module): + """Encoder layer module. 
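# Editor's illustration (not part of the committed files): instantiating the encoder
# defined above as a Conformer-style block stack over pre-extracted 512-dim features
# (input_layer="linear", so neither ResNet frontend is used). All hyper-parameters and
# shapes are example values; it assumes the espnet package from this PR is importable.
import torch
from espnet.nets.pytorch_backend.transformer.encoder import Encoder

encoder = Encoder(
    idim=512,
    attention_dim=256,
    attention_heads=4,
    num_blocks=2,
    input_layer="linear",
    encoder_attn_layer_type="rel_mha",    # relative positional multi-head attention
    macaron_style=True,
    use_cnn_module=True,
    cnn_module_kernel=31,
)
xs = torch.randn(2, 75, 512)              # (batch, frames, idim), e.g. 3 s at 25 fps
masks = torch.ones(2, 1, 75, dtype=torch.bool)
out, out_masks = encoder(xs, masks)
print(out.shape)                          # torch.Size([2, 75, 256])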
+ + :param int size: input dim + :param espnet.nets.pytorch_backend.transformer.attention. + MultiHeadedAttention self_attn: self attention module + RelPositionMultiHeadedAttention self_attn: self attention module + :param espnet.nets.pytorch_backend.transformer.positionwise_feed_forward. + PositionwiseFeedForward feed_forward: + feed forward module + :param espnet.nets.pytorch_backend.transformer.convolution. + ConvolutionModule feed_foreard: + feed forward module + :param float dropout_rate: dropout rate + :param bool normalize_before: whether to use layer_norm before the first block + :param bool concat_after: whether to concat attention layer's input and output + if True, additional linear will be applied. + i.e. x -> x + linear(concat(x, att(x))) + if False, no additional linear will be applied. i.e. x -> x + att(x) + :param bool macaron_style: whether to use macaron style for PositionwiseFeedForward + + """ + + def __init__( + self, + size, + self_attn, + feed_forward, + conv_module, + dropout_rate, + normalize_before=True, + concat_after=False, + macaron_style=False, + ): + """Construct an EncoderLayer object.""" + super(EncoderLayer, self).__init__() + self.self_attn = self_attn + self.feed_forward = feed_forward + self.ff_scale = 1.0 + self.conv_module = conv_module + self.macaron_style = macaron_style + self.norm_ff = LayerNorm(size) # for the FNN module + self.norm_mha = LayerNorm(size) # for the MHA module + if self.macaron_style: + self.feed_forward_macaron = copy.deepcopy(feed_forward) + self.ff_scale = 0.5 + # for another FNN module in macaron style + self.norm_ff_macaron = LayerNorm(size) + if self.conv_module is not None: + self.norm_conv = LayerNorm(size) # for the CNN module + self.norm_final = LayerNorm(size) # for the final output of the block + self.dropout = nn.Dropout(dropout_rate) + self.size = size + self.normalize_before = normalize_before + self.concat_after = concat_after + if self.concat_after: + self.concat_linear = nn.Linear(size + size, size) + + def forward(self, x_input, mask, cache=None): + """Compute encoded features. 
+ + :param torch.Tensor x_input: encoded source features (batch, max_time_in, size) + :param torch.Tensor mask: mask for x (batch, max_time_in) + :param torch.Tensor cache: cache for x (batch, max_time_in - 1, size) + :rtype: Tuple[torch.Tensor, torch.Tensor] + """ + if isinstance(x_input, tuple): + x, pos_emb = x_input[0], x_input[1] + else: + x, pos_emb = x_input, None + + # whether to use macaron style + if self.macaron_style: + residual = x + if self.normalize_before: + x = self.norm_ff_macaron(x) + x = residual + self.ff_scale * self.dropout(self.feed_forward_macaron(x)) + if not self.normalize_before: + x = self.norm_ff_macaron(x) + + # multi-headed self-attention module + residual = x + if self.normalize_before: + x = self.norm_mha(x) + + if cache is None: + x_q = x + else: + assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size) + x_q = x[:, -1:, :] + residual = residual[:, -1:, :] + mask = None if mask is None else mask[:, -1:, :] + + if pos_emb is not None: + x_att = self.self_attn(x_q, x, x, pos_emb, mask) + else: + x_att = self.self_attn(x_q, x, x, mask) + + if self.concat_after: + x_concat = torch.cat((x, x_att), dim=-1) + x = residual + self.concat_linear(x_concat) + else: + x = residual + self.dropout(x_att) + if not self.normalize_before: + x = self.norm_mha(x) + + # convolution module + if self.conv_module is not None: + residual = x + if self.normalize_before: + x = self.norm_conv(x) + x = residual + self.dropout(self.conv_module(x)) + if not self.normalize_before: + x = self.norm_conv(x) + + # feed forward module + residual = x + if self.normalize_before: + x = self.norm_ff(x) + x = residual + self.ff_scale * self.dropout(self.feed_forward(x)) + if not self.normalize_before: + x = self.norm_ff(x) + + if self.conv_module is not None: + x = self.norm_final(x) + + if cache is not None: + x = torch.cat([cache, x], dim=1) + + if pos_emb is not None: + return (x, pos_emb), mask + else: + return x, mask diff --git a/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py b/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0d8b30338a778da9ba27870d51db24afd10d9b24 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/label_smoothing_loss.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Label smoothing module.""" + +import torch +from torch import nn + + +class LabelSmoothingLoss(nn.Module): + """Label-smoothing loss. + + :param int size: the number of class + :param int padding_idx: ignored class id + :param float smoothing: smoothing rate (0.0 means the conventional CE) + :param bool normalize_length: normalize loss by sequence length if True + :param torch.nn.Module criterion: loss function to be smoothed + """ + + def __init__( + self, + size, + padding_idx, + smoothing, + normalize_length=False, + criterion=nn.KLDivLoss(reduction="none"), + ): + """Construct an LabelSmoothingLoss object.""" + super(LabelSmoothingLoss, self).__init__() + self.criterion = criterion + self.padding_idx = padding_idx + self.confidence = 1.0 - smoothing + self.smoothing = smoothing + self.size = size + self.true_dist = None + self.normalize_length = normalize_length + + def forward(self, x, target): + """Compute loss between x and target. 
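# Editor's illustration (not part of the committed files): with size=5 and
# smoothing=0.1 the smoothed target row for gold label 2 becomes
# [0.025, 0.025, 0.9, 0.025, 0.025], and positions equal to padding_idx are ignored.
# Shapes and the padding id below are example values.
import torch
from espnet.nets.pytorch_backend.transformer.label_smoothing_loss import LabelSmoothingLoss

criterion = LabelSmoothingLoss(size=5, padding_idx=-1, smoothing=0.1)
pred = torch.randn(2, 3, 5)                      # (batch, seqlen, class) raw scores
target = torch.tensor([[2, 4, -1], [0, 1, 3]])   # -1 marks a padded position
print(criterion(pred, target))                   # scalar loss, normalised by batch size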
+ + :param torch.Tensor x: prediction (batch, seqlen, class) + :param torch.Tensor target: + target signal masked with self.padding_id (batch, seqlen) + :return: scalar float value + :rtype torch.Tensor + """ + assert x.size(2) == self.size + batch_size = x.size(0) + x = x.view(-1, self.size) + target = target.view(-1) + with torch.no_grad(): + true_dist = x.clone() + true_dist.fill_(self.smoothing / (self.size - 1)) + ignore = target == self.padding_idx # (B,) + total = len(target) - ignore.sum().item() + target = target.masked_fill(ignore, 0) # avoid -1 index + true_dist.scatter_(1, target.unsqueeze(1), self.confidence) + kl = self.criterion(torch.log_softmax(x, dim=1), true_dist) + denom = total if self.normalize_length else batch_size + return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom diff --git a/espnet/nets/pytorch_backend/transformer/layer_norm.py b/espnet/nets/pytorch_backend/transformer/layer_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..db8be30ff70554edb179109037665e51c04510ec --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/layer_norm.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Layer normalization module.""" + +import torch + + +class LayerNorm(torch.nn.LayerNorm): + """Layer normalization module. + + :param int nout: output dim size + :param int dim: dimension to be normalized + """ + + def __init__(self, nout, dim=-1): + """Construct an LayerNorm object.""" + super(LayerNorm, self).__init__(nout, eps=1e-12) + self.dim = dim + + def forward(self, x): + """Apply layer normalization. + + :param torch.Tensor x: input tensor + :return: layer normalized tensor + :rtype torch.Tensor + """ + if self.dim == -1: + return super(LayerNorm, self).forward(x) + return super(LayerNorm, self).forward(x.transpose(1, -1)).transpose(1, -1) diff --git a/espnet/nets/pytorch_backend/transformer/mask.py b/espnet/nets/pytorch_backend/transformer/mask.py new file mode 100644 index 0000000000000000000000000000000000000000..127f7a200e22674916ae976e045c6eacee81bc1c --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/mask.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Mask module.""" + +from distutils.version import LooseVersion + +import torch + +is_torch_1_2_plus = LooseVersion(torch.__version__) >= LooseVersion("1.2.0") +# LooseVersion('1.2.0') == LooseVersion(torch.__version__) can't include e.g. 1.2.0+aaa +is_torch_1_2 = ( + LooseVersion("1.3") > LooseVersion(torch.__version__) >= LooseVersion("1.2") +) +datatype = torch.bool if is_torch_1_2_plus else torch.uint8 + + +def subsequent_mask(size, device="cpu", dtype=datatype): + """Create mask for subsequent steps (1, size, size). + + :param int size: size of mask + :param str device: "cpu" or "cuda" or torch.Tensor.device + :param torch.dtype dtype: result dtype + :rtype: torch.Tensor + >>> subsequent_mask(3) + [[1, 0, 0], + [1, 1, 0], + [1, 1, 1]] + """ + if is_torch_1_2 and dtype == torch.bool: + # torch=1.2 doesn't support tril for bool tensor + ret = torch.ones(size, size, device=device, dtype=torch.uint8) + return torch.tril(ret, out=ret).type(dtype) + else: + ret = torch.ones(size, size, device=device, dtype=dtype) + return torch.tril(ret, out=ret) + + +def target_mask(ys_in_pad, ignore_id): + """Create mask for decoder self-attention. 
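# Editor's illustration (not part of the committed files): the causal mask produced by
# subsequent_mask() and the combined padding/causal mask produced by target_mask() for
# a tiny batch; ignore_id=-1 is an assumed padding id.
import torch
from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask, target_mask

print(subsequent_mask(3))
# tensor([[ True, False, False],
#         [ True,  True, False],
#         [ True,  True,  True]])
ys_in_pad = torch.tensor([[7, 3, 5], [7, 3, -1]])    # second sequence has one pad
print(target_mask(ys_in_pad, ignore_id=-1).shape)    # torch.Size([2, 3, 3])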
+ + :param torch.Tensor ys_pad: batch of padded target sequences (B, Lmax) + :param int ignore_id: index of padding + :param torch.dtype dtype: result dtype + :rtype: torch.Tensor + """ + ys_mask = ys_in_pad != ignore_id + m = subsequent_mask(ys_mask.size(-1), device=ys_mask.device).unsqueeze(0) + return ys_mask.unsqueeze(-2) & m diff --git a/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py b/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..fdb7fe70810eda54c727367efc986ce02ce581cc --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/multi_layer_conv.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Tomoki Hayashi +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Layer modules for FFT block in FastSpeech (Feed-forward Transformer).""" + +import torch + + +class MultiLayeredConv1d(torch.nn.Module): + """Multi-layered conv1d for Transformer block. + + This is a module of multi-leyered conv1d designed + to replace positionwise feed-forward network + in Transforner block, which is introduced in + `FastSpeech: Fast, Robust and Controllable Text to Speech`_. + + .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`: + https://arxiv.org/pdf/1905.09263.pdf + + """ + + def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate): + """Initialize MultiLayeredConv1d module. + + Args: + in_chans (int): Number of input channels. + hidden_chans (int): Number of hidden channels. + kernel_size (int): Kernel size of conv1d. + dropout_rate (float): Dropout rate. + + """ + super(MultiLayeredConv1d, self).__init__() + self.w_1 = torch.nn.Conv1d( + in_chans, + hidden_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.w_2 = torch.nn.Conv1d( + hidden_chans, + in_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Batch of input tensors (B, ..., in_chans). + + Returns: + Tensor: Batch of output tensors (B, ..., hidden_chans). + + """ + x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1) + return self.w_2(self.dropout(x).transpose(-1, 1)).transpose(-1, 1) + + +class Conv1dLinear(torch.nn.Module): + """Conv1D + Linear for Transformer block. + + A variant of MultiLayeredConv1d, which replaces second conv-layer to linear. + + """ + + def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate): + """Initialize Conv1dLinear module. + + Args: + in_chans (int): Number of input channels. + hidden_chans (int): Number of hidden channels. + kernel_size (int): Kernel size of conv1d. + dropout_rate (float): Dropout rate. + + """ + super(Conv1dLinear, self).__init__() + self.w_1 = torch.nn.Conv1d( + in_chans, + hidden_chans, + kernel_size, + stride=1, + padding=(kernel_size - 1) // 2, + ) + self.w_2 = torch.nn.Linear(hidden_chans, in_chans) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Calculate forward propagation. + + Args: + x (Tensor): Batch of input tensors (B, ..., in_chans). + + Returns: + Tensor: Batch of output tensors (B, ..., hidden_chans). 
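# Editor's illustration (not part of the committed files): both FastSpeech-style
# variants keep the (batch, time, in_chans) shape, so either can replace
# PositionwiseFeedForward inside an encoder block. Sizes are example values.
import torch
from espnet.nets.pytorch_backend.transformer.multi_layer_conv import (
    Conv1dLinear,
    MultiLayeredConv1d,
)

x = torch.randn(2, 30, 256)
ff_conv = MultiLayeredConv1d(in_chans=256, hidden_chans=1024, kernel_size=3, dropout_rate=0.1)
ff_lin = Conv1dLinear(in_chans=256, hidden_chans=1024, kernel_size=3, dropout_rate=0.1)
print(ff_conv(x).shape, ff_lin(x).shape)   # both torch.Size([2, 30, 256])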
+ + """ + x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1) + return self.w_2(self.dropout(x)) diff --git a/espnet/nets/pytorch_backend/transformer/optimizer.py b/espnet/nets/pytorch_backend/transformer/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..7f3f36259bec33ad388be46ec2dccfcadaf249be --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/optimizer.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Optimizer module.""" + +import torch + + +class NoamOpt(object): + """Optim wrapper that implements rate.""" + + def __init__(self, model_size, factor, warmup, optimizer): + """Construct an NoamOpt object.""" + self.optimizer = optimizer + self._step = 0 + self.warmup = warmup + self.factor = factor + self.model_size = model_size + self._rate = 0 + + @property + def param_groups(self): + """Return param_groups.""" + return self.optimizer.param_groups + + def step(self): + """Update parameters and rate.""" + self._step += 1 + rate = self.rate() + for p in self.optimizer.param_groups: + p["lr"] = rate + self._rate = rate + self.optimizer.step() + + def rate(self, step=None): + """Implement `lrate` above.""" + if step is None: + step = self._step + return ( + self.factor + * self.model_size ** (-0.5) + * min(step ** (-0.5), step * self.warmup ** (-1.5)) + ) + + def zero_grad(self): + """Reset gradient.""" + self.optimizer.zero_grad() + + def state_dict(self): + """Return state_dict.""" + return { + "_step": self._step, + "warmup": self.warmup, + "factor": self.factor, + "model_size": self.model_size, + "_rate": self._rate, + "optimizer": self.optimizer.state_dict(), + } + + def load_state_dict(self, state_dict): + """Load state_dict.""" + for key, value in state_dict.items(): + if key == "optimizer": + self.optimizer.load_state_dict(state_dict["optimizer"]) + else: + setattr(self, key, value) + + +def get_std_opt(model, d_model, warmup, factor): + """Get standard NoamOpt.""" + base = torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9) + return NoamOpt(d_model, factor, warmup, base) diff --git a/espnet/nets/pytorch_backend/transformer/plot.py b/espnet/nets/pytorch_backend/transformer/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..82413c9608c1ac09efc6b9b11c0bd0ad98aaa3f1 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/plot.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import logging + +import matplotlib.pyplot as plt +import numpy + +from espnet.asr import asr_utils + + +def _plot_and_save_attention(att_w, filename, xtokens=None, ytokens=None): + # dynamically import matplotlib due to not found error + from matplotlib.ticker import MaxNLocator + import os + + d = os.path.dirname(filename) + if not os.path.exists(d): + os.makedirs(d) + w, h = plt.figaspect(1.0 / len(att_w)) + fig = plt.Figure(figsize=(w * 2, h * 2)) + axes = fig.subplots(1, len(att_w)) + if len(att_w) == 1: + axes = [axes] + for ax, aw in zip(axes, att_w): + # plt.subplot(1, len(att_w), h) + ax.imshow(aw.astype(numpy.float32), aspect="auto") + ax.set_xlabel("Input") + ax.set_ylabel("Output") + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.yaxis.set_major_locator(MaxNLocator(integer=True)) + # Labels for major ticks + if xtokens is not None: + 
ax.set_xticks(numpy.linspace(0, len(xtokens) - 1, len(xtokens))) + ax.set_xticks(numpy.linspace(0, len(xtokens) - 1, 1), minor=True) + ax.set_xticklabels(xtokens + [""], rotation=40) + if ytokens is not None: + ax.set_yticks(numpy.linspace(0, len(ytokens) - 1, len(ytokens))) + ax.set_yticks(numpy.linspace(0, len(ytokens) - 1, 1), minor=True) + ax.set_yticklabels(ytokens + [""]) + fig.tight_layout() + return fig + + +def savefig(plot, filename): + plot.savefig(filename) + plt.clf() + + +def plot_multi_head_attention( + data, + attn_dict, + outdir, + suffix="png", + savefn=savefig, + ikey="input", + iaxis=0, + okey="output", + oaxis=0, +): + """Plot multi head attentions. + + :param dict data: utts info from json file + :param dict[str, torch.Tensor] attn_dict: multi head attention dict. + values should be torch.Tensor (head, input_length, output_length) + :param str outdir: dir to save fig + :param str suffix: filename suffix including image type (e.g., png) + :param savefn: function to save + + """ + for name, att_ws in attn_dict.items(): + for idx, att_w in enumerate(att_ws): + filename = "%s/%s.%s.%s" % (outdir, data[idx][0], name, suffix) + dec_len = int(data[idx][1][okey][oaxis]["shape"][0]) + enc_len = int(data[idx][1][ikey][iaxis]["shape"][0]) + xtokens, ytokens = None, None + if "encoder" in name: + att_w = att_w[:, :enc_len, :enc_len] + # for MT + if "token" in data[idx][1][ikey][iaxis].keys(): + xtokens = data[idx][1][ikey][iaxis]["token"].split() + ytokens = xtokens[:] + elif "decoder" in name: + if "self" in name: + att_w = att_w[:, : dec_len + 1, : dec_len + 1] # +1 for + else: + att_w = att_w[:, : dec_len + 1, :enc_len] # +1 for + # for MT + if "token" in data[idx][1][ikey][iaxis].keys(): + xtokens = data[idx][1][ikey][iaxis]["token"].split() + # for ASR/ST/MT + if "token" in data[idx][1][okey][oaxis].keys(): + ytokens = [""] + data[idx][1][okey][oaxis]["token"].split() + if "self" in name: + xtokens = ytokens[:] + else: + logging.warning("unknown name for shaping attention") + fig = _plot_and_save_attention(att_w, filename, xtokens, ytokens) + savefn(fig, filename) + + +class PlotAttentionReport(asr_utils.PlotAttentionReport): + def plotfn(self, *args, **kwargs): + kwargs["ikey"] = self.ikey + kwargs["iaxis"] = self.iaxis + kwargs["okey"] = self.okey + kwargs["oaxis"] = self.oaxis + plot_multi_head_attention(*args, **kwargs) + + def __call__(self, trainer): + attn_dict = self.get_attention_weights() + suffix = "ep.{.updater.epoch}.png".format(trainer) + self.plotfn(self.data, attn_dict, self.outdir, suffix, savefig) + + def get_attention_weights(self): + batch = self.converter([self.transform(self.data)], self.device) + if isinstance(batch, tuple): + att_ws = self.att_vis_fn(*batch) + elif isinstance(batch, dict): + att_ws = self.att_vis_fn(**batch) + return att_ws + + def log_attentions(self, logger, step): + def log_fig(plot, filename): + from os.path import basename + + logger.add_figure(basename(filename), plot, step) + plt.clf() + + attn_dict = self.get_attention_weights() + self.plotfn(self.data, attn_dict, self.outdir, "", log_fig) diff --git a/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py b/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py new file mode 100644 index 0000000000000000000000000000000000000000..219679209c2c4ab8ac71bbaf174bb21db65499dc --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/positionwise_feed_forward.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki 
Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Positionwise feed forward layer definition.""" + +import torch + + +class PositionwiseFeedForward(torch.nn.Module): + """Positionwise feed forward layer. + + :param int idim: input dimenstion + :param int hidden_units: number of hidden units + :param float dropout_rate: dropout rate + + """ + + def __init__(self, idim, hidden_units, dropout_rate): + """Construct an PositionwiseFeedForward object.""" + super(PositionwiseFeedForward, self).__init__() + self.w_1 = torch.nn.Linear(idim, hidden_units) + self.w_2 = torch.nn.Linear(hidden_units, idim) + self.dropout = torch.nn.Dropout(dropout_rate) + + def forward(self, x): + """Forward funciton.""" + return self.w_2(self.dropout(torch.relu(self.w_1(x)))) diff --git a/espnet/nets/pytorch_backend/transformer/raw_embeddings.py b/espnet/nets/pytorch_backend/transformer/raw_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..22d4074dc1613912afed007d9063f030113b003d --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/raw_embeddings.py @@ -0,0 +1,77 @@ +import torch +import logging + +from espnet.nets.pytorch_backend.backbones.conv3d_extractor import Conv3dResNet +from espnet.nets.pytorch_backend.backbones.conv1d_extractor import Conv1dResNet + + +class VideoEmbedding(torch.nn.Module): + """Video Embedding + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class, backbone_type="resnet", relu_type="prelu"): + super(VideoEmbedding, self).__init__() + self.trunk = Conv3dResNet( + backbone_type=backbone_type, + relu_type=relu_type + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(idim, odim), + pos_enc_class, + ) + + def forward(self, x, x_mask, extract_feats=None): + """video embedding for x + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :param str extract_features: the position for feature extraction + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + """ + x_resnet, x_mask = self.trunk(x, x_mask) + x = self.out(x_resnet) + if extract_feats: + return x, x_mask, x_resnet + else: + return x, x_mask + + +class AudioEmbedding(torch.nn.Module): + """Audio Embedding + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class, relu_type="prelu", a_upsample_ratio=1): + super(AudioEmbedding, self).__init__() + self.trunk = Conv1dResNet( + relu_type=relu_type, + a_upsample_ratio=a_upsample_ratio, + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(idim, odim), + pos_enc_class, + ) + + def forward(self, x, x_mask, extract_feats=None): + """audio embedding for x + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :param str extract_features: the position for feature extraction + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + """ + x_resnet, x_mask = self.trunk(x, x_mask) + x = self.out(x_resnet) + if extract_feats: + return x, x_mask, x_resnet + else: + return x, x_mask diff --git a/espnet/nets/pytorch_backend/transformer/repeat.py b/espnet/nets/pytorch_backend/transformer/repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..5298fd3aeaf378e7a30999f66529e2d710b8c78d --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/repeat.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# 
-*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Repeat the same layer definition.""" + +import torch + + +class MultiSequential(torch.nn.Sequential): + """Multi-input multi-output torch.nn.Sequential.""" + + def forward(self, *args): + """Repeat.""" + for m in self: + args = m(*args) + return args + + +def repeat(N, fn): + """Repeat module N times. + + :param int N: repeat time + :param function fn: function to generate module + :return: repeated modules + :rtype: MultiSequential + """ + return MultiSequential(*[fn() for _ in range(N)]) diff --git a/espnet/nets/pytorch_backend/transformer/subsampling.py b/espnet/nets/pytorch_backend/transformer/subsampling.py new file mode 100644 index 0000000000000000000000000000000000000000..612d6d6cf0b9fc8c6ea44d141df205e34d34fd38 --- /dev/null +++ b/espnet/nets/pytorch_backend/transformer/subsampling.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2019 Shigeki Karita +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +"""Subsampling layer definition.""" + +import torch + + +class Conv2dSubsampling(torch.nn.Module): + """Convolutional 2D subsampling (to 1/4 length). + + :param int idim: input dim + :param int odim: output dim + :param flaot dropout_rate: dropout rate + :param nn.Module pos_enc_class: positional encoding layer + + """ + + def __init__(self, idim, odim, dropout_rate, pos_enc_class): + """Construct an Conv2dSubsampling object.""" + super(Conv2dSubsampling, self).__init__() + self.conv = torch.nn.Sequential( + torch.nn.Conv2d(1, odim, 3, 2), + torch.nn.ReLU(), + torch.nn.Conv2d(odim, odim, 3, 2), + torch.nn.ReLU(), + ) + self.out = torch.nn.Sequential( + torch.nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim), pos_enc_class, + ) + + def forward(self, x, x_mask): + """Subsample x. + + :param torch.Tensor x: input tensor + :param torch.Tensor x_mask: input mask + :return: subsampled x and mask + :rtype Tuple[torch.Tensor, torch.Tensor] + or Tuple[Tuple[torch.Tensor, torch.Tensor], torch.Tensor] + """ + x = x.unsqueeze(1) # (b, c, t, f) + x = self.conv(x) + b, c, t, f = x.size() + # if RelPositionalEncoding, x: Tuple[torch.Tensor, torch.Tensor] + # else x: torch.Tensor + x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) + if x_mask is None: + return x, None + return x, x_mask[:, :, :-2:2][:, :, :-2:2] diff --git a/espnet/nets/scorer_interface.py b/espnet/nets/scorer_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..946ec6be317603d87cfd938cc096d11b7bcbfbdf --- /dev/null +++ b/espnet/nets/scorer_interface.py @@ -0,0 +1,188 @@ +"""Scorer interface module.""" + +from typing import Any +from typing import List +from typing import Tuple + +import torch +import warnings + + +class ScorerInterface: + """Scorer interface for beam search. + + The scorer performs scoring of the all tokens in vocabulary. 
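# Editor's illustration (not part of the committed files): a minimal ScorerInterface
# implementation showing the init_state()/score() contract the beam search expects.
# The class name, vocabulary size and tensors are made-up examples.
import torch
from espnet.nets.scorer_interface import ScorerInterface

class UniformBonus(ScorerInterface):
    def __init__(self, n_vocab: int):
        self.n_vocab = n_vocab

    def score(self, y, state, x):
        # identical score for every candidate token; no state is carried over
        return torch.zeros(self.n_vocab, device=x.device, dtype=x.dtype), None

scorer = UniformBonus(n_vocab=500)
enc = torch.randn(50, 256)                 # (xlen, n_feat) encoder output
prefix = torch.tensor([3, 7, 11])          # running hypothesis tokens
scores, state = scorer.score(prefix, scorer.init_state(enc), enc)
print(scores.shape)                        # torch.Size([500])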
+ + Examples: + * Search heuristics + * :class:`espnet.nets.scorers.length_bonus.LengthBonus` + * Decoder networks of the sequence-to-sequence models + * :class:`espnet.nets.pytorch_backend.nets.transformer.decoder.Decoder` + * :class:`espnet.nets.pytorch_backend.nets.rnn.decoders.Decoder` + * Neural language models + * :class:`espnet.nets.pytorch_backend.lm.transformer.TransformerLM` + * :class:`espnet.nets.pytorch_backend.lm.default.DefaultRNNLM` + * :class:`espnet.nets.pytorch_backend.lm.seq_rnn.SequentialRNNLM` + + """ + + def init_state(self, x: torch.Tensor) -> Any: + """Get an initial state for decoding (optional). + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + return None + + def select_state(self, state: Any, i: int, new_id: int = None) -> Any: + """Select state with relative ids in the main beam search. + + Args: + state: Decoder state for prefix tokens + i (int): Index to select a state in the main beam search + new_id (int): New label index to select a state if necessary + + Returns: + state: pruned state + + """ + return None if state is None else state[i] + + def score( + self, y: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): The encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + scores for next token that has a shape of `(n_vocab)` + and next state for ys + + """ + raise NotImplementedError + + def final_score(self, state: Any) -> float: + """Score eos (optional). + + Args: + state: Scorer state for prefix tokens + + Returns: + float: final score + + """ + return 0.0 + + +class BatchScorerInterface(ScorerInterface): + """Batch scorer interface.""" + + def batch_init_state(self, x: torch.Tensor) -> Any: + """Get an initial state for decoding (optional). + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + return self.init_state(x) + + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + warnings.warn( + "{} batch score is implemented through for loop not parallelized".format( + self.__class__.__name__ + ) + ) + scores = list() + outstates = list() + for i, (y, state, x) in enumerate(zip(ys, states, xs)): + score, outstate = self.score(y, state, x) + outstates.append(outstate) + scores.append(score) + scores = torch.cat(scores, 0).view(ys.shape[0], -1) + return scores, outstates + + +class PartialScorerInterface(ScorerInterface): + """Partial scorer interface for beam search. + + The partial scorer performs scoring when non-partial scorer finished scoring, + and receives pre-pruned next tokens to score because it is too heavy to score + all the tokens. 
+ + Examples: + * Prefix search for connectionist-temporal-classification models + * :class:`espnet.nets.scorers.ctc.CTCPrefixScorer` + + """ + + def score_partial( + self, y: torch.Tensor, next_tokens: torch.Tensor, state: Any, x: torch.Tensor + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + y (torch.Tensor): 1D prefix token + next_tokens (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): The encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + raise NotImplementedError + + +class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface): + """Batch partial scorer interface for beam search.""" + + def batch_score_partial( + self, + ys: torch.Tensor, + next_tokens: torch.Tensor, + states: List[Any], + xs: torch.Tensor, + ) -> Tuple[torch.Tensor, Any]: + """Score new token (required). + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + next_tokens (torch.Tensor): torch.int64 tokens to score (n_batch, n_token). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for ys that has a shape `(n_batch, n_vocab)` + and next states for ys + """ + raise NotImplementedError diff --git a/espnet/nets/scorers/__init__.py b/espnet/nets/scorers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b7f177368e62a5578b8706300e101f831a3972ac --- /dev/null +++ b/espnet/nets/scorers/__init__.py @@ -0,0 +1 @@ +"""Initialize sub package.""" diff --git a/espnet/nets/scorers/ctc.py b/espnet/nets/scorers/ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..1d12ce6e2a2839e05b83c1b3c4484ef8ae1df855 --- /dev/null +++ b/espnet/nets/scorers/ctc.py @@ -0,0 +1,158 @@ +"""ScorerInterface implementation for CTC.""" + +import numpy as np +import torch + +from espnet.nets.ctc_prefix_score import CTCPrefixScore +from espnet.nets.ctc_prefix_score import CTCPrefixScoreTH +from espnet.nets.scorer_interface import BatchPartialScorerInterface + + +class CTCPrefixScorer(BatchPartialScorerInterface): + """Decoder interface wrapper for CTCPrefixScore.""" + + def __init__(self, ctc: torch.nn.Module, eos: int): + """Initialize class. + + Args: + ctc (torch.nn.Module): The CTC implementation. + For example, :class:`espnet.nets.pytorch_backend.ctc.CTC` + eos (int): The end-of-sequence id. + + """ + self.ctc = ctc + self.eos = eos + self.impl = None + + def init_state(self, x: torch.Tensor): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)).detach().squeeze(0).cpu().numpy() + # TODO(karita): use CTCPrefixScoreTH + self.impl = CTCPrefixScore(logp, 0, self.eos, np) + return 0, self.impl.initial_state() + + def select_state(self, state, i, new_id=None): + """Select state with relative ids in the main beam search. 
+ + Args: + state: Decoder state for prefix tokens + i (int): Index to select a state in the main beam search + new_id (int): New label id to select a state if necessary + + Returns: + state: pruned state + + """ + if type(state) == tuple: + if len(state) == 2: # for CTCPrefixScore + sc, st = state + return sc[i], st[i] + else: # for CTCPrefixScoreTH (need new_id > 0) + r, log_psi, f_min, f_max, scoring_idmap = state + s = log_psi[i, new_id].expand(log_psi.size(1)) + if scoring_idmap is not None: + return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max + else: + return r[:, :, i, new_id], s, f_min, f_max + return None if state is None else state[i] + + def score_partial(self, y, ids, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D prefix token + next_tokens (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + prev_score, state = state + presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state) + tscore = torch.as_tensor( + presub_score - prev_score, device=x.device, dtype=x.dtype + ) + return tscore, (presub_score, new_st) + + def batch_init_state(self, x: torch.Tensor): + """Get an initial state for decoding. + + Args: + x (torch.Tensor): The encoded feature tensor + + Returns: initial state + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)) # assuming batch_size = 1 + xlen = torch.tensor([logp.size(1)]) + self.impl = CTCPrefixScoreTH(logp, xlen, 0, self.eos) + return None + + def batch_score_partial(self, y, ids, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D prefix token + ids (torch.Tensor): torch.int64 next token to score + state: decoder state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys + + Returns: + tuple[torch.Tensor, Any]: + Tuple of a score tensor for y that has a shape `(len(next_tokens),)` + and next state for ys + + """ + batch_state = ( + ( + torch.stack([s[0] for s in state], dim=2), + torch.stack([s[1] for s in state]), + state[0][2], + state[0][3], + ) + if state[0] is not None + else None + ) + return self.impl(y, batch_state, ids) + + def extend_prob(self, x: torch.Tensor): + """Extend probs for decoding. + + This extension is for streaming decoding + as in Eq (14) in https://arxiv.org/abs/2006.14941 + + Args: + x (torch.Tensor): The encoded feature tensor + + """ + logp = self.ctc.log_softmax(x.unsqueeze(0)) + self.impl.extend_prob(logp) + + def extend_state(self, state): + """Extend state for decoding. + + This extension is for streaming decoding + as in Eq (14) in https://arxiv.org/abs/2006.14941 + + Args: + state: The states of hyps + + Returns: exteded state + + """ + new_state = [] + for s in state: + new_state.append(self.impl.extend_state(s)) + + return new_state diff --git a/espnet/nets/scorers/length_bonus.py b/espnet/nets/scorers/length_bonus.py new file mode 100644 index 0000000000000000000000000000000000000000..fe32a616211591308c8e7ade144e856230d211d4 --- /dev/null +++ b/espnet/nets/scorers/length_bonus.py @@ -0,0 +1,61 @@ +"""Length bonus module.""" +from typing import Any +from typing import List +from typing import Tuple + +import torch + +from espnet.nets.scorer_interface import BatchScorerInterface + + +class LengthBonus(BatchScorerInterface): + """Length bonus in beam search.""" + + def __init__(self, n_vocab: int): + """Initialize class. 
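# Editor's illustration (not part of the committed files): how a hybrid CTC/attention
# beam search typically mixes the scores produced by scorers such as CTCPrefixScorer,
# the Transformer decoder and a language model. The weights and tensors below are
# arbitrary stand-ins, not outputs of real models.
import torch

ctc_weight, lm_weight = 0.1, 0.3
att_logp = torch.randn(500)    # attention-decoder log-probs over the vocabulary
ctc_logp = torch.randn(500)    # CTC prefix scores for the same candidates
lm_logp = torch.randn(500)     # language-model log-probs

combined = (1.0 - ctc_weight) * att_logp + ctc_weight * ctc_logp + lm_weight * lm_logp
next_token = int(combined.argmax())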
+ + Args: + n_vocab (int): The number of tokens in vocabulary for beam search + + """ + self.n = n_vocab + + def score(self, y, state, x): + """Score new token. + + Args: + y (torch.Tensor): 1D torch.int64 prefix tokens. + state: Scorer state for prefix tokens + x (torch.Tensor): 2D encoder feature that generates ys. + + Returns: + tuple[torch.Tensor, Any]: Tuple of + torch.float32 scores for next token (n_vocab) + and None + + """ + return torch.tensor([1.0], device=x.device, dtype=x.dtype).expand(self.n), None + + def batch_score( + self, ys: torch.Tensor, states: List[Any], xs: torch.Tensor + ) -> Tuple[torch.Tensor, List[Any]]: + """Score new token batch. + + Args: + ys (torch.Tensor): torch.int64 prefix tokens (n_batch, ylen). + states (List[Any]): Scorer states for prefix tokens. + xs (torch.Tensor): + The encoder feature that generates ys (n_batch, xlen, n_feat). + + Returns: + tuple[torch.Tensor, List[Any]]: Tuple of + batchfied scores for next token with shape of `(n_batch, n_vocab)` + and next state list for ys. + + """ + return ( + torch.tensor([1.0], device=xs.device, dtype=xs.dtype).expand( + ys.shape[0], self.n + ), + None, + ) diff --git a/espnet/utils/cli_utils.py b/espnet/utils/cli_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a4cd15b72f832d9118aa7a7377a13de16c329b --- /dev/null +++ b/espnet/utils/cli_utils.py @@ -0,0 +1,65 @@ +from collections.abc import Sequence +from distutils.util import strtobool as dist_strtobool +import sys + +import numpy + + +def strtobool(x): + # distutils.util.strtobool returns integer, but it's confusing, + return bool(dist_strtobool(x)) + + +def get_commandline_args(): + extra_chars = [ + " ", + ";", + "&", + "(", + ")", + "|", + "^", + "<", + ">", + "?", + "*", + "[", + "]", + "$", + "`", + '"', + "\\", + "!", + "{", + "}", + ] + + # Escape the extra characters for shell + argv = [ + arg.replace("'", "'\\''") + if all(char not in arg for char in extra_chars) + else "'" + arg.replace("'", "'\\''") + "'" + for arg in sys.argv + ] + + return sys.executable + " " + " ".join(argv) + + +def is_scipy_wav_style(value): + # If Tuple[int, numpy.ndarray] or not + return ( + isinstance(value, Sequence) + and len(value) == 2 + and isinstance(value[0], int) + and isinstance(value[1], numpy.ndarray) + ) + + +def assert_scipy_wav_style(value): + assert is_scipy_wav_style( + value + ), "Must be Tuple[int, numpy.ndarray], but got {}".format( + type(value) + if not isinstance(value, Sequence) + else "{}[{}]".format(type(value), ", ".join(str(type(v)) for v in value)) + ) diff --git a/espnet/utils/dynamic_import.py b/espnet/utils/dynamic_import.py new file mode 100644 index 0000000000000000000000000000000000000000..db885d0069bfb8f59dcf03f5477c13706574b217 --- /dev/null +++ b/espnet/utils/dynamic_import.py @@ -0,0 +1,23 @@ +import importlib + + +def dynamic_import(import_path, alias=dict()): + """dynamic import module and class + + :param str import_path: syntax 'module_name:class_name' + e.g., 'espnet.transform.add_deltas:AddDeltas' + :param dict alias: shortcut for registered class + :return: imported class + """ + if import_path not in alias and ":" not in import_path: + raise ValueError( + "import_path should be one of {} or " + 'include ":", e.g. 
"espnet.transform.add_deltas:AddDeltas" : ' + "{}".format(set(alias), import_path) + ) + if ":" not in import_path: + import_path = alias[import_path] + + module_name, objname = import_path.split(":") + m = importlib.import_module(module_name) + return getattr(m, objname) diff --git a/espnet/utils/fill_missing_args.py b/espnet/utils/fill_missing_args.py new file mode 100644 index 0000000000000000000000000000000000000000..a0fd117529569976780436c0d79e7ce158cd44e9 --- /dev/null +++ b/espnet/utils/fill_missing_args.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Nagoya University (Tomoki Hayashi) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import argparse +import logging + + +def fill_missing_args(args, add_arguments): + """Fill missing arguments in args. + + Args: + args (Namespace or None): Namesapce containing hyperparameters. + add_arguments (function): Function to add arguments. + + Returns: + Namespace: Arguments whose missing ones are filled with default value. + + Examples: + >>> from argparse import Namespace + >>> from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import Tacotron2 + >>> args = Namespace() + >>> fill_missing_args(args, Tacotron2.add_arguments_fn) + Namespace(aconv_chans=32, aconv_filts=15, adim=512, atype='location', ...) + + """ + # check argument type + assert isinstance(args, argparse.Namespace) or args is None + assert callable(add_arguments) + + # get default arguments + default_args, _ = add_arguments(argparse.ArgumentParser()).parse_known_args() + + # convert to dict + args = {} if args is None else vars(args) + default_args = vars(default_args) + + for key, value in default_args.items(): + if key not in args: + logging.info( + 'attribute "%s" does not exist. use default %s.' % (key, str(value)) + ) + args[key] = value + + return argparse.Namespace(**args) diff --git a/pipelines/.DS_Store b/pipelines/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4e746b2e413c60c4bbae49875e5f9bf1451c8fae Binary files /dev/null and b/pipelines/.DS_Store differ diff --git a/pipelines/data/.DS_Store b/pipelines/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/pipelines/data/.DS_Store differ diff --git a/pipelines/data/data_module.py b/pipelines/data/data_module.py new file mode 100644 index 0000000000000000000000000000000000000000..576da503b4a1d36775b7acaffbc5335ef1b777fb --- /dev/null +++ b/pipelines/data/data_module.py @@ -0,0 +1,68 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torchaudio +import torchvision +from .transforms import AudioTransform, VideoTransform + + +class AVSRDataLoader: + def __init__(self, modality, speed_rate=1, transform=True, detector="retinaface", convert_gray=True): + self.modality = modality + self.transform = transform + if self.modality in ["audio", "audiovisual"]: + self.audio_transform = AudioTransform() + if self.modality in ["video", "audiovisual"]: + if detector == "mediapipe": + from pipelines.detectors.mediapipe.video_process import VideoProcess + self.video_process = VideoProcess(convert_gray=convert_gray) + if detector == "retinaface": + from pipelines.detectors.retinaface.video_process import VideoProcess + self.video_process = VideoProcess(convert_gray=convert_gray) + self.video_transform = VideoTransform(speed_rate=speed_rate) + + + def load_data(self, data_filename, landmarks=None, transform=True): + if self.modality == "audio": + audio, sample_rate = self.load_audio(data_filename) + audio = self.audio_process(audio, sample_rate) + return self.audio_transform(audio) if self.transform else audio + if self.modality == "video": + video = self.load_video(data_filename) + video = self.video_process(video, landmarks) + video = torch.tensor(video) + return self.video_transform(video) if self.transform else video + if self.modality == "audiovisual": + rate_ratio = 640 + audio, sample_rate = self.load_audio(data_filename) + audio = self.audio_process(audio, sample_rate) + video = self.load_video(data_filename) + video = self.video_process(video, landmarks) + video = torch.tensor(video) + min_t = min(len(video), audio.size(1) // rate_ratio) + audio = audio[:, :min_t*rate_ratio] + video = video[:min_t] + if self.transform: + audio = self.audio_transform(audio) + video = self.video_transform(video) + return video, audio + + + def load_audio(self, data_filename): + waveform, sample_rate = torchaudio.load(data_filename, normalize=True) + return waveform, sample_rate + + + def load_video(self, data_filename): + return torchvision.io.read_video(data_filename, pts_unit='sec')[0].numpy() + + + def audio_process(self, waveform, sample_rate, target_sample_rate=16000): + if sample_rate != target_sample_rate: + waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate) + waveform = torch.mean(waveform, dim=0, keepdim=True) + return waveform diff --git a/pipelines/data/transforms.py b/pipelines/data/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..53a8d3513d9e19cd82d7df80692c335a940e56fc --- /dev/null +++ b/pipelines/data/transforms.py @@ -0,0 +1,44 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import torch +import torchaudio +import torchvision + + +class FunctionalModule(torch.nn.Module): + def __init__(self, functional): + super().__init__() + self.functional = functional + + def forward(self, input): + return self.functional(input) + + +class VideoTransform: + def __init__(self, speed_rate): + self.video_pipeline = torch.nn.Sequential( + FunctionalModule(lambda x: x.unsqueeze(-1)), + FunctionalModule(lambda x: x if speed_rate == 1 else torch.index_select(x, dim=0, index=torch.linspace(0, x.shape[0]-1, int(x.shape[0] / speed_rate), dtype=torch.int64))), + FunctionalModule(lambda x: x.permute(3, 0, 1, 2)), + FunctionalModule(lambda x: x / 255.), + torchvision.transforms.CenterCrop(88), + torchvision.transforms.Normalize(0.421, 0.165), + ) + + def __call__(self, sample): + return self.video_pipeline(sample) + + +class AudioTransform: + def __init__(self): + self.audio_pipeline = torch.nn.Sequential( + FunctionalModule(lambda x: torch.nn.functional.layer_norm(x, x.shape, eps=0)), + FunctionalModule(lambda x: x.transpose(0, 1)), + ) + + def __call__(self, sample): + return self.audio_pipeline(sample) diff --git a/pipelines/detectors/.DS_Store b/pipelines/detectors/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7b97b0b8349a56e17441fafb92fe013d6b1e82bc Binary files /dev/null and b/pipelines/detectors/.DS_Store differ diff --git a/pipelines/detectors/mediapipe/20words_mean_face.npy b/pipelines/detectors/mediapipe/20words_mean_face.npy new file mode 100755 index 0000000000000000000000000000000000000000..fc5cd3103270737752bebaec497c39b49b2af970 --- /dev/null +++ b/pipelines/detectors/mediapipe/20words_mean_face.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf68b2044171e1160716df7c53e8bbfaa0ee8c61fb41171d04cb6092bb81422 +size 1168 diff --git a/pipelines/detectors/mediapipe/detector.py b/pipelines/detectors/mediapipe/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..5f51c49418458851c517e956404b13d786ee7b6c --- /dev/null +++ b/pipelines/detectors/mediapipe/detector.py @@ -0,0 +1,57 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import warnings +import torchvision +import mediapipe as mp +import os +import cv2 +import numpy as np + + +class LandmarksDetector: + def __init__(self): + self.mp_face_detection = mp.solutions.face_detection + self.short_range_detector = self.mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=0) + self.full_range_detector = self.mp_face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=1) + + def __call__(self, filename): + video_frames = torchvision.io.read_video(filename, pts_unit='sec')[0].numpy() + landmarks = self.detect(video_frames, self.full_range_detector) + if all(element is None for element in landmarks): + landmarks = self.detect(video_frames, self.short_range_detector) + assert any(l is not None for l in landmarks), "Cannot detect any frames in the video" + return landmarks + + def detect(self, video_frames, detector): + landmarks = [] + for frame in video_frames: + results = detector.process(frame) + if not results.detections: + landmarks.append(None) + continue + face_points = [] + for idx, detected_faces in enumerate(results.detections): + max_id, max_size = 0, 0 + bboxC = detected_faces.location_data.relative_bounding_box + ih, iw, ic = frame.shape + bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih) + bbox_size = (bbox[2] - bbox[0]) + (bbox[3] - bbox[1]) + if bbox_size > max_size: + max_id, max_size = idx, bbox_size + lmx = [ + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(0).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(0).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(1).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(1).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(2).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(2).value].y * ih)], + [int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(3).value].x * iw), + int(detected_faces.location_data.relative_keypoints[self.mp_face_detection.FaceKeyPoint(3).value].y * ih)], + ] + face_points.append(lmx) + landmarks.append(np.array(face_points[max_id])) + return landmarks diff --git a/pipelines/detectors/mediapipe/video_process.py b/pipelines/detectors/mediapipe/video_process.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd579fc042467c62f0ab4a7ee7be472362c5de7 --- /dev/null +++ b/pipelines/detectors/mediapipe/video_process.py @@ -0,0 +1,142 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import cv2 +import numpy as np +from skimage import transform as tf + + +def linear_interpolate(landmarks, start_idx, stop_idx): + start_landmarks = landmarks[start_idx] + stop_landmarks = landmarks[stop_idx] + delta = stop_landmarks - start_landmarks + for idx in range(1, stop_idx-start_idx): + landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta + return landmarks + + +def warp_img(src, dst, img, std_size): + tform = tf.estimate_transform('similarity', src, dst) + warped = tf.warp(img, inverse_map=tform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped, tform + + +def apply_transform(transform, img, std_size): + warped = tf.warp(img, inverse_map=transform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped + + +def cut_patch(img, landmarks, height, width, threshold=5): + center_x, center_y = np.mean(landmarks, axis=0) + # Check for too much bias in height and width + if abs(center_y - img.shape[0] / 2) > height + threshold: + raise Exception('too much bias in height') + if abs(center_x - img.shape[1] / 2) > width + threshold: + raise Exception('too much bias in width') + # Calculate bounding box coordinates + y_min = int(round(np.clip(center_y - height, 0, img.shape[0]))) + y_max = int(round(np.clip(center_y + height, 0, img.shape[0]))) + x_min = int(round(np.clip(center_x - width, 0, img.shape[1]))) + x_max = int(round(np.clip(center_x + width, 0, img.shape[1]))) + # Cut the image + cutted_img = np.copy(img[y_min:y_max, x_min:x_max]) + return cutted_img + + +class VideoProcess: + def __init__(self, mean_face_path="20words_mean_face.npy", crop_width=96, crop_height=96, + start_idx=3, stop_idx=4, window_margin=12, convert_gray=True): + self.reference = np.load(os.path.join(os.path.dirname(__file__), mean_face_path)) + self.crop_width = crop_width + self.crop_height = crop_height + self.start_idx = start_idx + self.stop_idx = stop_idx + self.window_margin = window_margin + self.convert_gray = convert_gray + + def __call__(self, video, landmarks): + # Pre-process landmarks: interpolate frames that are not detected + preprocessed_landmarks = self.interpolate_landmarks(landmarks) + # Exclude corner cases: no landmark in all frames + if not preprocessed_landmarks: + return + # Affine transformation and crop patch + sequence = self.crop_patch(video, preprocessed_landmarks) + assert sequence is not None, f"cannot crop a patch from {filename}." 
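The call above first repairs the landmark track before anything is cropped: interpolate_landmarks locates runs of frames where the detector returned None, and linear_interpolate (defined earlier in this file) fills them linearly between the two nearest detected frames. A minimal standalone sketch of that gap-filling, with made-up landmark arrays (only the shapes matter), might look like:

import numpy as np

# Sketch only, not part of the patch; the landmark values are fabricated.
landmarks = [np.array([[0.0, 0.0]]), None, None, np.array([[3.0, 6.0]])]

valid = [i for i, lm in enumerate(landmarks) if lm is not None]   # [0, 3]
start_idx, stop_idx = valid[0], valid[1]
delta = landmarks[stop_idx] - landmarks[start_idx]
for idx in range(1, stop_idx - start_idx):
    landmarks[start_idx + idx] = landmarks[start_idx] + idx / float(stop_idx - start_idx) * delta

print(landmarks[1], landmarks[2])   # [[1. 2.]] [[2. 4.]]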
+ return sequence + + + def crop_patch(self, video, landmarks): + sequence = [] + for frame_idx, frame in enumerate(video): + window_margin = min(self.window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx) + smoothed_landmarks = np.mean([landmarks[x] for x in range(frame_idx - window_margin, frame_idx + window_margin + 1)], axis=0) + smoothed_landmarks += landmarks[frame_idx].mean(axis=0) - smoothed_landmarks.mean(axis=0) + transformed_frame, transformed_landmarks = self.affine_transform(frame,smoothed_landmarks,self.reference,grayscale=self.convert_gray) + patch = cut_patch(transformed_frame, transformed_landmarks[self.start_idx:self.stop_idx], self.crop_height//2, self.crop_width//2,) + sequence.append(patch) + return np.array(sequence) + + + def interpolate_landmarks(self, landmarks): + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + if not valid_frames_idx: + return None + + for idx in range(1, len(valid_frames_idx)): + if valid_frames_idx[idx] - valid_frames_idx[idx - 1] > 1: + landmarks = linear_interpolate(landmarks, valid_frames_idx[idx - 1], valid_frames_idx[idx]) + + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + # Handle corner case: keep frames at the beginning or at the end that failed to be detected + if valid_frames_idx: + landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0] + landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1]) + + assert all(lm is not None for lm in landmarks), "not every frame has landmark" + + return landmarks + + + def affine_transform(self, frame, landmarks, reference, grayscale=False, + target_size=(256, 256), reference_size=(256, 256), stable_points=(0, 1, 2, 3), + interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, border_value=0): + if grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + stable_reference = self.get_stable_reference(reference, reference_size, target_size) + transform = self.estimate_affine_transform(landmarks, stable_points, stable_reference) + transformed_frame, transformed_landmarks = self.apply_affine_transform(frame, landmarks, transform, target_size, interpolation, border_mode, border_value) + + return transformed_frame, transformed_landmarks + + + def get_stable_reference(self, reference, reference_size, target_size): + # -- right eye, left eye, nose tip, mouth center + stable_reference = np.vstack([ + np.mean(reference[36:42], axis=0), + np.mean(reference[42:48], axis=0), + np.mean(reference[31:36], axis=0), + np.mean(reference[48:68], axis=0) + ]) + stable_reference[:, 0] -= (reference_size[0] - target_size[0]) / 2.0 + stable_reference[:, 1] -= (reference_size[1] - target_size[1]) / 2.0 + return stable_reference + + + def estimate_affine_transform(self, landmarks, stable_points, stable_reference): + return cv2.estimateAffinePartial2D(np.vstack([landmarks[x] for x in stable_points]), stable_reference, method=cv2.LMEDS)[0] + + + def apply_affine_transform(self, frame, landmarks, transform, target_size, interpolation, border_mode, border_value): + transformed_frame = cv2.warpAffine(frame, transform, dsize=(target_size[0], target_size[1]), + flags=interpolation, borderMode=border_mode, borderValue=border_value) + transformed_landmarks = np.matmul(landmarks, transform[:, :2].transpose()) + transform[:, 2].transpose() + return transformed_frame, transformed_landmarks diff --git a/pipelines/detectors/retinaface/20words_mean_face.npy 
b/pipelines/detectors/retinaface/20words_mean_face.npy new file mode 100755 index 0000000000000000000000000000000000000000..fc5cd3103270737752bebaec497c39b49b2af970 --- /dev/null +++ b/pipelines/detectors/retinaface/20words_mean_face.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf68b2044171e1160716df7c53e8bbfaa0ee8c61fb41171d04cb6092bb81422 +size 1168 diff --git a/pipelines/detectors/retinaface/detector.py b/pipelines/detectors/retinaface/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..ad699adf4b41b8f3eb269b8d9def4d89c87e9e09 --- /dev/null +++ b/pipelines/detectors/retinaface/detector.py @@ -0,0 +1,38 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import warnings +import torchvision +from ibug.face_detection import RetinaFacePredictor +from ibug.face_alignment import FANPredictor +warnings.filterwarnings("ignore") + + +class LandmarksDetector: + def __init__(self, device="cuda:0", model_name='resnet50'): + self.face_detector = RetinaFacePredictor( + device=device, + threshold=0.8, + model=RetinaFacePredictor.get_model(model_name) + ) + self.landmark_detector = FANPredictor(device=device, model=None) + + def __call__(self, filename): + video_frames = torchvision.io.read_video(filename, pts_unit='sec')[0].numpy() + landmarks = [] + for frame in video_frames: + detected_faces = self.face_detector(frame, rgb=False) + face_points, _ = self.landmark_detector(frame, detected_faces, rgb=True) + if len(detected_faces) == 0: + landmarks.append(None) + else: + max_id, max_size = 0, 0 + for idx, bbox in enumerate(detected_faces): + bbox_size = (bbox[2] - bbox[0]) + (bbox[3] - bbox[1]) + if bbox_size > max_size: + max_id, max_size = idx, bbox_size + landmarks.append(face_points[max_id]) + return landmarks diff --git a/pipelines/detectors/retinaface/video_process.py b/pipelines/detectors/retinaface/video_process.py new file mode 100644 index 0000000000000000000000000000000000000000..383bd12c3e073a40309999c6b633f10501014ca8 --- /dev/null +++ b/pipelines/detectors/retinaface/video_process.py @@ -0,0 +1,136 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import cv2 +import numpy as np +from skimage import transform as tf + + +def linear_interpolate(landmarks, start_idx, stop_idx): + start_landmarks = landmarks[start_idx] + stop_landmarks = landmarks[stop_idx] + delta = stop_landmarks - start_landmarks + for idx in range(1, stop_idx-start_idx): + landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta + return landmarks + + +def warp_img(src, dst, img, std_size): + tform = tf.estimate_transform('similarity', src, dst) + warped = tf.warp(img, inverse_map=tform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped, tform + + +def apply_transform(transform, img, std_size): + warped = tf.warp(img, inverse_map=transform.inverse, output_shape=std_size) + warped = (warped * 255).astype('uint8') + return warped + + +def cut_patch(img, landmarks, height, width, threshold=5): + center_x, center_y = np.mean(landmarks, axis=0) + # Check for too much bias in height and width + if abs(center_y - img.shape[0] / 2) > height + threshold: + raise Exception('too much bias in height') + if abs(center_x - img.shape[1] / 2) > width + threshold: + raise Exception('too much bias in width') + # Calculate bounding box coordinates + y_min = int(round(np.clip(center_y - height, 0, img.shape[0]))) + y_max = int(round(np.clip(center_y + height, 0, img.shape[0]))) + x_min = int(round(np.clip(center_x - width, 0, img.shape[1]))) + x_max = int(round(np.clip(center_x + width, 0, img.shape[1]))) + # Cut the image + cutted_img = np.copy(img[y_min:y_max, x_min:x_max]) + return cutted_img + + +class VideoProcess: + def __init__(self, mean_face_path="20words_mean_face.npy", crop_width=96, crop_height=96, + start_idx=48, stop_idx=68, window_margin=12, convert_gray=True): + self.reference = np.load(os.path.join(os.path.dirname(__file__), mean_face_path)) + self.crop_width = crop_width + self.crop_height = crop_height + self.start_idx = start_idx + self.stop_idx = stop_idx + self.window_margin = window_margin + self.convert_gray = convert_gray + + def __call__(self, video, landmarks): + # Pre-process landmarks: interpolate frames that are not detected + preprocessed_landmarks = self.interpolate_landmarks(landmarks) + # Exclude corner cases: no landmark in all frames or number of frames is less than window length + if not preprocessed_landmarks or len(preprocessed_landmarks) < self.window_margin: + return + # Affine transformation and crop patch + sequence = self.crop_patch(video, preprocessed_landmarks) + assert sequence is not None, f"cannot crop a patch from {filename}." 
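crop_patch (just below) does not crop around the raw per-frame landmarks: it first averages them over a temporal window and then shifts the average so it keeps the current frame's centroid, which smooths detector jitter without moving the mouth region. A small sketch of that smoothing step, using this file's default window_margin of 12 and a fabricated 68-point track:

import numpy as np

# Sketch only, not part of the patch; the landmark track is random.
window_margin = 12
landmarks = [np.random.rand(68, 2) * 256 for _ in range(30)]   # 30 frames, 68 (x, y) points

frame_idx = 15
margin = min(window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx)   # 6
window = [landmarks[i] for i in range(frame_idx - margin, frame_idx + margin + 1)]
smoothed = np.mean(window, axis=0)
# keep the current frame's centroid, so smoothing removes jitter rather than position
smoothed += landmarks[frame_idx].mean(axis=0) - smoothed.mean(axis=0)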
+ return sequence + + + def crop_patch(self, video, landmarks): + sequence = [] + for frame_idx, frame in enumerate(video): + window_margin = min(self.window_margin // 2, frame_idx, len(landmarks) - 1 - frame_idx) + smoothed_landmarks = np.mean([landmarks[x] for x in range(frame_idx - window_margin, frame_idx + window_margin + 1)], axis=0) + smoothed_landmarks += landmarks[frame_idx].mean(axis=0) - smoothed_landmarks.mean(axis=0) + transformed_frame, transformed_landmarks = self.affine_transform(frame,smoothed_landmarks,self.reference,grayscale=self.convert_gray) + patch = cut_patch(transformed_frame, transformed_landmarks[self.start_idx:self.stop_idx], self.crop_height//2, self.crop_width//2,) + sequence.append(patch) + return np.array(sequence) + + + def interpolate_landmarks(self, landmarks): + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + if not valid_frames_idx: + return None + + for idx in range(1, len(valid_frames_idx)): + if valid_frames_idx[idx] - valid_frames_idx[idx - 1] > 1: + landmarks = linear_interpolate(landmarks, valid_frames_idx[idx - 1], valid_frames_idx[idx]) + + valid_frames_idx = [idx for idx, lm in enumerate(landmarks) if lm is not None] + + # Handle corner case: keep frames at the beginning or at the end that failed to be detected + if valid_frames_idx: + landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0] + landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1]) + + assert all(lm is not None for lm in landmarks), "not every frame has landmark" + + return landmarks + + + def affine_transform(self, frame, landmarks, reference, grayscale=True, + target_size=(256, 256), reference_size=(256, 256), stable_points=(28, 33, 36, 39, 42, 45, 48, 54), + interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, border_value=0): + if grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + stable_reference = self.get_stable_reference(reference, stable_points, reference_size, target_size) + transform = self.estimate_affine_transform(landmarks, stable_points, stable_reference) + transformed_frame, transformed_landmarks = self.apply_affine_transform(frame, landmarks, transform, target_size, interpolation, border_mode, border_value) + + return transformed_frame, transformed_landmarks + + + def get_stable_reference(self, reference, stable_points, reference_size, target_size): + stable_reference = np.vstack([reference[x] for x in stable_points]) + stable_reference[:, 0] -= (reference_size[0] - target_size[0]) / 2.0 + stable_reference[:, 1] -= (reference_size[1] - target_size[1]) / 2.0 + return stable_reference + + + def estimate_affine_transform(self, landmarks, stable_points, stable_reference): + return cv2.estimateAffinePartial2D(np.vstack([landmarks[x] for x in stable_points]), stable_reference, method=cv2.LMEDS)[0] + + + def apply_affine_transform(self, frame, landmarks, transform, target_size, interpolation, border_mode, border_value): + transformed_frame = cv2.warpAffine(frame, transform, dsize=(target_size[0], target_size[1]), + flags=interpolation, borderMode=border_mode, borderValue=border_value) + transformed_landmarks = np.matmul(landmarks, transform[:, :2].transpose()) + transform[:, 2].transpose() + return transformed_frame, transformed_landmarks diff --git a/pipelines/metrics/measures.py b/pipelines/metrics/measures.py new file mode 100644 index 0000000000000000000000000000000000000000..a28e3626f6e7dc6774018b5ca15026111e7f24f9 --- 
/dev/null +++ b/pipelines/metrics/measures.py @@ -0,0 +1,43 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2021 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +# This code refers https://github.com/espnet/espnet/blob/24c3676a8d4c2e60d2726e9bcd9bdbed740610e0/espnet/nets/e2e_asr_common.py#L213-L249 + +import numpy as np + +def get_wer(s, ref): + return get_er(s.split(), ref.split()) + +def get_cer(s, ref): + return get_er(s.replace(" ", ""), ref.replace(" ", "")) + +def get_er(s, ref): + """ + FROM wikipedia levenshtein distance + s: list of words/char in sentence to measure + ref: list of words/char in reference + """ + + costs = np.zeros((len(s) + 1, len(ref) + 1)) + for i in range(len(s) + 1): + costs[i, 0] = i + for j in range(len(ref) + 1): + costs[0, j] = j + + for j in range(1, len(ref) + 1): + for i in range(1, len(s) + 1): + cost = None + if s[i-1] == ref[j-1]: + cost = 0 + else: + cost = 1 + costs[i,j] = min( + costs[i-1, j] + 1, + costs[i, j-1] + 1, + costs[i-1, j-1] + cost + ) + + return costs[-1,-1] / len(ref) diff --git a/pipelines/model.py b/pipelines/model.py new file mode 100644 index 0000000000000000000000000000000000000000..515987ab5f87f08727785218247064b2b7d23405 --- /dev/null +++ b/pipelines/model.py @@ -0,0 +1,99 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import json +import torch +import argparse +import numpy as np + +from espnet.asr.asr_utils import torch_load +from espnet.asr.asr_utils import get_model_conf +from espnet.asr.asr_utils import add_results_to_json +from espnet.nets.batch_beam_search import BatchBeamSearch +from espnet.nets.lm_interface import dynamic_import_lm +from espnet.nets.scorers.length_bonus import LengthBonus +from espnet.nets.pytorch_backend.e2e_asr_transformer import E2E + + +class AVSR(torch.nn.Module): + def __init__(self, modality, model_path, model_conf, rnnlm=None, rnnlm_conf=None, + penalty=0., ctc_weight=0.1, lm_weight=0., beam_size=40, device="cuda:0"): + super(AVSR, self).__init__() + self.device = device + + if modality == "audiovisual": + from espnet.nets.pytorch_backend.e2e_asr_transformer_av import E2E + else: + from espnet.nets.pytorch_backend.e2e_asr_transformer import E2E + + with open(model_conf, "rb") as f: + confs = json.load(f) + args = confs if isinstance(confs, dict) else confs[2] + self.train_args = argparse.Namespace(**args) + + labels_type = getattr(self.train_args, "labels_type", "char") + if labels_type == "char": + self.token_list = self.train_args.char_list + elif labels_type == "unigram5000": + file_path = os.path.join(os.path.dirname(__file__), "tokens", "unigram5000_units.txt") + self.token_list = [''] + [word.split()[0] for word in open(file_path).read().splitlines()] + [''] + self.odim = len(self.token_list) + + self.model = E2E(self.odim, self.train_args) + self.model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) + self.model.to(device=self.device).eval() + + self.beam_search = get_beam_search_decoder(self.model, self.token_list, rnnlm, rnnlm_conf, penalty, ctc_weight, lm_weight, beam_size) + self.beam_search.to(device=self.device).eval() + + def infer(self, data): + with torch.no_grad(): + if isinstance(data, tuple): + enc_feats = self.model.encode(data[0].to(self.device), data[1].to(self.device)) + else: + enc_feats = self.model.encode(data.to(self.device)) + 
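The self.beam_search call that follows ranks hypotheses by a weighted sum of the scorers assembled in get_beam_search_decoder further down: the attention decoder, the CTC prefix scorer, the optional subword RNN language model, and LengthBonus. Roughly, and with made-up log-scores (this is a sketch of the weighted combination, not of ESPnet's actual BatchBeamSearch internals):

# Sketch only, not part of the patch; the numeric scores are fabricated.
ctc_weight, lm_weight, penalty = 0.1, 0.3, 0.0
weights = {
    "decoder": 1.0 - ctc_weight,   # attention decoder
    "ctc": ctc_weight,             # CTC prefix score
    "lm": lm_weight,               # RNN language model
    "length_bonus": penalty,       # LengthBonus scorer
}
partial = {"decoder": -1.2, "ctc": -2.5, "lm": -0.8, "length_bonus": 1.0}
hyp_score = sum(weights[k] * partial[k] for k in weights)   # -1.57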
nbest_hyps = self.beam_search(enc_feats) + nbest_hyps = [h.asdict() for h in nbest_hyps[: min(len(nbest_hyps), 1)]] + transcription = add_results_to_json(nbest_hyps, self.token_list) + transcription = transcription.replace("▁", " ").strip() + return transcription.replace("", "") + + +def get_beam_search_decoder(model, token_list, rnnlm=None, rnnlm_conf=None, penalty=0, ctc_weight=0.1, lm_weight=0., beam_size=40): + sos = model.odim - 1 + eos = model.odim - 1 + scorers = model.scorers() + + if not rnnlm: + lm = None + else: + lm_args = get_model_conf(rnnlm, rnnlm_conf) + lm_model_module = getattr(lm_args, "model_module", "default") + lm_class = dynamic_import_lm(lm_model_module, lm_args.backend) + lm = lm_class(len(token_list), lm_args) + torch_load(rnnlm, lm) + lm.eval() + + scorers["lm"] = lm + scorers["length_bonus"] = LengthBonus(len(token_list)) + weights = dict( + decoder=1.0 - ctc_weight, + ctc=ctc_weight, + lm=lm_weight, + length_bonus=penalty, + ) + + return BatchBeamSearch( + beam_size=beam_size, + vocab_size=len(token_list), + weights=weights, + scorers=scorers, + sos=sos, + eos=eos, + token_list=token_list, + pre_beam_score_key=None if ctc_weight == 1.0 else "decoder", + ) diff --git a/pipelines/pipeline.py b/pipelines/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..94e620bdb69690ab9c4b78bd456d19586115b6e4 --- /dev/null +++ b/pipelines/pipeline.py @@ -0,0 +1,73 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2023 Imperial College London (Pingchuan Ma) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +import os +import torch +import pickle +from configparser import ConfigParser + +from pipelines.model import AVSR +from pipelines.data.data_module import AVSRDataLoader + + +class InferencePipeline(torch.nn.Module): + def __init__(self, config_filename, detector="retinaface", face_track=False, device="cuda:0"): + super(InferencePipeline, self).__init__() + assert os.path.isfile(config_filename), f"config_filename: {config_filename} does not exist." 
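The two v_fps values read just below only matter through their ratio: speed_rate = input_v_fps / model_v_fps is handed to AVSRDataLoader and, from there, to VideoTransform, whose torch.index_select step keeps roughly one frame in every speed_rate. A minimal sketch with a hypothetical 50 fps recording and a 25 fps model:

import torch

# Sketch only, not part of the patch; the fps values and the dummy clip are made up.
input_v_fps, model_v_fps = 50.0, 25.0
speed_rate = input_v_fps / model_v_fps            # 2.0 -> keep every other frame
x = torch.zeros(100, 88, 88, 1)                   # (frames, H, W, C) dummy clip
index = torch.linspace(0, x.shape[0] - 1, int(x.shape[0] / speed_rate), dtype=torch.int64)
resampled = torch.index_select(x, dim=0, index=index)
print(resampled.shape[0])                         # 50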
+ + config = ConfigParser() + config.read(config_filename) + + # modality configuration + modality = config.get("input", "modality") + + self.modality = modality + # data configuration + input_v_fps = config.getfloat("input", "v_fps") + model_v_fps = config.getfloat("model", "v_fps") + + # model configuration + model_path = config.get("model","model_path") + model_conf = config.get("model","model_conf") + + # language model configuration + rnnlm = config.get("model", "rnnlm") + rnnlm_conf = config.get("model", "rnnlm_conf") + penalty = config.getfloat("decode", "penalty") + ctc_weight = config.getfloat("decode", "ctc_weight") + lm_weight = config.getfloat("decode", "lm_weight") + beam_size = config.getint("decode", "beam_size") + + self.dataloader = AVSRDataLoader(modality, speed_rate=input_v_fps/model_v_fps, detector=detector) + self.model = AVSR(modality, model_path, model_conf, rnnlm, rnnlm_conf, penalty, ctc_weight, lm_weight, beam_size, device) + if face_track and self.modality in ["video", "audiovisual"]: + if detector == "mediapipe": + from pipelines.detectors.mediapipe.detector import LandmarksDetector + self.landmarks_detector = LandmarksDetector() + if detector == "retinaface": + from pipelines.detectors.retinaface.detector import LandmarksDetector + self.landmarks_detector = LandmarksDetector(device="cuda:0") + else: + self.landmarks_detector = None + + + def process_landmarks(self, data_filename, landmarks_filename): + if self.modality == "audio": + return None + if self.modality in ["video", "audiovisual"]: + if isinstance(landmarks_filename, str): + landmarks = pickle.load(open(landmarks_filename, "rb")) + else: + landmarks = self.landmarks_detector(data_filename) + return landmarks + + + def forward(self, data_filename, landmarks_filename=None): + assert os.path.isfile(data_filename), f"data_filename: {data_filename} does not exist." 
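For reference, a typical way to drive this class end to end; the paths below are placeholders, face_track=True makes the pipeline run its own landmark detector, and passing a pickled landmarks file instead skips that step:

from pipelines.pipeline import InferencePipeline

# Usage sketch, not part of the patch; all file paths are placeholders.
pipeline = InferencePipeline("path/to/config.ini", detector="retinaface",
                             face_track=True, device="cuda:0")
print(pipeline("path/to/video.mp4"))                            # landmarks detected on the fly
print(pipeline("path/to/video.mp4", "path/to/landmarks.pkl"))   # reuse precomputed landmarks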
+ landmarks = self.process_landmarks(data_filename, landmarks_filename) + data = self.dataloader.load_data(data_filename, landmarks) + transcript = self.model.infer(data) + return transcript \ No newline at end of file diff --git a/pipelines/tokens/unigram5000_units.txt b/pipelines/tokens/unigram5000_units.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b5c74fc7cfded197692e4631fd894a3ba275da1 --- /dev/null +++ b/pipelines/tokens/unigram5000_units.txt @@ -0,0 +1,5047 @@ + 1 +' 2 +0 3 +00 4 +000 5 +0000 6 +1 7 +2 8 +3 9 +4 10 +44 11 +46 12 +46664 13 +467 14 +47 15 +474 16 +47748 17 +48 18 +484 19 +5 20 +6 21 +64 22 +646 23 +647 24 +66 25 +664 26 +67 27 +677 28 +68 29 +687 30 +688 31 +7 32 +74 33 +747 34 +76 35 +766 36 +77 37 +776 38 +777 39 +78 40 +7864 41 +787 42 +8 43 +84 44 +847 45 +848 46 +86 47 +864 48 +867 49 +87 50 +874 51 +876 52 +877 53 +878 54 +88 55 +884 56 +886 57 +887 58 +888 59 +9 60 +A 61 +AB 62 +ABILITY 63 +ABLE 64 +ABLY 65 +AC 66 +ACH 67 +ACTIVE 68 +AD 69 +ADE 70 +AFT 71 +AG 72 +AGE 73 +AH 74 +AI 75 +AIN 76 +AK 77 +AKE 78 +AL 79 +ALLY 80 +AM 81 +AN 82 +ANA 83 +ANCE 84 +ANG 85 +ANT 86 +AP 87 +AR 88 +ARD 89 +ARIAN 90 +ARIES 91 +ARILY 92 +ARY 93 +AS 94 +AT 95 +ATE 96 +ATED 97 +ATH 98 +ATING 99 +ATION 100 +ATIVE 101 +ATOMIC 102 +AU 103 +AW 104 +AY 105 +B 106 +BA 107 +BACK 108 +BBIE 109 +BE 110 +BER 111 +BI 112 +BLE 113 +BO 114 +BOARD 115 +BOOK 116 +BORN 117 +BOX 118 +BRA 119 +BU 120 +BURG 121 +BY 122 +C 123 +CA 124 +CAR 125 +CC 126 +CE 127 +CENT 128 +CH 129 +CHE 130 +CI 131 +CK 132 +CL 133 +CLA 134 +CO 135 +COM 136 +CON 137 +CONVENIENT 138 +CR 139 +CRA 140 +CT 141 +CUBA 142 +CUL 143 +CY 144 +D 145 +DA 146 +DDING 147 +DE 148 +DEN 149 +DI 150 +DO 151 +DUCT 152 +DY 153 +E 154 +EA 155 +ECT 156 +ED 157 +EF 158 +EG 159 +EL 160 +EN 161 +ENCE 162 +ENCY 163 +ENT 164 +ENTREPRENEURSHIP 165 +EP 166 +ER 167 +ERS 168 +ES 169 +EST 170 +ET 171 +EV 172 +EX 173 +EY 174 +F 175 +FA 176 +FE 177 +FECTED 178 +FERENCE 179 +FF 180 +FI 181 +FIELD 182 +FLEX 183 +FLOW 184 +FOR 185 +FU 186 +FUL 187 +G 188 +GA 189 +GE 190 +GED 191 +GER 192 +GGED 193 +GGLE 194 +GIE 195 +GING 196 +GO 197 +GRAD 198 +GRAM 199 +GUA 200 +H 201 +HA 202 +HAN 203 +HE 204 +HEAD 205 +HER 206 +HI 207 +HO 208 +HOLD 209 +HOOD 210 +HOUSE 211 +HREW 212 +HUMAN 213 +HY 214 +I 215 +IA 216 +IAL 217 +IAN 218 +IBILITY 219 +IBLE 220 +IBLY 221 +IC 222 +ICAL 223 +ICALLY 224 +ICK 225 +ID 226 +IDE 227 +IE 228 +IER 229 +IES 230 +IF 231 +IFICATION 232 +IFIED 233 +IFY 234 +IG 235 +IGHT 236 +IL 237 +ILE 238 +ILL 239 +IN 240 +INA 241 +INE 242 +INESS 243 +ING 244 +INTENDED 245 +IO 246 +ION 247 +IOUS 248 +IP 249 +IR 250 +IS 251 +ISE 252 +ISH 253 +ISM 254 +IST 255 +ISTIC 256 +ISTS 257 +IT 258 +ITE 259 +ITIES 260 +ITION 261 +ITUDE 262 +ITY 263 +IUM 264 +IV 265 +IVE 266 +IZATION 267 +IZE 268 +IZED 269 +IZING 270 +J 271 +JA 272 +K 273 +KA 274 +KE 275 +KER 276 +KI 277 +KIN 278 +KING 279 +KU 280 +L 281 +LA 282 +LAN 283 +LAND 284 +LD 285 +LE 286 +LED 287 +LER 288 +LES 289 +LESS 290 +LESSNESS 291 +LEY 292 +LI 293 +LIA 294 +LIC 295 +LIE 296 +LIGHT 297 +LIN 298 +LINE 299 +LING 300 +LIT 301 +LL 302 +LLY 303 +LO 304 +LOG 305 +LOR 306 +LU 307 +LY 308 +M 309 +MA 310 +MAN 311 +ME 312 +MEN 313 +MENT 314 +METER 315 +MI 316 +MO 317 +N 318 +NA 319 +ND 320 +NDER 321 +NE 322 +NED 323 +NER 324 +NESS 325 +NG 326 +NGEST 327 +NI 328 +NIC 329 +NING 330 +NO 331 +NS 332 +NT 333 +NY 334 +O 335 +OG 336 +OGRAPH 337 +OK 338 +OL 339 +OLOGICAL 340 +OLOGIST 341 +OLOGY 342 +ON 343 +ONE 344 +OO 345 +OP 346 +OR 347 +ORS 348 +ORY 349 +OS 350 +OSE 351 +OSIS 352 +OT 353 
+OU 354 +OUGHT 355 +OUND 356 +OUR 357 +OUS 358 +OUT 359 +OV 360 +OVER 361 +OW 362 +P 363 +PA 364 +PART 365 +PATHETIC 366 +PE 367 +PED 368 +PER 369 +PH 370 +PHOBIA 371 +PI 372 +PING 373 +PLACE 374 +PLAY 375 +PO 376 +POINT 377 +PORT 378 +POWER 379 +PP 380 +PR 381 +PRO 382 +PS 383 +PT 384 +Q 385 +QUA 386 +QUE 387 +R 388 +RA 389 +RAC 390 +RAN 391 +RD 392 +RE 393 +RESPONSIBILITIES 394 +RI 395 +RIB 396 +RIDGE 397 +RIN 398 +RING 399 +RK 400 +RO 401 +RON 402 +ROOM 403 +RS 404 +RU 405 +RY 406 +S 407 +SA 408 +SCRIPT 409 +SE 410 +SEMBL 411 +SH 412 +SHIP 413 +SIDE 414 +SOME 415 +SON 416 +SOURCING 417 +SPIRATION 418 +STAND 419 +STER 420 +STONE 421 +STREAM 422 +STRO 423 +STRUCK 424 +T 425 +TA 426 +TAN 427 +TE 428 +TEN 429 +TER 430 +TH 431 +THE 432 +THER 433 +TI 434 +TIC 435 +TIME 436 +TION 437 +TO 438 +TON 439 +TOP 440 +TOR 441 +TRA 442 +TRI 443 +TRIC 444 +TRIES 445 +TTING 446 +TURING 447 +TY 448 +U 449 +UAL 450 +UB 451 +UC 452 +UD 453 +UE 454 +UFF 455 +UG 456 +UGHT 457 +UIT 458 +UL 459 +ULATE 460 +UM 461 +UN 462 +UND 463 +UNG 464 +UP 465 +UR 466 +URE 467 +US 468 +USE 469 +UT 470 +V 471 +VA 472 +VAL 473 +VAN 474 +VE 475 +VER 476 +VERSE 477 +VERSION 478 +VI 479 +VILLE 480 +VING 481 +VISIBILITY 482 +W 483 +WA 484 +WARD 485 +WATER 486 +WAY 487 +WE 488 +WELL 489 +WI 490 +WN 491 +WOOD 492 +WORK 493 +WORTH 494 +X 495 +Y 496 +Z 497 +ZA 498 +ZE 499 +ZZ 500 +▁ 501 +▁1 502 +▁10 503 +▁100 504 +▁1000 505 +▁10000 506 +▁100000 507 +▁11 508 +▁12 509 +▁13 510 +▁15 511 +▁19 512 +▁1950 513 +▁199 514 +▁1990 515 +▁2 516 +▁20 517 +▁200 518 +▁2000 519 +▁2009 520 +▁201 521 +▁2010 522 +▁2011 523 +▁2012 524 +▁2013 525 +▁2015 526 +▁2050 527 +▁21 528 +▁22 529 +▁23 530 +▁25 531 +▁3 532 +▁30 533 +▁300 534 +▁3000 535 +▁35 536 +▁5 537 +▁50 538 +▁500 539 +▁9 540 +▁90 541 +▁95 542 +▁99 543 +▁A 544 +▁ABANDON 545 +▁ABILITIES 546 +▁ABILITY 547 +▁ABLE 548 +▁ABOUT 549 +▁ABOVE 550 +▁ABRAHAM 551 +▁ABROAD 552 +▁ABSENCE 553 +▁ABSOLUTE 554 +▁ABSOLUTELY 555 +▁ABSORB 556 +▁ABSTRACT 557 +▁ABSURD 558 +▁ABUNDAN 559 +▁ABUSE 560 +▁ABUSI 561 +▁ACADEMIC 562 +▁ACADEMY 563 +▁ACCELERATE 564 +▁ACCENT 565 +▁ACCEPT 566 +▁ACCEPTANCE 567 +▁ACCEPTED 568 +▁ACCESS 569 +▁ACCESSIBLE 570 +▁ACCIDENT 571 +▁ACCOMPLISH 572 +▁ACCORDING 573 +▁ACCOUNT 574 +▁ACCUMULAT 575 +▁ACCURACY 576 +▁ACCURATE 577 +▁ACCUS 578 +▁ACHIEVE 579 +▁ACHIEVEMENT 580 +▁ACHIEVING 581 +▁ACID 582 +▁ACKNOWLEDGE 583 +▁ACQUIRE 584 +▁ACROSS 585 +▁ACT 586 +▁ACTION 587 +▁ACTIONS 588 +▁ACTIVAT 589 +▁ACTIVATE 590 +▁ACTIVE 591 +▁ACTIVISM 592 +▁ACTIVIST 593 +▁ACTIVITIES 594 +▁ACTIVITY 595 +▁ACTOR 596 +▁ACTUAL 597 +▁ACTUALLY 598 +▁ADAM 599 +▁ADAPT 600 +▁ADD 601 +▁ADDED 602 +▁ADDICT 603 +▁ADDICTION 604 +▁ADDITION 605 +▁ADDITIONAL 606 +▁ADDRESS 607 +▁ADEQUATE 608 +▁ADHD 609 +▁ADJUST 610 +▁ADMINISTRATION 611 +▁ADMIT 612 +▁ADMITTED 613 +▁ADOLESCENT 614 +▁ADOPT 615 +▁ADULT 616 +▁ADULTHOOD 617 +▁ADULTS 618 +▁ADVANCE 619 +▁ADVANCED 620 +▁ADVANTAGE 621 +▁ADVENTURE 622 +▁ADVERSITY 623 +▁ADVERTISING 624 +▁ADVICE 625 +▁ADVISE 626 +▁ADVOCATE 627 +▁AESTHETIC 628 +▁AFFAIR 629 +▁AFFECT 630 +▁AFFECTED 631 +▁AFFIRM 632 +▁AFFORD 633 +▁AFGHAN 634 +▁AFGHANISTAN 635 +▁AFRAID 636 +▁AFRICA 637 +▁AFRICAN 638 +▁AFTER 639 +▁AFTERNOON 640 +▁AFTERWARDS 641 +▁AGAIN 642 +▁AGAINST 643 +▁AGE 644 +▁AGENCIES 645 +▁AGENCY 646 +▁AGENDA 647 +▁AGENT 648 +▁AGGREGAT 649 +▁AGGRESSIVE 650 +▁AGO 651 +▁AGREE 652 +▁AGREED 653 +▁AGRICULTURAL 654 +▁AGRICULTURE 655 +▁AHEAD 656 +▁AID 657 +▁AIM 658 +▁AIR 659 +▁AIRPLANE 660 +▁AIRPORT 661 +▁AL 662 +▁ALARM 663 +▁ALBERT 664 +▁ALBUM 665 +▁ALCOHOL 666 +▁ALGAE 667 +▁ALGORITHM 668 +▁ALIEN 669 +▁ALIGN 670 +▁ALIKE 671 
+▁ALIVE 672 +▁ALL 673 +▁ALLOW 674 +▁ALLOWED 675 +▁ALLOWING 676 +▁ALLOWS 677 +▁ALMOST 678 +▁ALONE 679 +▁ALONG 680 +▁ALREADY 681 +▁ALRIGHT 682 +▁ALSO 683 +▁ALTER 684 +▁ALTERNATIVE 685 +▁ALTHOUGH 686 +▁ALTOGETHER 687 +▁ALWAYS 688 +▁ALZHEIMER 689 +▁AM 690 +▁AMAZED 691 +▁AMAZING 692 +▁AMAZON 693 +▁AMBASSADOR 694 +▁AMBITION 695 +▁AMBITIOUS 696 +▁AMBULANCE 697 +▁AMERICA 698 +▁AMERICAN 699 +▁AMERICANS 700 +▁AMONG 701 +▁AMONGST 702 +▁AMOUNT 703 +▁AMP 704 +▁AN 705 +▁ANALOG 706 +▁ANALYSIS 707 +▁ANALYTIC 708 +▁ANALYZE 709 +▁ANATOMY 710 +▁ANCESTORS 711 +▁ANCIENT 712 +▁AND 713 +▁ANGEL 714 +▁ANGELES 715 +▁ANGER 716 +▁ANGLE 717 +▁ANGRY 718 +▁ANIMAL 719 +▁ANIMALS 720 +▁ANIMATION 721 +▁ANNOUNCED 722 +▁ANNUAL 723 +▁ANONYMOUS 724 +▁ANOTHER 725 +▁ANSWER 726 +▁ANSWERS 727 +▁ANTI 728 +▁ANTIBIOTIC 729 +▁ANTICIPATE 730 +▁ANTIMATTER 731 +▁ANXIETY 732 +▁ANXIOUS 733 +▁ANY 734 +▁ANYBODY 735 +▁ANYMORE 736 +▁ANYONE 737 +▁ANYTHING 738 +▁ANYWAY 739 +▁ANYWHERE 740 +▁APART 741 +▁APARTMENT 742 +▁APOLOGIZE 743 +▁APP 744 +▁APPARENTLY 745 +▁APPEAL 746 +▁APPEAR 747 +▁APPEARANCE 748 +▁APPLAUSE 749 +▁APPLE 750 +▁APPLICATION 751 +▁APPLIED 752 +▁APPLIES 753 +▁APPLY 754 +▁APPOINTMENT 755 +▁APPRECIATE 756 +▁APPROACH 757 +▁APPROPRIATE 758 +▁APPROVAL 759 +▁APPROXIMATE 760 +▁APRIL 761 +▁ARAB 762 +▁ARCH 763 +▁ARCHAEOLOG 764 +▁ARCHITECT 765 +▁ARCHITECTURE 766 +▁ARCTIC 767 +▁ARE 768 +▁AREA 769 +▁AREAS 770 +▁AREN 771 +▁ARGUE 772 +▁ARGUMENT 773 +▁ARISE 774 +▁ARM 775 +▁ARMY 776 +▁AROUND 777 +▁ARRANGE 778 +▁ARREST 779 +▁ARRIV 780 +▁ARRIVE 781 +▁ARRIVED 782 +▁ARROW 783 +▁ART 784 +▁ARTICLE 785 +▁ARTIFICIAL 786 +▁ARTIST 787 +▁AS 788 +▁ASHAMED 789 +▁ASK 790 +▁ASKED 791 +▁ASKING 792 +▁ASLEEP 793 +▁ASPECT 794 +▁ASSAULT 795 +▁ASSEMBLE 796 +▁ASSESS 797 +▁ASSET 798 +▁ASSIGNMENT 799 +▁ASSISTANCE 800 +▁ASSISTANT 801 +▁ASSOCIATE 802 +▁ASSOCIATED 803 +▁ASSOCIATION 804 +▁ASSUME 805 +▁ASSUMPTION 806 +▁ASTEROID 807 +▁ASTONISHING 808 +▁ASTRONAUT 809 +▁ASTRONOMER 810 +▁AT 811 +▁ATHLETE 812 +▁ATHLETIC 813 +▁ATLANTIC 814 +▁ATMOSPHERE 815 +▁ATOMS 816 +▁ATTACH 817 +▁ATTACK 818 +▁ATTEMPT 819 +▁ATTEND 820 +▁ATTENTION 821 +▁ATTITUDE 822 +▁ATTORNEY 823 +▁ATTRACT 824 +▁ATTRACTIVE 825 +▁ATTRIBUTE 826 +▁AUDIENCE 827 +▁AUDIO 828 +▁AUDIT 829 +▁AUGMENT 830 +▁AUGUST 831 +▁AUNT 832 +▁AUSTRALIA 833 +▁AUTHENTIC 834 +▁AUTHOR 835 +▁AUTHORITY 836 +▁AUTISM 837 +▁AUTISTIC 838 +▁AUTO 839 +▁AUTOMATIC 840 +▁AUTOMATICALLY 841 +▁AUTONOMOUS 842 +▁AVAILABLE 843 +▁AVENUE 844 +▁AVERAGE 845 +▁AVOID 846 +▁AWAKE 847 +▁AWARE 848 +▁AWARENESS 849 +▁AWAY 850 +▁AWESOME 851 +▁AWFUL 852 +▁AWKWARD 853 +▁B 854 +▁BA 855 +▁BABIES 856 +▁BABY 857 +▁BACK 858 +▁BACKGROUND 859 +▁BACKPACK 860 +▁BACKWARDS 861 +▁BACKYARD 862 +▁BACTERIA 863 +▁BAD 864 +▁BAG 865 +▁BAKE 866 +▁BALANCE 867 +▁BALL 868 +▁BALLET 869 +▁BALLOON 870 +▁BALTIMORE 871 +▁BAN 872 +▁BANANA 873 +▁BAND 874 +▁BANG 875 +▁BANK 876 +▁BAR 877 +▁BARELY 878 +▁BARRIER 879 +▁BASE 880 +▁BASEBALL 881 +▁BASED 882 +▁BASIC 883 +▁BASICALLY 884 +▁BASIS 885 +▁BASKETBALL 886 +▁BATHROOM 887 +▁BATTERY 888 +▁BATTLE 889 +▁BE 890 +▁BEACH 891 +▁BEAR 892 +▁BEAT 893 +▁BEAUTIFUL 894 +▁BEAUTY 895 +▁BECAME 896 +▁BECAUSE 897 +▁BECOME 898 +▁BECOMES 899 +▁BECOMING 900 +▁BED 901 +▁BEDROOM 902 +▁BEEN 903 +▁BEFORE 904 +▁BEGAN 905 +▁BEGIN 906 +▁BEGINNING 907 +▁BEGUN 908 +▁BEHALF 909 +▁BEHAVE 910 +▁BEHAVIOR 911 +▁BEHAVIORAL 912 +▁BEHAVIORS 913 +▁BEHAVIOUR 914 +▁BEHIND 915 +▁BEING 916 +▁BEINGS 917 +▁BELIEF 918 +▁BELIEFS 919 +▁BELIEVE 920 +▁BELIEVED 921 +▁BELIEVING 922 +▁BELONG 923 +▁BELOW 924 +▁BENEFICIAL 925 +▁BENEFIT 926 +▁BENEFITS 927 +▁BEST 928 +▁BETTER 929 +▁BETWEEN 930 +▁BEYOND 931 
+▁BI 932 +▁BIAS 933 +▁BICYCLE 934 +▁BIG 935 +▁BIGGER 936 +▁BIGGEST 937 +▁BIKE 938 +▁BILL 939 +▁BILLION 940 +▁BIN 941 +▁BINARY 942 +▁BIO 943 +▁BIODIVERSITY 944 +▁BIOLOGICAL 945 +▁BIOLOGIST 946 +▁BIOLOGY 947 +▁BIRD 948 +▁BIRDS 949 +▁BIRMINGHAM 950 +▁BIRTH 951 +▁BIRTHDAY 952 +▁BISEXUAL 953 +▁BIT 954 +▁BITCOIN 955 +▁BIZARRE 956 +▁BLA 957 +▁BLACK 958 +▁BLAME 959 +▁BLANK 960 +▁BLEND 961 +▁BLESS 962 +▁BLIND 963 +▁BLINK 964 +▁BLOCK 965 +▁BLOG 966 +▁BLOOD 967 +▁BLOW 968 +▁BLUE 969 +▁BLUR 970 +▁BO 971 +▁BOARD 972 +▁BOAT 973 +▁BODIES 974 +▁BODY 975 +▁BOLD 976 +▁BOMB 977 +▁BOND 978 +▁BONE 979 +▁BONUS 980 +▁BOO 981 +▁BOOK 982 +▁BOOKS 983 +▁BORDER 984 +▁BORED 985 +▁BORING 986 +▁BORN 987 +▁BORROW 988 +▁BOSS 989 +▁BOSTON 990 +▁BOTH 991 +▁BOTHER 992 +▁BOTTLE 993 +▁BOTTOM 994 +▁BOUGHT 995 +▁BOUNC 996 +▁BOUND 997 +▁BOUNDARIES 998 +▁BOW 999 +▁BOWL 1000 +▁BOX 1001 +▁BOXES 1002 +▁BOY 1003 +▁BOYFRIEND 1004 +▁BOYS 1005 +▁BR 1006 +▁BRA 1007 +▁BRAIN 1008 +▁BRAINS 1009 +▁BRANCH 1010 +▁BRAND 1011 +▁BRAVE 1012 +▁BRAZIL 1013 +▁BREAD 1014 +▁BREAK 1015 +▁BREAKFAST 1016 +▁BREAKTHROUGH 1017 +▁BREAST 1018 +▁BREATH 1019 +▁BREATHE 1020 +▁BREATHING 1021 +▁BREED 1022 +▁BRIDGE 1023 +▁BRIEF 1024 +▁BRIGHT 1025 +▁BRILLIANT 1026 +▁BRING 1027 +▁BRINGING 1028 +▁BRITAIN 1029 +▁BRITISH 1030 +▁BRO 1031 +▁BROAD 1032 +▁BROADCAST 1033 +▁BROKE 1034 +▁BROKEN 1035 +▁BROTHER 1036 +▁BROUGHT 1037 +▁BROWN 1038 +▁BRUSH 1039 +▁BRUTAL 1040 +▁BU 1041 +▁BUBBLE 1042 +▁BUCK 1043 +▁BUDGET 1044 +▁BUG 1045 +▁BUILD 1046 +▁BUILDING 1047 +▁BUILT 1048 +▁BULB 1049 +▁BULLET 1050 +▁BULLIED 1051 +▁BULLYING 1052 +▁BUMP 1053 +▁BUNCH 1054 +▁BUR 1055 +▁BURDEN 1056 +▁BURIED 1057 +▁BURN 1058 +▁BUS 1059 +▁BUSINESS 1060 +▁BUSINESSES 1061 +▁BUSY 1062 +▁BUT 1063 +▁BUTTON 1064 +▁BUY 1065 +▁BY 1066 +▁C 1067 +▁CA 1068 +▁CAKE 1069 +▁CALCULAT 1070 +▁CALCULATE 1071 +▁CALIFORNIA 1072 +▁CALL 1073 +▁CALLED 1074 +▁CALLING 1075 +▁CALM 1076 +▁CALORIES 1077 +▁CAMBODIA 1078 +▁CAMBRIDGE 1079 +▁CAME 1080 +▁CAMERA 1081 +▁CAMP 1082 +▁CAMPAIGN 1083 +▁CAMPUS 1084 +▁CAN 1085 +▁CANADA 1086 +▁CANCER 1087 +▁CANDIDATE 1088 +▁CANNOT 1089 +▁CAP 1090 +▁CAPABILITIES 1091 +▁CAPABILITY 1092 +▁CAPABLE 1093 +▁CAPACITY 1094 +▁CAPITA 1095 +▁CAPITAL 1096 +▁CAPITALISM 1097 +▁CAPTAIN 1098 +▁CAPTIV 1099 +▁CAPTURE 1100 +▁CAR 1101 +▁CARBON 1102 +▁CARD 1103 +▁CARE 1104 +▁CAREER 1105 +▁CAREFUL 1106 +▁CAREFULLY 1107 +▁CAROLINA 1108 +▁CARRIE 1109 +▁CARRY 1110 +▁CARRYING 1111 +▁CARS 1112 +▁CARTOON 1113 +▁CASE 1114 +▁CASES 1115 +▁CASH 1116 +▁CAST 1117 +▁CAT 1118 +▁CATALYST 1119 +▁CATCH 1120 +▁CATEGORIES 1121 +▁CATEGORY 1122 +▁CATHOLIC 1123 +▁CAUGHT 1124 +▁CAUSE 1125 +▁CAUSED 1126 +▁CAUSES 1127 +▁CAUSING 1128 +▁CAVE 1129 +▁CEILING 1130 +▁CELEBRATE 1131 +▁CELL 1132 +▁CELLS 1133 +▁CENSOR 1134 +▁CENT 1135 +▁CENTER 1136 +▁CENTRAL 1137 +▁CENTURIES 1138 +▁CENTURY 1139 +▁CEO 1140 +▁CEREMONY 1141 +▁CERTAIN 1142 +▁CERTAINLY 1143 +▁CH 1144 +▁CHA 1145 +▁CHAIN 1146 +▁CHAIR 1147 +▁CHALLENGE 1148 +▁CHALLENGES 1149 +▁CHALLENGING 1150 +▁CHAMPION 1151 +▁CHANCE 1152 +▁CHANGE 1153 +▁CHANGED 1154 +▁CHANGES 1155 +▁CHANGING 1156 +▁CHANNEL 1157 +▁CHAOS 1158 +▁CHAPTER 1159 +▁CHARACTER 1160 +▁CHARACTERISTICS 1161 +▁CHARGE 1162 +▁CHARITY 1163 +▁CHARLES 1164 +▁CHART 1165 +▁CHASE 1166 +▁CHEAP 1167 +▁CHEAPER 1168 +▁CHEAT 1169 +▁CHECK 1170 +▁CHEEK 1171 +▁CHEER 1172 +▁CHEESE 1173 +▁CHEMICAL 1174 +▁CHEMISTRY 1175 +▁CHEMOTHERAPY 1176 +▁CHEST 1177 +▁CHEW 1178 +▁CHI 1179 +▁CHICAGO 1180 +▁CHICKEN 1181 +▁CHIEF 1182 +▁CHILD 1183 +▁CHILDHOOD 1184 +▁CHILDREN 1185 +▁CHIMPANZEE 1186 +▁CHINA 1187 +▁CHINESE 1188 +▁CHIP 1189 +▁CHOCOLATE 1190 +▁CHOICE 1191 +▁CHOICES 
1192 +▁CHOOSE 1193 +▁CHOOSING 1194 +▁CHOSE 1195 +▁CHOSEN 1196 +▁CHRIS 1197 +▁CHRISTIAN 1198 +▁CHRISTMAS 1199 +▁CHROMOSOME 1200 +▁CHRONIC 1201 +▁CHUNK 1202 +▁CHURCH 1203 +▁CHUTZPAH 1204 +▁CIGARETTE 1205 +▁CINEMA 1206 +▁CIRCLE 1207 +▁CIRCUIT 1208 +▁CIRCULA 1209 +▁CIRCUM 1210 +▁CIRCUMSTANCES 1211 +▁CITIES 1212 +▁CITIZEN 1213 +▁CITIZENS 1214 +▁CITY 1215 +▁CIVIC 1216 +▁CIVIL 1217 +▁CIVILIZATION 1218 +▁CL 1219 +▁CLAIM 1220 +▁CLARITY 1221 +▁CLASS 1222 +▁CLASSES 1223 +▁CLASSIC 1224 +▁CLASSICAL 1225 +▁CLASSMATES 1226 +▁CLASSROOM 1227 +▁CLEAN 1228 +▁CLEAR 1229 +▁CLEARLY 1230 +▁CLEVER 1231 +▁CLICK 1232 +▁CLIENT 1233 +▁CLIMATE 1234 +▁CLIMB 1235 +▁CLINIC 1236 +▁CLINICAL 1237 +▁CLIP 1238 +▁CLO 1239 +▁CLOCK 1240 +▁CLOSE 1241 +▁CLOSED 1242 +▁CLOSER 1243 +▁CLOSING 1244 +▁CLOTHES 1245 +▁CLOTHING 1246 +▁CLOUD 1247 +▁CLUB 1248 +▁CLUE 1249 +▁CLUSTER 1250 +▁CO 1251 +▁COACH 1252 +▁COAL 1253 +▁COAST 1254 +▁COCAINE 1255 +▁COCOA 1256 +▁CODE 1257 +▁COFFEE 1258 +▁COGNITIVE 1259 +▁COIN 1260 +▁COLD 1261 +▁COLLABORATE 1262 +▁COLLABORATION 1263 +▁COLLABORATIVE 1264 +▁COLLAPSE 1265 +▁COLLEAGUE 1266 +▁COLLEAGUES 1267 +▁COLLECT 1268 +▁COLLECTION 1269 +▁COLLECTIVE 1270 +▁COLLEGE 1271 +▁COLLIDE 1272 +▁COLLISION 1273 +▁COLOR 1274 +▁COLORADO 1275 +▁COLUMN 1276 +▁COM 1277 +▁COMBAT 1278 +▁COMBINATION 1279 +▁COMBINE 1280 +▁COMBINED 1281 +▁COME 1282 +▁COMES 1283 +▁COMFORT 1284 +▁COMFORTABLE 1285 +▁COMIC 1286 +▁COMING 1287 +▁COMMAND 1288 +▁COMMENT 1289 +▁COMMERCIAL 1290 +▁COMMISSION 1291 +▁COMMIT 1292 +▁COMMITMENT 1293 +▁COMMITTED 1294 +▁COMMITTEE 1295 +▁COMMON 1296 +▁COMMUNI 1297 +▁COMMUNICATE 1298 +▁COMMUNICATING 1299 +▁COMMUNICATION 1300 +▁COMMUNITIES 1301 +▁COMMUNITY 1302 +▁COMP 1303 +▁COMPANIES 1304 +▁COMPANY 1305 +▁COMPARE 1306 +▁COMPARED 1307 +▁COMPARISON 1308 +▁COMPASSION 1309 +▁COMPELLING 1310 +▁COMPETE 1311 +▁COMPETING 1312 +▁COMPETITION 1313 +▁COMPETITIVE 1314 +▁COMPLAIN 1315 +▁COMPLEMENT 1316 +▁COMPLETE 1317 +▁COMPLETELY 1318 +▁COMPLEX 1319 +▁COMPLEXITY 1320 +▁COMPLICATED 1321 +▁COMPLIMENT 1322 +▁COMPONENT 1323 +▁COMPOSER 1324 +▁COMPOST 1325 +▁COMPOUND 1326 +▁COMPREHENSI 1327 +▁COMPROMISE 1328 +▁COMPUTATION 1329 +▁COMPUTER 1330 +▁COMPUTERS 1331 +▁COMPUTING 1332 +▁CON 1333 +▁CONCENTRATE 1334 +▁CONCENTRATION 1335 +▁CONCEPT 1336 +▁CONCERN 1337 +▁CONCERNED 1338 +▁CONCERT 1339 +▁CONCLUDE 1340 +▁CONCLUSION 1341 +▁CONCRETE 1342 +▁CONDITION 1343 +▁CONDITIONS 1344 +▁CONDUCT 1345 +▁CONFERENCE 1346 +▁CONFIDENCE 1347 +▁CONFIDENT 1348 +▁CONFINE 1349 +▁CONFIRM 1350 +▁CONFLICT 1351 +▁CONFORM 1352 +▁CONFRONT 1353 +▁CONFUSED 1354 +▁CONFUSING 1355 +▁CONFUSION 1356 +▁CONGRESS 1357 +▁CONNECT 1358 +▁CONNECTED 1359 +▁CONNECTION 1360 +▁CONNECTIONS 1361 +▁CONQUER 1362 +▁CONSCIOUS 1363 +▁CONSCIOUSNESS 1364 +▁CONSENT 1365 +▁CONSEQUENCE 1366 +▁CONSEQUENCES 1367 +▁CONSERVATION 1368 +▁CONSERVATIVE 1369 +▁CONSIDER 1370 +▁CONSIDERED 1371 +▁CONSIST 1372 +▁CONSISTENT 1373 +▁CONSTANT 1374 +▁CONSTANTLY 1375 +▁CONSTITUTION 1376 +▁CONSTRAIN 1377 +▁CONSTRUCT 1378 +▁CONSTRUCTION 1379 +▁CONSULT 1380 +▁CONSUME 1381 +▁CONSUMER 1382 +▁CONSUMING 1383 +▁CONSUMPTION 1384 +▁CONTACT 1385 +▁CONTAIN 1386 +▁CONTEMPORARY 1387 +▁CONTENT 1388 +▁CONTEST 1389 +▁CONTEXT 1390 +▁CONTINENT 1391 +▁CONTINU 1392 +▁CONTINUE 1393 +▁CONTINUED 1394 +▁CONTRACT 1395 +▁CONTRADICT 1396 +▁CONTRARY 1397 +▁CONTRAST 1398 +▁CONTRIBUTE 1399 +▁CONTRIBUTING 1400 +▁CONTRIBUTION 1401 +▁CONTROL 1402 +▁CONTROLLED 1403 +▁CONTROVERSIAL 1404 +▁CONVENTION 1405 +▁CONVENTIONAL 1406 +▁CONVERSATION 1407 +▁CONVERSATIONS 1408 +▁CONVERT 1409 +▁CONVICT 1410 +▁CONVINCE 1411 +▁CONVINCED 1412 +▁CONVINCING 1413 +▁COOK 
1414 +▁COOL 1415 +▁COOPERATION 1416 +▁COORDINATE 1417 +▁COP 1418 +▁COPE 1419 +▁COPY 1420 +▁COR 1421 +▁CORAL 1422 +▁CORE 1423 +▁CORN 1424 +▁CORNER 1425 +▁CORPORATE 1426 +▁CORPORATION 1427 +▁CORPS 1428 +▁CORRECT 1429 +▁CORRELATE 1430 +▁CORRESPOND 1431 +▁CORRUPTION 1432 +▁CORTEX 1433 +▁COSMIC 1434 +▁COST 1435 +▁COSTS 1436 +▁COSTUME 1437 +▁COUCH 1438 +▁COULD 1439 +▁COULDN 1440 +▁COUNCIL 1441 +▁COUNSEL 1442 +▁COUNT 1443 +▁COUNTER 1444 +▁COUNTLESS 1445 +▁COUNTRIES 1446 +▁COUNTRY 1447 +▁COUPLE 1448 +▁COURAGE 1449 +▁COURSE 1450 +▁COURT 1451 +▁COUSIN 1452 +▁COVER 1453 +▁COW 1454 +▁CR 1455 +▁CRACK 1456 +▁CRAFT 1457 +▁CRASH 1458 +▁CRAWL 1459 +▁CRAZY 1460 +▁CREAM 1461 +▁CREATE 1462 +▁CREATED 1463 +▁CREATING 1464 +▁CREATION 1465 +▁CREATIVE 1466 +▁CREATIVITY 1467 +▁CREATOR 1468 +▁CREATURE 1469 +▁CREDIT 1470 +▁CREW 1471 +▁CRIED 1472 +▁CRIME 1473 +▁CRIMINAL 1474 +▁CRISIS 1475 +▁CRITERIA 1476 +▁CRITIC 1477 +▁CRITICAL 1478 +▁CROP 1479 +▁CROSS 1480 +▁CROWD 1481 +▁CRU 1482 +▁CRUCIAL 1483 +▁CRUSH 1484 +▁CRY 1485 +▁CRYING 1486 +▁CRYSTAL 1487 +▁CU 1488 +▁CULTIVAT 1489 +▁CULTURAL 1490 +▁CULTURE 1491 +▁CUP 1492 +▁CURE 1493 +▁CURIOSITY 1494 +▁CURIOUS 1495 +▁CURRENCY 1496 +▁CURRENT 1497 +▁CURRENTLY 1498 +▁CURRICULUM 1499 +▁CURVE 1500 +▁CUSTODY 1501 +▁CUSTOM 1502 +▁CUT 1503 +▁CUTTING 1504 +▁CYBER 1505 +▁CYCLE 1506 +▁D 1507 +▁DA 1508 +▁DAD 1509 +▁DAILY 1510 +▁DAIRY 1511 +▁DAMAGE 1512 +▁DAMAGING 1513 +▁DAMN 1514 +▁DAN 1515 +▁DANCE 1516 +▁DANCING 1517 +▁DANGER 1518 +▁DANGEROUS 1519 +▁DANIEL 1520 +▁DAR 1521 +▁DARK 1522 +▁DARKNESS 1523 +▁DATA 1524 +▁DATABASE 1525 +▁DATE 1526 +▁DATING 1527 +▁DAUGHTER 1528 +▁DAVID 1529 +▁DAY 1530 +▁DAYS 1531 +▁DE 1532 +▁DEAD 1533 +▁DEAF 1534 +▁DEAL 1535 +▁DEALING 1536 +▁DEAR 1537 +▁DEATH 1538 +▁DEBATE 1539 +▁DEBT 1540 +▁DECADE 1541 +▁DECADES 1542 +▁DECEMBER 1543 +▁DECID 1544 +▁DECIDE 1545 +▁DECIDED 1546 +▁DECISION 1547 +▁DECISIONS 1548 +▁DECLARED 1549 +▁DECLINE 1550 +▁DECREASE 1551 +▁DEDICATED 1552 +▁DEEP 1553 +▁DEEPER 1554 +▁DEEPLY 1555 +▁DEF 1556 +▁DEFAULT 1557 +▁DEFEAT 1558 +▁DEFEND 1559 +▁DEFENSE 1560 +▁DEFICIT 1561 +▁DEFINE 1562 +▁DEFINED 1563 +▁DEFINING 1564 +▁DEFINITELY 1565 +▁DEFINITION 1566 +▁DEGREE 1567 +▁DELAY 1568 +▁DELIBERATE 1569 +▁DELICIOUS 1570 +▁DELIVER 1571 +▁DEMAND 1572 +▁DEMO 1573 +▁DEMOCRACY 1574 +▁DEMOCRAT 1575 +▁DEMOCRATIC 1576 +▁DEMOGRAPHIC 1577 +▁DEMONSTRAT 1578 +▁DEMONSTRATE 1579 +▁DENIAL 1580 +▁DENSE 1581 +▁DENSITY 1582 +▁DENVER 1583 +▁DEPARTMENT 1584 +▁DEPEND 1585 +▁DEPENDENT 1586 +▁DEPICT 1587 +▁DEPLOY 1588 +▁DEPRESSED 1589 +▁DEPRESSION 1590 +▁DEPRIV 1591 +▁DEPTH 1592 +▁DERIVE 1593 +▁DESCRIBE 1594 +▁DESCRIBED 1595 +▁DESCRIBING 1596 +▁DESCRIPTION 1597 +▁DESERT 1598 +▁DESERVE 1599 +▁DESIGN 1600 +▁DESIGNED 1601 +▁DESIGNER 1602 +▁DESIRE 1603 +▁DESK 1604 +▁DESPAIR 1605 +▁DESPERATE 1606 +▁DESPITE 1607 +▁DESTINATION 1608 +▁DESTINY 1609 +▁DESTROY 1610 +▁DESTRUCTION 1611 +▁DESTRUCTIVE 1612 +▁DETAIL 1613 +▁DETECT 1614 +▁DETERMINATION 1615 +▁DETERMINE 1616 +▁DETERMINED 1617 +▁DETROIT 1618 +▁DEVASTATING 1619 +▁DEVELOP 1620 +▁DEVELOPED 1621 +▁DEVELOPING 1622 +▁DEVELOPMENT 1623 +▁DEVICE 1624 +▁DEVICES 1625 +▁DEVIL 1626 +▁DEVO 1627 +▁DI 1628 +▁DIABETES 1629 +▁DIAGNOSED 1630 +▁DIAGNOSIS 1631 +▁DIALOGUE 1632 +▁DICTATE 1633 +▁DICTATOR 1634 +▁DICTIONARY 1635 +▁DID 1636 +▁DIDN 1637 +▁DIE 1638 +▁DIED 1639 +▁DIET 1640 +▁DIFFER 1641 +▁DIFFERENCE 1642 +▁DIFFERENCES 1643 +▁DIFFERENT 1644 +▁DIFFERENTLY 1645 +▁DIFFICULT 1646 +▁DIG 1647 +▁DIGITAL 1648 +▁DIGNITY 1649 +▁DILEMMA 1650 +▁DIMENSION 1651 +▁DIMINISH 1652 +▁DINNER 1653 +▁DINOSAUR 1654 +▁DIOXIDE 1655 +▁DIPLOMA 1656 +▁DIRECT 1657 
+▁DIRECTION 1658 +▁DIRECTLY 1659 +▁DIRECTOR 1660 +▁DIRT 1661 +▁DIRTY 1662 +▁DIS 1663 +▁DISABILITIES 1664 +▁DISABILITY 1665 +▁DISABLED 1666 +▁DISADVANTAGE 1667 +▁DISAGREE 1668 +▁DISAPPEAR 1669 +▁DISAPPOINT 1670 +▁DISASTER 1671 +▁DISCIPLINE 1672 +▁DISCOMFORT 1673 +▁DISCONNECT 1674 +▁DISCOURSE 1675 +▁DISCOVER 1676 +▁DISCOVERED 1677 +▁DISCOVERY 1678 +▁DISCRIMINAT 1679 +▁DISCRIMINATION 1680 +▁DISCUSS 1681 +▁DISCUSSION 1682 +▁DISEASE 1683 +▁DISEASES 1684 +▁DISGUST 1685 +▁DISH 1686 +▁DISMISS 1687 +▁DISNEY 1688 +▁DISORDER 1689 +▁DISPLAY 1690 +▁DISRUPT 1691 +▁DISTANCE 1692 +▁DISTANT 1693 +▁DISTINCT 1694 +▁DISTINGUISH 1695 +▁DISTORT 1696 +▁DISTRACT 1697 +▁DISTRACTION 1698 +▁DISTRIBUT 1699 +▁DISTRIBUTION 1700 +▁DISTRICT 1701 +▁DISTURB 1702 +▁DIVERSE 1703 +▁DIVERSITY 1704 +▁DIVIDE 1705 +▁DIVINE 1706 +▁DIVISION 1707 +▁DIVORCE 1708 +▁DNA 1709 +▁DO 1710 +▁DOCTOR 1711 +▁DOCTORS 1712 +▁DOCUMENT 1713 +▁DOES 1714 +▁DOESN 1715 +▁DOG 1716 +▁DOGS 1717 +▁DOING 1718 +▁DOLLAR 1719 +▁DOLLARS 1720 +▁DOLPHIN 1721 +▁DOMAIN 1722 +▁DOMESTIC 1723 +▁DOMINANT 1724 +▁DON 1725 +▁DONE 1726 +▁DOOR 1727 +▁DOPAMINE 1728 +▁DOUBLE 1729 +▁DOUBT 1730 +▁DOWN 1731 +▁DOZEN 1732 +▁DR 1733 +▁DRAG 1734 +▁DRAIN 1735 +▁DRAMA 1736 +▁DRAMATIC 1737 +▁DRAMATICALLY 1738 +▁DRAW 1739 +▁DRAWING 1740 +▁DRAWN 1741 +▁DREAD 1742 +▁DREAM 1743 +▁DREAMS 1744 +▁DRESS 1745 +▁DREW 1746 +▁DRIFT 1747 +▁DRINK 1748 +▁DRINKING 1749 +▁DRIVE 1750 +▁DRIVEN 1751 +▁DRIVER 1752 +▁DRIVING 1753 +▁DRONE 1754 +▁DROP 1755 +▁DROPPED 1756 +▁DROVE 1757 +▁DROWN 1758 +▁DRUG 1759 +▁DRUGS 1760 +▁DRUM 1761 +▁DRUNK 1762 +▁DRY 1763 +▁DU 1764 +▁DUE 1765 +▁DUMB 1766 +▁DUMP 1767 +▁DURING 1768 +▁DUST 1769 +▁DUTCH 1770 +▁DUTY 1771 +▁DYING 1772 +▁DYNAMIC 1773 +▁DYSFUNCTION 1774 +▁E 1775 +▁EACH 1776 +▁EAGER 1777 +▁EAR 1778 +▁EARLIER 1779 +▁EARLIEST 1780 +▁EARLY 1781 +▁EARN 1782 +▁EARTH 1783 +▁EARTHQUAKE 1784 +▁EASE 1785 +▁EASIE 1786 +▁EASIER 1787 +▁EASILY 1788 +▁EAST 1789 +▁EASTERN 1790 +▁EASY 1791 +▁EAT 1792 +▁EATING 1793 +▁EBOLA 1794 +▁ECHO 1795 +▁ECOLOGICAL 1796 +▁ECONOMIC 1797 +▁ECONOMIES 1798 +▁ECONOMIST 1799 +▁ECONOMY 1800 +▁ECOSYSTEM 1801 +▁EDGE 1802 +▁EDIT 1803 +▁EDITOR 1804 +▁EDUCATE 1805 +▁EDUCATED 1806 +▁EDUCATION 1807 +▁EDUCATIONAL 1808 +▁EDUCATOR 1809 +▁EFFECT 1810 +▁EFFECTIVE 1811 +▁EFFECTIVELY 1812 +▁EFFECTS 1813 +▁EFFICIENCY 1814 +▁EFFICIENT 1815 +▁EFFORT 1816 +▁EGG 1817 +▁EGO 1818 +▁EGYPT 1819 +▁EIGHT 1820 +▁EINSTEIN 1821 +▁EITHER 1822 +▁EL 1823 +▁ELDERLY 1824 +▁ELECTION 1825 +▁ELECTRIC 1826 +▁ELECTRICAL 1827 +▁ELECTRICITY 1828 +▁ELECTRO 1829 +▁ELECTRONIC 1830 +▁ELEGANT 1831 +▁ELEMENT 1832 +▁ELEMENTS 1833 +▁ELEPHANT 1834 +▁ELIMINAT 1835 +▁ELIMINATE 1836 +▁ELITE 1837 +▁ELSE 1838 +▁ELSEWHERE 1839 +▁EM 1840 +▁EMAIL 1841 +▁EMBARRASSED 1842 +▁EMBARRASSING 1843 +▁EMBEDDED 1844 +▁EMBODIE 1845 +▁EMBRAC 1846 +▁EMBRACE 1847 +▁EMBRYO 1848 +▁EMERGE 1849 +▁EMERGENC 1850 +▁EMERGENCY 1851 +▁EMERGING 1852 +▁EMISSIONS 1853 +▁EMOTION 1854 +▁EMOTIONAL 1855 +▁EMOTIONALLY 1856 +▁EMOTIONS 1857 +▁EMPATHY 1858 +▁EMPHASIZE 1859 +▁EMPIRE 1860 +▁EMPLOY 1861 +▁EMPLOYEE 1862 +▁EMPLOYEES 1863 +▁EMPLOYER 1864 +▁EMPLOYMENT 1865 +▁EMPOWER 1866 +▁EMPTY 1867 +▁EN 1868 +▁ENABLE 1869 +▁ENCOUNTER 1870 +▁ENCOURAGE 1871 +▁ENCOURAGING 1872 +▁END 1873 +▁ENDANGERED 1874 +▁ENDEAVOR 1875 +▁ENDED 1876 +▁ENDLESS 1877 +▁ENEMIES 1878 +▁ENEMY 1879 +▁ENERGY 1880 +▁ENFORCE 1881 +▁ENGAGE 1882 +▁ENGAGED 1883 +▁ENGAGEMENT 1884 +▁ENGAGING 1885 +▁ENGINE 1886 +▁ENGINEER 1887 +▁ENGINEERING 1888 +▁ENGLAND 1889 +▁ENGLISH 1890 +▁ENHANCE 1891 +▁ENJOY 1892 +▁ENLIGHTEN 1893 +▁ENORMOUS 1894 +▁ENOUGH 1895 +▁ENRICH 1896 +▁ENROLL 1897 
+▁ENSURE 1898 +▁ENTER 1899 +▁ENTERPRISE 1900 +▁ENTERTAIN 1901 +▁ENTERTAINMENT 1902 +▁ENTIRE 1903 +▁ENTIRELY 1904 +▁ENTITLED 1905 +▁ENTR 1906 +▁ENTREPRENEUR 1907 +▁ENVIRONMENT 1908 +▁ENVIRONMENTAL 1909 +▁ENVISION 1910 +▁EPIDEMIC 1911 +▁EPISODE 1912 +▁EQU 1913 +▁EQUAL 1914 +▁EQUALITY 1915 +▁EQUALLY 1916 +▁EQUATION 1917 +▁EQUIPMENT 1918 +▁EQUIPPED 1919 +▁EQUIVALENT 1920 +▁ERA 1921 +▁ERIC 1922 +▁ERROR 1923 +▁ESCAPE 1924 +▁ESPECIALLY 1925 +▁ESSAY 1926 +▁ESSENCE 1927 +▁ESSENTIAL 1928 +▁ESSENTIALLY 1929 +▁ESTABLISH 1930 +▁ESTEEM 1931 +▁ESTIMATE 1932 +▁ETC 1933 +▁ETHICAL 1934 +▁ETHNIC 1935 +▁EU 1936 +▁EURO 1937 +▁EUROPE 1938 +▁EUROPEAN 1939 +▁EVALUATE 1940 +▁EVEN 1941 +▁EVENT 1942 +▁EVENTS 1943 +▁EVENTUALLY 1944 +▁EVER 1945 +▁EVERY 1946 +▁EVERYBODY 1947 +▁EVERYDAY 1948 +▁EVERYONE 1949 +▁EVERYTHING 1950 +▁EVERYWHERE 1951 +▁EVIDENCE 1952 +▁EVIL 1953 +▁EVOLUTION 1954 +▁EVOLUTIONARY 1955 +▁EVOLVE 1956 +▁EVOLVED 1957 +▁EVOLVING 1958 +▁EX 1959 +▁EXACT 1960 +▁EXACTLY 1961 +▁EXAGGERAT 1962 +▁EXAM 1963 +▁EXAMINE 1964 +▁EXAMPLE 1965 +▁EXAMPLES 1966 +▁EXCEL 1967 +▁EXCELLENT 1968 +▁EXCEPT 1969 +▁EXCEPTION 1970 +▁EXCESS 1971 +▁EXCHANGE 1972 +▁EXCITED 1973 +▁EXCITEMENT 1974 +▁EXCITING 1975 +▁EXCLUSIVE 1976 +▁EXCUSE 1977 +▁EXECUTE 1978 +▁EXECUTIVE 1979 +▁EXERCISE 1980 +▁EXHAUSTED 1981 +▁EXHIBIT 1982 +▁EXIST 1983 +▁EXISTENCE 1984 +▁EXISTING 1985 +▁EXPAND 1986 +▁EXPECT 1987 +▁EXPECTANCY 1988 +▁EXPECTATIONS 1989 +▁EXPECTED 1990 +▁EXPENSE 1991 +▁EXPENSIVE 1992 +▁EXPERIENCE 1993 +▁EXPERIENCED 1994 +▁EXPERIENCES 1995 +▁EXPERIENCING 1996 +▁EXPERIMENT 1997 +▁EXPERIMENTS 1998 +▁EXPERT 1999 +▁EXPERTISE 2000 +▁EXPERTS 2001 +▁EXPLAIN 2002 +▁EXPLAINED 2003 +▁EXPLANATION 2004 +▁EXPLICIT 2005 +▁EXPLODE 2006 +▁EXPLOIT 2007 +▁EXPLORATION 2008 +▁EXPLORE 2009 +▁EXPLORING 2010 +▁EXPLOSION 2011 +▁EXPONENTIAL 2012 +▁EXPOSED 2013 +▁EXPOSURE 2014 +▁EXPRESS 2015 +▁EXPRESSION 2016 +▁EXTEND 2017 +▁EXTENSION 2018 +▁EXTENSIVE 2019 +▁EXTENT 2020 +▁EXTERNAL 2021 +▁EXTINCT 2022 +▁EXTINCTION 2023 +▁EXTRA 2024 +▁EXTRACT 2025 +▁EXTRAORDINARY 2026 +▁EXTREME 2027 +▁EXTREMELY 2028 +▁EXTREMIST 2029 +▁EXTROVERT 2030 +▁EYE 2031 +▁EYES 2032 +▁F 2033 +▁FA 2034 +▁FABRIC 2035 +▁FABULOUS 2036 +▁FACE 2037 +▁FACEBOOK 2038 +▁FACED 2039 +▁FACIAL 2040 +▁FACILITIES 2041 +▁FACILITY 2042 +▁FACING 2043 +▁FACT 2044 +▁FACTOR 2045 +▁FACTORS 2046 +▁FACTORY 2047 +▁FACULTY 2048 +▁FAIL 2049 +▁FAILED 2050 +▁FAILING 2051 +▁FAILURE 2052 +▁FAIR 2053 +▁FAIRLY 2054 +▁FAITH 2055 +▁FAKE 2056 +▁FALL 2057 +▁FALLING 2058 +▁FALSE 2059 +▁FAMILIAR 2060 +▁FAMILIES 2061 +▁FAMILY 2062 +▁FAMOUS 2063 +▁FAN 2064 +▁FANCY 2065 +▁FANTASTIC 2066 +▁FANTASY 2067 +▁FAR 2068 +▁FARM 2069 +▁FARMER 2070 +▁FARMERS 2071 +▁FASCINATED 2072 +▁FASCINATING 2073 +▁FASHION 2074 +▁FAST 2075 +▁FASTER 2076 +▁FAT 2077 +▁FATHER 2078 +▁FAULT 2079 +▁FAVOR 2080 +▁FAVORITE 2081 +▁FE 2082 +▁FEAR 2083 +▁FEATURE 2084 +▁FEBRUARY 2085 +▁FEDERAL 2086 +▁FEED 2087 +▁FEEDBACK 2088 +▁FEEL 2089 +▁FEELING 2090 +▁FEELINGS 2091 +▁FEELS 2092 +▁FEET 2093 +▁FELL 2094 +▁FELLOW 2095 +▁FELT 2096 +▁FEMALE 2097 +▁FEMININE 2098 +▁FEMINISM 2099 +▁FEMINIST 2100 +▁FERTIL 2101 +▁FERTILIZER 2102 +▁FESTIVAL 2103 +▁FEW 2104 +▁FEWER 2105 +▁FI 2106 +▁FICTION 2107 +▁FIELD 2108 +▁FIFTH 2109 +▁FIGHT 2110 +▁FIGHTING 2111 +▁FIGURE 2112 +▁FIGURED 2113 +▁FIGURING 2114 +▁FILE 2115 +▁FILL 2116 +▁FILLED 2117 +▁FILM 2118 +▁FILMMAKER 2119 +▁FILTER 2120 +▁FINAL 2121 +▁FINALLY 2122 +▁FINANCE 2123 +▁FINANCIAL 2124 +▁FIND 2125 +▁FINDING 2126 +▁FINE 2127 +▁FINGER 2128 +▁FINISH 2129 +▁FINISHED 2130 +▁FINLAND 2131 +▁FIRE 2132 +▁FIRM 2133 +▁FIRST 2134 +▁FISH 2135 
+▁FISHERIES 2136 +▁FIT 2137 +▁FIVE 2138 +▁FIX 2139 +▁FLAG 2140 +▁FLAME 2141 +▁FLASH 2142 +▁FLAT 2143 +▁FLAVOR 2144 +▁FLAW 2145 +▁FLEE 2146 +▁FLESH 2147 +▁FLEW 2148 +▁FLEXIBILITY 2149 +▁FLEXIBLE 2150 +▁FLIGHT 2151 +▁FLIP 2152 +▁FLOOD 2153 +▁FLOOR 2154 +▁FLORIDA 2155 +▁FLOURISH 2156 +▁FLOW 2157 +▁FLOWER 2158 +▁FLU 2159 +▁FLUID 2160 +▁FLY 2161 +▁FLYING 2162 +▁FO 2163 +▁FOCUS 2164 +▁FOCUSED 2165 +▁FOLD 2166 +▁FOLKS 2167 +▁FOLLOW 2168 +▁FOLLOWED 2169 +▁FOLLOWING 2170 +▁FOOD 2171 +▁FOOL 2172 +▁FOOT 2173 +▁FOOTBALL 2174 +▁FOOTPRINT 2175 +▁FOR 2176 +▁FORCE 2177 +▁FORCED 2178 +▁FORCES 2179 +▁FOREIGN 2180 +▁FOREST 2181 +▁FOREVER 2182 +▁FORGET 2183 +▁FORGIVE 2184 +▁FORGIVENESS 2185 +▁FORGOT 2186 +▁FORGOTTEN 2187 +▁FORM 2188 +▁FORMAL 2189 +▁FORMER 2190 +▁FORMS 2191 +▁FORMULA 2192 +▁FORTH 2193 +▁FORTUNATE 2194 +▁FORTUNATELY 2195 +▁FORTUNE 2196 +▁FORWARD 2197 +▁FOSSIL 2198 +▁FOSTER 2199 +▁FOUND 2200 +▁FOUNDATION 2201 +▁FOUR 2202 +▁FOURTH 2203 +▁FR 2204 +▁FRA 2205 +▁FRACTION 2206 +▁FRAGILE 2207 +▁FRAGMENT 2208 +▁FRAME 2209 +▁FRAMEWORK 2210 +▁FRANCE 2211 +▁FRANCISCO 2212 +▁FRANK 2213 +▁FRANKLY 2214 +▁FREAK 2215 +▁FREE 2216 +▁FREEDOM 2217 +▁FRENCH 2218 +▁FREQUENC 2219 +▁FREQUENT 2220 +▁FRESH 2221 +▁FRIDAY 2222 +▁FRIEND 2223 +▁FRIENDS 2224 +▁FRIENDSHIP 2225 +▁FROM 2226 +▁FRONT 2227 +▁FROZEN 2228 +▁FRUIT 2229 +▁FRUSTRATED 2230 +▁FRUSTRATING 2231 +▁FRUSTRATION 2232 +▁FUEL 2233 +▁FULFILL 2234 +▁FULL 2235 +▁FULLY 2236 +▁FUN 2237 +▁FUNCTION 2238 +▁FUNCTIONAL 2239 +▁FUND 2240 +▁FUNDAMENTAL 2241 +▁FUNDAMENTALLY 2242 +▁FUNDING 2243 +▁FUNERAL 2244 +▁FUNNY 2245 +▁FURNITURE 2246 +▁FURTHER 2247 +▁FUSION 2248 +▁FUTURE 2249 +▁G 2250 +▁GA 2251 +▁GAIN 2252 +▁GALAXIES 2253 +▁GALAXY 2254 +▁GALLON 2255 +▁GAME 2256 +▁GAMES 2257 +▁GANDHI 2258 +▁GAP 2259 +▁GARAGE 2260 +▁GARBAGE 2261 +▁GARDEN 2262 +▁GAS 2263 +▁GATE 2264 +▁GATHER 2265 +▁GAVE 2266 +▁GAY 2267 +▁GDP 2268 +▁GE 2269 +▁GEEK 2270 +▁GEN 2271 +▁GENDER 2272 +▁GENE 2273 +▁GENERAL 2274 +▁GENERALLY 2275 +▁GENERATE 2276 +▁GENERATING 2277 +▁GENERATION 2278 +▁GENERATIONS 2279 +▁GENEROSITY 2280 +▁GENEROUS 2281 +▁GENES 2282 +▁GENETIC 2283 +▁GENITAL 2284 +▁GENIUS 2285 +▁GENOCIDE 2286 +▁GENOME 2287 +▁GENTLE 2288 +▁GENTLEMEN 2289 +▁GENUINE 2290 +▁GEOGRAPHIC 2291 +▁GEORGE 2292 +▁GEORGIA 2293 +▁GERMAN 2294 +▁GERMANY 2295 +▁GESTURE 2296 +▁GET 2297 +▁GETS 2298 +▁GETTING 2299 +▁GI 2300 +▁GIANT 2301 +▁GIFT 2302 +▁GIRL 2303 +▁GIRLFRIEND 2304 +▁GIRLS 2305 +▁GIVE 2306 +▁GIVEN 2307 +▁GIVES 2308 +▁GIVING 2309 +▁GLAD 2310 +▁GLASS 2311 +▁GLIMPSE 2312 +▁GLOBAL 2313 +▁GLOBE 2314 +▁GLOW 2315 +▁GLUCOSE 2316 +▁GO 2317 +▁GOAL 2318 +▁GOALS 2319 +▁GOD 2320 +▁GOES 2321 +▁GOING 2322 +▁GOLD 2323 +▁GOLDEN 2324 +▁GOLF 2325 +▁GONE 2326 +▁GONNA 2327 +▁GOOD 2328 +▁GOODBYE 2329 +▁GOOGLE 2330 +▁GOT 2331 +▁GOTTEN 2332 +▁GOVERN 2333 +▁GOVERNMENT 2334 +▁GOVERNMENTS 2335 +▁GR 2336 +▁GRAB 2337 +▁GRABBED 2338 +▁GRACE 2339 +▁GRADE 2340 +▁GRADUALLY 2341 +▁GRADUATE 2342 +▁GRADUATION 2343 +▁GRAIN 2344 +▁GRAND 2345 +▁GRANDCHILDREN 2346 +▁GRANDFATHER 2347 +▁GRANDMA 2348 +▁GRANDMOTHER 2349 +▁GRANDPARENTS 2350 +▁GRANTED 2351 +▁GRAPH 2352 +▁GRASP 2353 +▁GRASS 2354 +▁GRATEFUL 2355 +▁GRATITUDE 2356 +▁GRAVE 2357 +▁GRAVITATIONAL 2358 +▁GRAVITY 2359 +▁GREAT 2360 +▁GREATER 2361 +▁GREATEST 2362 +▁GREECE 2363 +▁GREEK 2364 +▁GREEN 2365 +▁GREENHOUSE 2366 +▁GREET 2367 +▁GREW 2368 +▁GRID 2369 +▁GRIEF 2370 +▁GRIEV 2371 +▁GROCERY 2372 +▁GROSS 2373 +▁GROUND 2374 +▁GROUP 2375 +▁GROUPS 2376 +▁GROW 2377 +▁GROWING 2378 +▁GROWN 2379 +▁GROWTH 2380 +▁GU 2381 +▁GUARANTEE 2382 +▁GUARD 2383 +▁GUESS 2384 +▁GUIDANCE 2385 +▁GUIDE 2386 +▁GUILT 2387 +▁GUILTY 
2388 +▁GUITAR 2389 +▁GULF 2390 +▁GUN 2391 +▁GUT 2392 +▁GUY 2393 +▁GUYS 2394 +▁GYM 2395 +▁H 2396 +▁HA 2397 +▁HABIT 2398 +▁HABITAT 2399 +▁HACK 2400 +▁HAD 2401 +▁HADN 2402 +▁HAIR 2403 +▁HAITI 2404 +▁HALF 2405 +▁HALL 2406 +▁HALLUCINAT 2407 +▁HAND 2408 +▁HANDLE 2409 +▁HANDS 2410 +▁HANG 2411 +▁HAPPEN 2412 +▁HAPPENED 2413 +▁HAPPENING 2414 +▁HAPPENS 2415 +▁HAPPIER 2416 +▁HAPPILY 2417 +▁HAPPINESS 2418 +▁HAPPY 2419 +▁HAR 2420 +▁HARD 2421 +▁HARDER 2422 +▁HARDWARE 2423 +▁HARM 2424 +▁HARMONY 2425 +▁HARNESS 2426 +▁HARSH 2427 +▁HARVARD 2428 +▁HARVEST 2429 +▁HAS 2430 +▁HASN 2431 +▁HAT 2432 +▁HATE 2433 +▁HATRED 2434 +▁HAVE 2435 +▁HAVEN 2436 +▁HAVING 2437 +▁HAWAII 2438 +▁HE 2439 +▁HEAD 2440 +▁HEALTH 2441 +▁HEALTHCARE 2442 +▁HEALTHIER 2443 +▁HEALTHY 2444 +▁HEAR 2445 +▁HEARD 2446 +▁HEARING 2447 +▁HEART 2448 +▁HEARTBREAK 2449 +▁HEAT 2450 +▁HEAVEN 2451 +▁HEAVILY 2452 +▁HEAVY 2453 +▁HELD 2454 +▁HELLO 2455 +▁HELP 2456 +▁HELPED 2457 +▁HELPFUL 2458 +▁HELPING 2459 +▁HER 2460 +▁HERE 2461 +▁HERITAGE 2462 +▁HERO 2463 +▁HEROES 2464 +▁HERSELF 2465 +▁HETEROSEXUAL 2466 +▁HEY 2467 +▁HI 2468 +▁HIDDEN 2469 +▁HIDE 2470 +▁HIDING 2471 +▁HIERARCHY 2472 +▁HIGGS 2473 +▁HIGH 2474 +▁HIGHER 2475 +▁HIGHEST 2476 +▁HIGHLIGHT 2477 +▁HIGHLY 2478 +▁HILL 2479 +▁HIM 2480 +▁HIMSELF 2481 +▁HIP 2482 +▁HIRE 2483 +▁HIS 2484 +▁HISTORIAN 2485 +▁HISTORIC 2486 +▁HISTORICAL 2487 +▁HISTORY 2488 +▁HIT 2489 +▁HIV 2490 +▁HO 2491 +▁HOL 2492 +▁HOLD 2493 +▁HOLDING 2494 +▁HOLE 2495 +▁HOLIDAY 2496 +▁HOLLYWOOD 2497 +▁HOME 2498 +▁HOMELESS 2499 +▁HOMEWORK 2500 +▁HOMO 2501 +▁HOMOSEXUAL 2502 +▁HONEST 2503 +▁HONESTLY 2504 +▁HONEY 2505 +▁HONOR 2506 +▁HOOK 2507 +▁HOP 2508 +▁HOPE 2509 +▁HOPEFULLY 2510 +▁HORIZON 2511 +▁HORR 2512 +▁HORRIBLE 2513 +▁HORRIFIC 2514 +▁HORSE 2515 +▁HOSPITAL 2516 +▁HOST 2517 +▁HOT 2518 +▁HOTEL 2519 +▁HOUR 2520 +▁HOURS 2521 +▁HOUSE 2522 +▁HOUSEHOLD 2523 +▁HOUSING 2524 +▁HOW 2525 +▁HOWEVER 2526 +▁HU 2527 +▁HUG 2528 +▁HUGE 2529 +▁HUM 2530 +▁HUMAN 2531 +▁HUMANITARIAN 2532 +▁HUMANITY 2533 +▁HUMANS 2534 +▁HUMILITY 2535 +▁HUMOR 2536 +▁HUNDRED 2537 +▁HUNDREDS 2538 +▁HUNGER 2539 +▁HUNGRY 2540 +▁HUNT 2541 +▁HURT 2542 +▁HUSBAND 2543 +▁HYDRO 2544 +▁HYDROGEN 2545 +▁HYMEN 2546 +▁HYPER 2547 +▁HYPOTHESIS 2548 +▁HYPOTHETICAL 2549 +▁I 2550 +▁ICE 2551 +▁IDEA 2552 +▁IDEAL 2553 +▁IDEAS 2554 +▁IDENTICAL 2555 +▁IDENTIFIED 2556 +▁IDENTIFY 2557 +▁IDENTITIES 2558 +▁IDENTITY 2559 +▁IDEOLOGY 2560 +▁IF 2561 +▁IGNORANCE 2562 +▁IGNORE 2563 +▁ILLEGAL 2564 +▁ILLNESS 2565 +▁ILLUSION 2566 +▁ILLUSTRATE 2567 +▁IMAGE 2568 +▁IMAGES 2569 +▁IMAGINATION 2570 +▁IMAGINE 2571 +▁IMAGING 2572 +▁IMAGINING 2573 +▁IMMEDIATE 2574 +▁IMMEDIATELY 2575 +▁IMMENSE 2576 +▁IMMIGRANT 2577 +▁IMMIGRATION 2578 +▁IMMUNE 2579 +▁IMPACT 2580 +▁IMPAIRED 2581 +▁IMPERFECT 2582 +▁IMPLANT 2583 +▁IMPLEMENT 2584 +▁IMPLICATIONS 2585 +▁IMPLIE 2586 +▁IMPORTANCE 2587 +▁IMPORTANT 2588 +▁IMPORTANTLY 2589 +▁IMPOSE 2590 +▁IMPOSSIBLE 2591 +▁IMPRESS 2592 +▁IMPRESSION 2593 +▁IMPROV 2594 +▁IMPROVE 2595 +▁IMPROVEMENT 2596 +▁IMPULSE 2597 +▁IN 2598 +▁INCARCERATED 2599 +▁INCARCERATION 2600 +▁INCENTIVE 2601 +▁INCIDENT 2602 +▁INCLUDE 2603 +▁INCLUDING 2604 +▁INCLUSION 2605 +▁INCLUSIVE 2606 +▁INCOME 2607 +▁INCREASE 2608 +▁INCREASED 2609 +▁INCREASING 2610 +▁INCREASINGLY 2611 +▁INCREDIBLE 2612 +▁INCREDIBLY 2613 +▁INDEED 2614 +▁INDEPENDENCE 2615 +▁INDEPENDENT 2616 +▁INDIA 2617 +▁INDIAN 2618 +▁INDICATE 2619 +▁INDICATOR 2620 +▁INDIGENOUS 2621 +▁INDIVIDUAL 2622 +▁INDIVIDUALS 2623 +▁INDUCE 2624 +▁INDUSTRIAL 2625 +▁INDUSTRIES 2626 +▁INDUSTRY 2627 +▁INEQUALITY 2628 +▁INEVITAB 2629 +▁INFANT 2630 +▁INFECTION 2631 +▁INFECTIOUS 2632 +▁INFINITE 2633 
+▁INFLAT 2634 +▁INFLUENCE 2635 +▁INFORMATION 2636 +▁INFORMED 2637 +▁INFRASTRUCTURE 2638 +▁INGREDIENT 2639 +▁INHABIT 2640 +▁INHERIT 2641 +▁INHIBIT 2642 +▁INITIAL 2643 +▁INITIATIVE 2644 +▁INJECT 2645 +▁INJURED 2646 +▁INJURIES 2647 +▁INJURY 2648 +▁INJUSTICE 2649 +▁INNER 2650 +▁INNOCENT 2651 +▁INNOVATE 2652 +▁INNOVATION 2653 +▁INNOVATIVE 2654 +▁INNOVATOR 2655 +▁INPUT 2656 +▁INSANE 2657 +▁INSECTS 2658 +▁INSIDE 2659 +▁INSIGHT 2660 +▁INSPIRATION 2661 +▁INSPIRE 2662 +▁INSPIRED 2663 +▁INSPIRING 2664 +▁INSTAGRAM 2665 +▁INSTALL 2666 +▁INSTANCE 2667 +▁INSTANT 2668 +▁INSTEAD 2669 +▁INSTINCT 2670 +▁INSTITUTE 2671 +▁INSTITUTION 2672 +▁INSTITUTIONS 2673 +▁INSTRUCTION 2674 +▁INSTRUMENT 2675 +▁INSULIN 2676 +▁INSURANCE 2677 +▁INTEGRATE 2678 +▁INTEGRATION 2679 +▁INTEGRITY 2680 +▁INTELLECTUAL 2681 +▁INTELLIGENCE 2682 +▁INTELLIGENT 2683 +▁INTENSE 2684 +▁INTENSIVE 2685 +▁INTENTION 2686 +▁INTER 2687 +▁INTERACT 2688 +▁INTERACTION 2689 +▁INTERCONNECT 2690 +▁INTEREST 2691 +▁INTERESTED 2692 +▁INTERESTING 2693 +▁INTERFACE 2694 +▁INTERFERE 2695 +▁INTERNAL 2696 +▁INTERNATIONAL 2697 +▁INTERNET 2698 +▁INTERNSHIP 2699 +▁INTERPRET 2700 +▁INTERRUPT 2701 +▁INTERSECTION 2702 +▁INTERVENTION 2703 +▁INTERVIEW 2704 +▁INTIMACY 2705 +▁INTIMATE 2706 +▁INTO 2707 +▁INTRIGU 2708 +▁INTRODUCE 2709 +▁INTRODUCED 2710 +▁INTRODUCING 2711 +▁INTRODUCTION 2712 +▁INTROVERT 2713 +▁INTUITION 2714 +▁INTUITIVE 2715 +▁INVENT 2716 +▁INVENTED 2717 +▁INVENTION 2718 +▁INVEST 2719 +▁INVESTIGAT 2720 +▁INVESTIGATION 2721 +▁INVESTMENT 2722 +▁INVISIBLE 2723 +▁INVITATION 2724 +▁INVITE 2725 +▁INVITED 2726 +▁INVOLVE 2727 +▁INVOLVED 2728 +▁IPHONE 2729 +▁IRAQ 2730 +▁IRON 2731 +▁IRRATIONAL 2732 +▁IS 2733 +▁ISLAM 2734 +▁ISLAMIC 2735 +▁ISLAND 2736 +▁ISN 2737 +▁ISOLATED 2738 +▁ISOLATION 2739 +▁ISRAEL 2740 +▁ISSUE 2741 +▁ISSUES 2742 +▁IT 2743 +▁ITEM 2744 +▁ITSELF 2745 +▁J 2746 +▁JACK 2747 +▁JAIL 2748 +▁JAMES 2749 +▁JANE 2750 +▁JANUARY 2751 +▁JAPAN 2752 +▁JAPANESE 2753 +▁JAZZ 2754 +▁JE 2755 +▁JEALOUS 2756 +▁JENN 2757 +▁JERSEY 2758 +▁JEWISH 2759 +▁JIHAD 2760 +▁JIM 2761 +▁JO 2762 +▁JOB 2763 +▁JOBS 2764 +▁JOHN 2765 +▁JOIN 2766 +▁JOINED 2767 +▁JOKE 2768 +▁JORDAN 2769 +▁JOURNAL 2770 +▁JOURNALIST 2771 +▁JOURNEY 2772 +▁JOY 2773 +▁JU 2774 +▁JUDGE 2775 +▁JUDGING 2776 +▁JUDGMENT 2777 +▁JUICE 2778 +▁JUMP 2779 +▁JUNIOR 2780 +▁JUNK 2781 +▁JUST 2782 +▁JUSTICE 2783 +▁K 2784 +▁KA 2785 +▁KAR 2786 +▁KE 2787 +▁KEEP 2788 +▁KEEPING 2789 +▁KENNEDY 2790 +▁KENYA 2791 +▁KEPT 2792 +▁KEVIN 2793 +▁KEY 2794 +▁KI 2795 +▁KICK 2796 +▁KID 2797 +▁KIDNEY 2798 +▁KIDS 2799 +▁KILL 2800 +▁KILLED 2801 +▁KILLING 2802 +▁KILOMETER 2803 +▁KIND 2804 +▁KINDERGARTEN 2805 +▁KINDNESS 2806 +▁KINDS 2807 +▁KING 2808 +▁KINGDOM 2809 +▁KITCHEN 2810 +▁KNEE 2811 +▁KNEW 2812 +▁KNIFE 2813 +▁KNOCK 2814 +▁KNOW 2815 +▁KNOWING 2816 +▁KNOWLEDGE 2817 +▁KNOWN 2818 +▁KO 2819 +▁KOREA 2820 +▁L 2821 +▁LA 2822 +▁LAB 2823 +▁LABEL 2824 +▁LABOR 2825 +▁LABORATORY 2826 +▁LACK 2827 +▁LADIES 2828 +▁LADY 2829 +▁LAKE 2830 +▁LAND 2831 +▁LANDSCAPE 2832 +▁LANGUAGE 2833 +▁LANGUAGES 2834 +▁LAPTOP 2835 +▁LARGE 2836 +▁LARGELY 2837 +▁LARGER 2838 +▁LARGEST 2839 +▁LAST 2840 +▁LATE 2841 +▁LATER 2842 +▁LATIN 2843 +▁LATVIA 2844 +▁LAUGH 2845 +▁LAUGHTER 2846 +▁LAUNCH 2847 +▁LAUNCHED 2848 +▁LAW 2849 +▁LAWS 2850 +▁LAWYER 2851 +▁LAY 2852 +▁LAYER 2853 +▁LAZY 2854 +▁LE 2855 +▁LEAD 2856 +▁LEADER 2857 +▁LEADERS 2858 +▁LEADERSHIP 2859 +▁LEADING 2860 +▁LEADS 2861 +▁LEAF 2862 +▁LEAGUE 2863 +▁LEAN 2864 +▁LEAP 2865 +▁LEARN 2866 +▁LEARNED 2867 +▁LEARNING 2868 +▁LEAST 2869 +▁LEAVE 2870 +▁LEAVING 2871 +▁LEBANON 2872 +▁LECTURE 2873 +▁LED 2874 +▁LEFT 2875 +▁LEG 2876 +▁LEGACY 2877 
+▁LEGAL 2878 +▁LEGEND 2879 +▁LEGISLAT 2880 +▁LEGITIMATE 2881 +▁LEGS 2882 +▁LENGTH 2883 +▁LENS 2884 +▁LESBIAN 2885 +▁LESS 2886 +▁LESSON 2887 +▁LESSONS 2888 +▁LET 2889 +▁LETTER 2890 +▁LETTING 2891 +▁LEVEL 2892 +▁LEVELS 2893 +▁LEVERAGE 2894 +▁LG 2895 +▁LGBT 2896 +▁LI 2897 +▁LIBERAL 2898 +▁LIBERAT 2899 +▁LIBERTY 2900 +▁LIBRARIES 2901 +▁LIBRARY 2902 +▁LICENSE 2903 +▁LIE 2904 +▁LIES 2905 +▁LIFE 2906 +▁LIFESPAN 2907 +▁LIFESTYLE 2908 +▁LIFETIME 2909 +▁LIFT 2910 +▁LIGHT 2911 +▁LIKE 2912 +▁LIKELY 2913 +▁LIMB 2914 +▁LIMIT 2915 +▁LIMITATIONS 2916 +▁LIMITED 2917 +▁LINE 2918 +▁LINES 2919 +▁LINGUISTIC 2920 +▁LINK 2921 +▁LINKED 2922 +▁LION 2923 +▁LIQUID 2924 +▁LIST 2925 +▁LISTEN 2926 +▁LISTENED 2927 +▁LISTENING 2928 +▁LITERACY 2929 +▁LITERALLY 2930 +▁LITERATURE 2931 +▁LITTLE 2932 +▁LIVE 2933 +▁LIVED 2934 +▁LIVES 2935 +▁LIVING 2936 +▁LO 2937 +▁LOAD 2938 +▁LOAN 2939 +▁LOCAL 2940 +▁LOCATE 2941 +▁LOCATION 2942 +▁LOCK 2943 +▁LOG 2944 +▁LOGIC 2945 +▁LOGICAL 2946 +▁LONDON 2947 +▁LONELINESS 2948 +▁LONELY 2949 +▁LONG 2950 +▁LONGER 2951 +▁LONGEVITY 2952 +▁LOOK 2953 +▁LOOKED 2954 +▁LOOKING 2955 +▁LOOKS 2956 +▁LOOP 2957 +▁LOOSE 2958 +▁LOSE 2959 +▁LOSING 2960 +▁LOSS 2961 +▁LOST 2962 +▁LOT 2963 +▁LOTS 2964 +▁LOUD 2965 +▁LOUIS 2966 +▁LOVE 2967 +▁LOVED 2968 +▁LOVING 2969 +▁LOW 2970 +▁LOWER 2971 +▁LU 2972 +▁LUCK 2973 +▁LUCKILY 2974 +▁LUCKY 2975 +▁LUNCH 2976 +▁LUNG 2977 +▁LYING 2978 +▁M 2979 +▁MA 2980 +▁MACHINE 2981 +▁MACHINES 2982 +▁MAD 2983 +▁MADE 2984 +▁MAGAZINE 2985 +▁MAGIC 2986 +▁MAGICAL 2987 +▁MAGNET 2988 +▁MAGNITUDE 2989 +▁MAIL 2990 +▁MAIN 2991 +▁MAINSTREAM 2992 +▁MAINTAIN 2993 +▁MAJOR 2994 +▁MAJORITY 2995 +▁MAKE 2996 +▁MAKES 2997 +▁MAKING 2998 +▁MALARIA 2999 +▁MALE 3000 +▁MAMMAL 3001 +▁MAMMOTH 3002 +▁MAN 3003 +▁MANAGE 3004 +▁MANAGED 3005 +▁MANAGEMENT 3006 +▁MANAGER 3007 +▁MANAGING 3008 +▁MANHATTAN 3009 +▁MANIFEST 3010 +▁MANIPULAT 3011 +▁MANKIND 3012 +▁MANNER 3013 +▁MANUFACTURE 3014 +▁MANUFACTURING 3015 +▁MANY 3016 +▁MAP 3017 +▁MAR 3018 +▁MARATHON 3019 +▁MARCH 3020 +▁MARIJUANA 3021 +▁MARINE 3022 +▁MARK 3023 +▁MARKET 3024 +▁MARKETING 3025 +▁MARRIAGE 3026 +▁MARRIED 3027 +▁MARRY 3028 +▁MARS 3029 +▁MARTIN 3030 +▁MARY 3031 +▁MASCULINE 3032 +▁MASCULINITY 3033 +▁MASK 3034 +▁MASS 3035 +▁MASSIVE 3036 +▁MASTER 3037 +▁MATCH 3038 +▁MATERIAL 3039 +▁MATERIALS 3040 +▁MATH 3041 +▁MATHEMATICAL 3042 +▁MATHEMATICIAN 3043 +▁MATHEMATICS 3044 +▁MATTER 3045 +▁MATTERS 3046 +▁MAXIMIZ 3047 +▁MAXIMUM 3048 +▁MAY 3049 +▁MAYBE 3050 +▁MC 3051 +▁ME 3052 +▁MEAN 3053 +▁MEANING 3054 +▁MEANINGFUL 3055 +▁MEANS 3056 +▁MEANT 3057 +▁MEANWHILE 3058 +▁MEASURE 3059 +▁MEASURING 3060 +▁MEAT 3061 +▁MECHANICAL 3062 +▁MECHANISM 3063 +▁MEDIA 3064 +▁MEDICAL 3065 +▁MEDICATION 3066 +▁MEDICINE 3067 +▁MEDITATION 3068 +▁MEDIUM 3069 +▁MEET 3070 +▁MEETING 3071 +▁MELT 3072 +▁MEMBER 3073 +▁MEMBERS 3074 +▁MEMORIES 3075 +▁MEMORIZE 3076 +▁MEMORY 3077 +▁MEN 3078 +▁MENTAL 3079 +▁MENTION 3080 +▁MENTIONED 3081 +▁MENTOR 3082 +▁MERELY 3083 +▁MESS 3084 +▁MESSAGE 3085 +▁MESSAGES 3086 +▁MET 3087 +▁METABOLI 3088 +▁METAL 3089 +▁METAPHOR 3090 +▁METERS 3091 +▁METHOD 3092 +▁METROPOLI 3093 +▁MEXICO 3094 +▁MICE 3095 +▁MICHAEL 3096 +▁MICHEL 3097 +▁MICHIGAN 3098 +▁MICRO 3099 +▁MICROBES 3100 +▁MICROSCOPE 3101 +▁MICROSOFT 3102 +▁MID 3103 +▁MIDDLE 3104 +▁MIGHT 3105 +▁MIGRANT 3106 +▁MIGRATION 3107 +▁MIKE 3108 +▁MIL 3109 +▁MILE 3110 +▁MILES 3111 +▁MILITARY 3112 +▁MILK 3113 +▁MILL 3114 +▁MILLENNIA 3115 +▁MILLION 3116 +▁MILLIONS 3117 +▁MIN 3118 +▁MIND 3119 +▁MINDFULNESS 3120 +▁MINDS 3121 +▁MINDSET 3122 +▁MINE 3123 +▁MINI 3124 +▁MINIMUM 3125 +▁MINISTER 3126 +▁MINORITY 3127 +▁MINUTE 3128 +▁MINUTES 3129 
+▁MIRACLE 3130 +▁MIRROR 3131 +▁MIS 3132 +▁MISERABLE 3133 +▁MISS 3134 +▁MISSING 3135 +▁MISSION 3136 +▁MISTAKE 3137 +▁MISTAKES 3138 +▁MIT 3139 +▁MIX 3140 +▁MO 3141 +▁MOBILE 3142 +▁MOBILITY 3143 +▁MODE 3144 +▁MODEL 3145 +▁MODELS 3146 +▁MODERN 3147 +▁MOLECULAR 3148 +▁MOLECULE 3149 +▁MOM 3150 +▁MOMENT 3151 +▁MOMENTS 3152 +▁MONDAY 3153 +▁MONEY 3154 +▁MONITOR 3155 +▁MONKEY 3156 +▁MONSTER 3157 +▁MONTH 3158 +▁MONTHS 3159 +▁MOOD 3160 +▁MOON 3161 +▁MOR 3162 +▁MORAL 3163 +▁MORE 3164 +▁MORNING 3165 +▁MORTALITY 3166 +▁MOSQUITO 3167 +▁MOST 3168 +▁MOSTLY 3169 +▁MOTHER 3170 +▁MOTION 3171 +▁MOTIVATE 3172 +▁MOTIVATED 3173 +▁MOTIVATION 3174 +▁MOTOR 3175 +▁MOUNT 3176 +▁MOUNTAIN 3177 +▁MOUTH 3178 +▁MOVE 3179 +▁MOVED 3180 +▁MOVEMENT 3181 +▁MOVIE 3182 +▁MOVING 3183 +▁MOZART 3184 +▁MR 3185 +▁MU 3186 +▁MUCH 3187 +▁MULTI 3188 +▁MULTIPLE 3189 +▁MULTIPLY 3190 +▁MUM 3191 +▁MURDER 3192 +▁MUSCLE 3193 +▁MUSEUM 3194 +▁MUSIC 3195 +▁MUSICAL 3196 +▁MUSLIM 3197 +▁MUST 3198 +▁MUTATION 3199 +▁MUTUAL 3200 +▁MY 3201 +▁MYSELF 3202 +▁MYSTERIOUS 3203 +▁MYSTERY 3204 +▁MYTH 3205 +▁N 3206 +▁NA 3207 +▁NAKED 3208 +▁NAME 3209 +▁NAMED 3210 +▁NANO 3211 +▁NARRATIVE 3212 +▁NARROW 3213 +▁NASA 3214 +▁NATION 3215 +▁NATIONAL 3216 +▁NATIONS 3217 +▁NATIVE 3218 +▁NATURAL 3219 +▁NATURALLY 3220 +▁NATURE 3221 +▁NAVIGAT 3222 +▁NAVIGATE 3223 +▁NBSP 3224 +▁NE 3225 +▁NEAR 3226 +▁NEARLY 3227 +▁NECESSARILY 3228 +▁NECESSARY 3229 +▁NECK 3230 +▁NEED 3231 +▁NEEDED 3232 +▁NEEDS 3233 +▁NEGATIVE 3234 +▁NEGLECT 3235 +▁NEGOTIATE 3236 +▁NEGOTIATION 3237 +▁NEIGHBOR 3238 +▁NEIGHBORHOOD 3239 +▁NEIGHBORS 3240 +▁NEIGHBOUR 3241 +▁NEITHER 3242 +▁NERVE 3243 +▁NERVOUS 3244 +▁NETHERLANDS 3245 +▁NETWORK 3246 +▁NEURAL 3247 +▁NEURO 3248 +▁NEURONS 3249 +▁NEUROSCIENCE 3250 +▁NEUROSCIENTIST 3251 +▁NEUTRAL 3252 +▁NEUTRON 3253 +▁NEVER 3254 +▁NEW 3255 +▁NEWS 3256 +▁NEWSPAPER 3257 +▁NEXT 3258 +▁NGO 3259 +▁NI 3260 +▁NICE 3261 +▁NIGERIA 3262 +▁NIGHT 3263 +▁NIGHTMARE 3264 +▁NINE 3265 +▁NO 3266 +▁NOBEL 3267 +▁NOBODY 3268 +▁NOISE 3269 +▁NON 3270 +▁NONE 3271 +▁NONPROFIT 3272 +▁NONVIOLENT 3273 +▁NOR 3274 +▁NORM 3275 +▁NORMAL 3276 +▁NORMALLY 3277 +▁NORTH 3278 +▁NORTHERN 3279 +▁NORWAY 3280 +▁NORWEGIAN 3281 +▁NOT 3282 +▁NOTE 3283 +▁NOTHING 3284 +▁NOTICE 3285 +▁NOTICED 3286 +▁NOTION 3287 +▁NOURISH 3288 +▁NOVEL 3289 +▁NOVEMBER 3290 +▁NOW 3291 +▁NOWADAYS 3292 +▁NOWHERE 3293 +▁NS 3294 +▁NU 3295 +▁NUCLEAR 3296 +▁NUMB 3297 +▁NUMBER 3298 +▁NUMBERS 3299 +▁NUMEROUS 3300 +▁NURSE 3301 +▁NURSING 3302 +▁NURTURE 3303 +▁NUTRITION 3304 +▁O 3305 +▁OB 3306 +▁OBAMA 3307 +▁OBESITY 3308 +▁OBJECT 3309 +▁OBJECTIVE 3310 +▁OBJECTS 3311 +▁OBLIGATION 3312 +▁OBSERVATION 3313 +▁OBSERVE 3314 +▁OBSESSED 3315 +▁OBSTACLE 3316 +▁OBTAIN 3317 +▁OBVIOUS 3318 +▁OBVIOUSLY 3319 +▁OCCASION 3320 +▁OCCUPY 3321 +▁OCCUR 3322 +▁OCCURRED 3323 +▁OCEAN 3324 +▁OCTOBER 3325 +▁ODD 3326 +▁OF 3327 +▁OFF 3328 +▁OFFENDERS 3329 +▁OFFER 3330 +▁OFFERED 3331 +▁OFFICE 3332 +▁OFFICER 3333 +▁OFFICIAL 3334 +▁OFFSPRING 3335 +▁OFTEN 3336 +▁OFTENTIMES 3337 +▁OH 3338 +▁OIL 3339 +▁OK 3340 +▁OKAY 3341 +▁OLD 3342 +▁OLDER 3343 +▁OLIVE 3344 +▁OLYMPIC 3345 +▁ON 3346 +▁ONCE 3347 +▁ONE 3348 +▁ONGOING 3349 +▁ONLINE 3350 +▁ONLY 3351 +▁ONTO 3352 +▁OP 3353 +▁OPEN 3354 +▁OPENED 3355 +▁OPENING 3356 +▁OPERA 3357 +▁OPERATE 3358 +▁OPERATING 3359 +▁OPERATION 3360 +▁OPINION 3361 +▁OPPONENT 3362 +▁OPPORTUNITIES 3363 +▁OPPORTUNITY 3364 +▁OPPOSED 3365 +▁OPPOSITE 3366 +▁OPPOSITION 3367 +▁OPPRESS 3368 +▁OPTIMAL 3369 +▁OPTIMISM 3370 +▁OPTIMIZ 3371 +▁OPTION 3372 +▁OPTIONS 3373 +▁OR 3374 +▁ORANGE 3375 +▁ORBIT 3376 +▁ORCHESTRA 3377 +▁ORDER 3378 +▁ORDINARY 3379 +▁ORGAN 3380 +▁ORGANIC 3381 
+▁ORGANISM 3382 +▁ORGANIZATION 3383 +▁ORGANIZATIONS 3384 +▁ORGANIZE 3385 +▁ORGANIZED 3386 +▁ORGANIZING 3387 +▁ORGASM 3388 +▁ORIENTATION 3389 +▁ORIENTED 3390 +▁ORIGIN 3391 +▁ORIGINAL 3392 +▁ORPHAN 3393 +▁OTHER 3394 +▁OTHERS 3395 +▁OTHERWISE 3396 +▁OUR 3397 +▁OURSELVES 3398 +▁OUT 3399 +▁OUTBREAK 3400 +▁OUTCOME 3401 +▁OUTRAGE 3402 +▁OUTSIDE 3403 +▁OVER 3404 +▁OVERALL 3405 +▁OVERCOME 3406 +▁OVERLOOK 3407 +▁OVERNIGHT 3408 +▁OVERWHELMED 3409 +▁OVERWHELMING 3410 +▁OWE 3411 +▁OWN 3412 +▁OWNERSHIP 3413 +▁OXFORD 3414 +▁OXYGEN 3415 +▁OXYTOCIN 3416 +▁P 3417 +▁PA 3418 +▁PACE 3419 +▁PACIFIC 3420 +▁PACK 3421 +▁PACKAGE 3422 +▁PAGE 3423 +▁PAID 3424 +▁PAIN 3425 +▁PAINFUL 3426 +▁PAINT 3427 +▁PAINTING 3428 +▁PAIR 3429 +▁PAKISTAN 3430 +▁PALESTINIAN 3431 +▁PAN 3432 +▁PANEL 3433 +▁PANIC 3434 +▁PANTS 3435 +▁PAPER 3436 +▁PARA 3437 +▁PARADIGM 3438 +▁PARADOX 3439 +▁PARALLEL 3440 +▁PARALYZE 3441 +▁PARENT 3442 +▁PARENTS 3443 +▁PARIS 3444 +▁PARK 3445 +▁PARKINSON 3446 +▁PARLIAMENT 3447 +▁PART 3448 +▁PARTICIPAT 3449 +▁PARTICIPATE 3450 +▁PARTICIPATION 3451 +▁PARTICLE 3452 +▁PARTICLES 3453 +▁PARTICULAR 3454 +▁PARTICULARLY 3455 +▁PARTIES 3456 +▁PARTNER 3457 +▁PARTNERSHIP 3458 +▁PARTS 3459 +▁PARTY 3460 +▁PASS 3461 +▁PASSED 3462 +▁PASSENGER 3463 +▁PASSION 3464 +▁PASSIONATE 3465 +▁PAST 3466 +▁PATENT 3467 +▁PATH 3468 +▁PATHOGEN 3469 +▁PATHWAY 3470 +▁PATIENCE 3471 +▁PATIENT 3472 +▁PATIENTS 3473 +▁PATTERN 3474 +▁PATTERNS 3475 +▁PAUL 3476 +▁PAUSE 3477 +▁PAY 3478 +▁PAYING 3479 +▁PE 3480 +▁PEACE 3481 +▁PEACEFUL 3482 +▁PEAK 3483 +▁PEER 3484 +▁PEN 3485 +▁PENALTY 3486 +▁PENGUIN 3487 +▁PENNSYLVANIA 3488 +▁PEOPLE 3489 +▁PER 3490 +▁PERCEIVE 3491 +▁PERCENT 3492 +▁PERCENTAGE 3493 +▁PERCEPTION 3494 +▁PERFECT 3495 +▁PERFECTION 3496 +▁PERFECTLY 3497 +▁PERFORM 3498 +▁PERFORMANCE 3499 +▁PERFORMING 3500 +▁PERHAPS 3501 +▁PERIOD 3502 +▁PERMANENT 3503 +▁PERMISSION 3504 +▁PERPETRATOR 3505 +▁PERPETUAT 3506 +▁PERSIST 3507 +▁PERSON 3508 +▁PERSONAL 3509 +▁PERSONALITY 3510 +▁PERSONALLY 3511 +▁PERSPECTIVE 3512 +▁PERSUADE 3513 +▁PESTICIDE 3514 +▁PET 3515 +▁PH 3516 +▁PHARMA 3517 +▁PHARMACEUTICAL 3518 +▁PHASE 3519 +▁PHENOMENA 3520 +▁PHENOMENON 3521 +▁PHILADELPHIA 3522 +▁PHILANTHROP 3523 +▁PHILOSOPHER 3524 +▁PHILOSOPHICAL 3525 +▁PHILOSOPHY 3526 +▁PHONE 3527 +▁PHOTO 3528 +▁PHOTOGRAPH 3529 +▁PHRASE 3530 +▁PHYSICAL 3531 +▁PHYSICALLY 3532 +▁PHYSICIAN 3533 +▁PHYSICIST 3534 +▁PHYSICS 3535 +▁PHYSIOLOGICAL 3536 +▁PI 3537 +▁PIANO 3538 +▁PICK 3539 +▁PICKED 3540 +▁PICTURE 3541 +▁PICTURES 3542 +▁PIECE 3543 +▁PIECES 3544 +▁PIG 3545 +▁PILL 3546 +▁PILOT 3547 +▁PIN 3548 +▁PINK 3549 +▁PIONEER 3550 +▁PITCH 3551 +▁PL 3552 +▁PLACE 3553 +▁PLACES 3554 +▁PLAIN 3555 +▁PLAN 3556 +▁PLANE 3557 +▁PLANET 3558 +▁PLANNING 3559 +▁PLANT 3560 +▁PLANTS 3561 +▁PLASTIC 3562 +▁PLATE 3563 +▁PLATFORM 3564 +▁PLAY 3565 +▁PLAYED 3566 +▁PLAYER 3567 +▁PLAYGROUND 3568 +▁PLAYING 3569 +▁PLEA 3570 +▁PLEASE 3571 +▁PLEASURE 3572 +▁PLENTY 3573 +▁PLOT 3574 +▁PLUS 3575 +▁PO 3576 +▁POCKET 3577 +▁POEM 3578 +▁POET 3579 +▁POETRY 3580 +▁POINT 3581 +▁POINTS 3582 +▁POISON 3583 +▁POLAR 3584 +▁POLE 3585 +▁POLICE 3586 +▁POLICIES 3587 +▁POLICY 3588 +▁POLIO 3589 +▁POLISH 3590 +▁POLITE 3591 +▁POLITICAL 3592 +▁POLITICIAN 3593 +▁POLITICIANS 3594 +▁POLITICS 3595 +▁POLL 3596 +▁POLLUTION 3597 +▁POOL 3598 +▁POOR 3599 +▁POP 3600 +▁POPULAR 3601 +▁POPULATION 3602 +▁PORN 3603 +▁PORT 3604 +▁PORTRAIT 3605 +▁PORTRAY 3606 +▁POSE 3607 +▁POSITION 3608 +▁POSITIVE 3609 +▁POSSESS 3610 +▁POSSIBILITIES 3611 +▁POSSIBILITY 3612 +▁POSSIBLE 3613 +▁POSSIBLY 3614 +▁POST 3615 +▁POTATO 3616 +▁POTENTIAL 3617 +▁POTENTIALLY 3618 +▁POUND 3619 +▁POUNDS 3620 
+▁POUR 3621 +▁POVERTY 3622 +▁POWER 3623 +▁POWERFUL 3624 +▁PR 3625 +▁PRACTICAL 3626 +▁PRACTICE 3627 +▁PRACTICING 3628 +▁PRAISE 3629 +▁PRAY 3630 +▁PRE 3631 +▁PRECIOUS 3632 +▁PRECISE 3633 +▁PREDATOR 3634 +▁PREDICT 3635 +▁PREFER 3636 +▁PREGNANCY 3637 +▁PREGNANT 3638 +▁PREJUDICE 3639 +▁PREPARATION 3640 +▁PREPARE 3641 +▁PREPARED 3642 +▁PREPARING 3643 +▁PRESCRIBE 3644 +▁PRESCRIPTION 3645 +▁PRESENCE 3646 +▁PRESENT 3647 +▁PRESENTATION 3648 +▁PRESERVE 3649 +▁PRESIDENT 3650 +▁PRESS 3651 +▁PRESSURE 3652 +▁PRETEND 3653 +▁PRETTY 3654 +▁PREVENT 3655 +▁PREVIOUS 3656 +▁PRICE 3657 +▁PRIDE 3658 +▁PRIM 3659 +▁PRIMARILY 3660 +▁PRIMARY 3661 +▁PRIME 3662 +▁PRIMITIVE 3663 +▁PRINCE 3664 +▁PRINCIPAL 3665 +▁PRINCIPLE 3666 +▁PRINCIPLES 3667 +▁PRINT 3668 +▁PRIOR 3669 +▁PRIORITIZE 3670 +▁PRIORITY 3671 +▁PRISON 3672 +▁PRIVACY 3673 +▁PRIVATE 3674 +▁PRIVILEGE 3675 +▁PRIZE 3676 +▁PRO 3677 +▁PROBABILITY 3678 +▁PROBABLY 3679 +▁PROBLEM 3680 +▁PROBLEMATIC 3681 +▁PROBLEMS 3682 +▁PROCEDURE 3683 +▁PROCEED 3684 +▁PROCESS 3685 +▁PROCESSES 3686 +▁PROCRASTINAT 3687 +▁PRODUCE 3688 +▁PRODUCED 3689 +▁PRODUCING 3690 +▁PRODUCT 3691 +▁PRODUCTION 3692 +▁PRODUCTIVE 3693 +▁PRODUCTIVITY 3694 +▁PRODUCTS 3695 +▁PROFESSION 3696 +▁PROFESSIONAL 3697 +▁PROFESSOR 3698 +▁PROFILE 3699 +▁PROFIT 3700 +▁PROFOUND 3701 +▁PROGRAM 3702 +▁PROGRAMME 3703 +▁PROGRAMMING 3704 +▁PROGRAMS 3705 +▁PROGRESS 3706 +▁PROJECT 3707 +▁PROJECTS 3708 +▁PROMISE 3709 +▁PROMISING 3710 +▁PROMOT 3711 +▁PROMOTE 3712 +▁PROOF 3713 +▁PROP 3714 +▁PROPER 3715 +▁PROPERLY 3716 +▁PROPERTIES 3717 +▁PROPERTY 3718 +▁PROPORTION 3719 +▁PROPOSAL 3720 +▁PROPOSE 3721 +▁PROPOSITION 3722 +▁PROSECUT 3723 +▁PROSPECT 3724 +▁PROSPERITY 3725 +▁PROSTHETIC 3726 +▁PROTECT 3727 +▁PROTECTED 3728 +▁PROTECTION 3729 +▁PROTEIN 3730 +▁PROTEST 3731 +▁PROTOCOL 3732 +▁PROTOTYPE 3733 +▁PROUD 3734 +▁PROVE 3735 +▁PROVIDE 3736 +▁PROVIDING 3737 +▁PSYCH 3738 +▁PSYCHIATRIST 3739 +▁PSYCHOLOGICAL 3740 +▁PSYCHOLOGIST 3741 +▁PSYCHOLOGY 3742 +▁PSYCHOPATH 3743 +▁PTSD 3744 +▁PUBLIC 3745 +▁PUBLISH 3746 +▁PUBLISHED 3747 +▁PULL 3748 +▁PULLED 3749 +▁PULSE 3750 +▁PUMP 3751 +▁PUNCH 3752 +▁PUNISH 3753 +▁PUNISHMENT 3754 +▁PUR 3755 +▁PURCHASE 3756 +▁PURE 3757 +▁PURPLE 3758 +▁PURPOSE 3759 +▁PURSUE 3760 +▁PURSUING 3761 +▁PURSUIT 3762 +▁PUSH 3763 +▁PUSHED 3764 +▁PUSHING 3765 +▁PUT 3766 +▁PUTTING 3767 +▁PUZZLE 3768 +▁PYRAMID 3769 +▁QU 3770 +▁QUALIFIED 3771 +▁QUALITIES 3772 +▁QUALITY 3773 +▁QUANTUM 3774 +▁QUARTER 3775 +▁QUEEN 3776 +▁QUEER 3777 +▁QUESTION 3778 +▁QUESTIONS 3779 +▁QUICK 3780 +▁QUICKLY 3781 +▁QUIET 3782 +▁QUIT 3783 +▁QUITE 3784 +▁QUO 3785 +▁QUOTE 3786 +▁R 3787 +▁RA 3788 +▁RABBI 3789 +▁RACE 3790 +▁RACIAL 3791 +▁RACISM 3792 +▁RACIST 3793 +▁RADIATION 3794 +▁RADICAL 3795 +▁RADIO 3796 +▁RAIN 3797 +▁RAINFOREST 3798 +▁RAISE 3799 +▁RAISED 3800 +▁RAISING 3801 +▁RAN 3802 +▁RANDOM 3803 +▁RANGE 3804 +▁RANK 3805 +▁RAP 3806 +▁RAPE 3807 +▁RAPID 3808 +▁RAPIDLY 3809 +▁RARE 3810 +▁RARELY 3811 +▁RAT 3812 +▁RATE 3813 +▁RATES 3814 +▁RATHER 3815 +▁RATIONAL 3816 +▁RAW 3817 +▁RAY 3818 +▁RE 3819 +▁REACH 3820 +▁REACHED 3821 +▁REACT 3822 +▁REACTION 3823 +▁READ 3824 +▁READING 3825 +▁READY 3826 +▁REAL 3827 +▁REALISE 3828 +▁REALITY 3829 +▁REALIZE 3830 +▁REALIZED 3831 +▁REALLY 3832 +▁REASON 3833 +▁REASONS 3834 +▁REBEL 3835 +▁REBUILD 3836 +▁REC 3837 +▁RECALL 3838 +▁RECEIVE 3839 +▁RECEIVED 3840 +▁RECEIVING 3841 +▁RECENT 3842 +▁RECENTLY 3843 +▁RECEPTOR 3844 +▁RECIPE 3845 +▁RECOGNITION 3846 +▁RECOGNIZE 3847 +▁RECOGNIZED 3848 +▁RECOGNIZING 3849 +▁RECOMMEND 3850 +▁RECONNECT 3851 +▁RECONSTRUCT 3852 +▁RECORD 3853 +▁RECOVER 3854 +▁RECOVERY 3855 +▁RECRUIT 3856 +▁RECYCLE 
3857 +▁RECYCLING 3858 +▁RED 3859 +▁REDESIGN 3860 +▁REDUC 3861 +▁REDUCE 3862 +▁REDUCTION 3863 +▁REEF 3864 +▁REFER 3865 +▁REFERENCE 3866 +▁REFERR 3867 +▁REFLECT 3868 +▁REFORM 3869 +▁REFRA 3870 +▁REFUGEE 3871 +▁REFUGEES 3872 +▁REFUSE 3873 +▁REGARD 3874 +▁REGARDLESS 3875 +▁REGIME 3876 +▁REGION 3877 +▁REGISTER 3878 +▁REGRET 3879 +▁REGULAR 3880 +▁REGULATE 3881 +▁REGULATION 3882 +▁REHABILITATION 3883 +▁REINFORCE 3884 +▁REINVENT 3885 +▁REJECT 3886 +▁REJECTION 3887 +▁RELATE 3888 +▁RELATED 3889 +▁RELATION 3890 +▁RELATIONSHIP 3891 +▁RELATIONSHIPS 3892 +▁RELATIVE 3893 +▁RELATIVELY 3894 +▁RELATIVITY 3895 +▁RELAX 3896 +▁RELEASE 3897 +▁RELEASED 3898 +▁RELEVANT 3899 +▁RELI 3900 +▁RELIABLE 3901 +▁RELIEF 3902 +▁RELIGION 3903 +▁RELIGIOUS 3904 +▁RELY 3905 +▁REMAIN 3906 +▁REMARKABLE 3907 +▁REMARKABLY 3908 +▁REMEMBER 3909 +▁REMIND 3910 +▁REMOTE 3911 +▁REMOVE 3912 +▁REMOVING 3913 +▁RENAISSANCE 3914 +▁RENEWABLE 3915 +▁RENT 3916 +▁REPAIR 3917 +▁REPEAT 3918 +▁REPLACE 3919 +▁REPLICA 3920 +▁REPORT 3921 +▁REPRESENT 3922 +▁REPRESENTATION 3923 +▁REPRESENTATIVE 3924 +▁REPRODUCE 3925 +▁REPRODUCTION 3926 +▁REPRODUCTIVE 3927 +▁REPUBLIC 3928 +▁REPUTATION 3929 +▁REQUEST 3930 +▁REQUIRE 3931 +▁REQUIRED 3932 +▁REQUIRES 3933 +▁RESCUE 3934 +▁RESEARCH 3935 +▁RESEARCHERS 3936 +▁RESERVE 3937 +▁RESIDENT 3938 +▁RESILIENCE 3939 +▁RESILIENT 3940 +▁RESIST 3941 +▁RESISTANCE 3942 +▁RESOLUTION 3943 +▁RESOLVE 3944 +▁RESONATE 3945 +▁RESOURCE 3946 +▁RESOURCES 3947 +▁RESPECT 3948 +▁RESPOND 3949 +▁RESPONSE 3950 +▁RESPONSIBILITY 3951 +▁RESPONSIBLE 3952 +▁REST 3953 +▁RESTAURANT 3954 +▁RESTORE 3955 +▁RESTRICT 3956 +▁RESULT 3957 +▁RESULTS 3958 +▁RESUME 3959 +▁RETAIL 3960 +▁RETHINK 3961 +▁RETIRE 3962 +▁RETREAT 3963 +▁RETURN 3964 +▁REV 3965 +▁REVEAL 3966 +▁REVENGE 3967 +▁REVENUE 3968 +▁REVERSE 3969 +▁REVIEW 3970 +▁REVOLUTION 3971 +▁REWARD 3972 +▁RHYTHM 3973 +▁RI 3974 +▁RICE 3975 +▁RICH 3976 +▁RICHARD 3977 +▁RID 3978 +▁RIDE 3979 +▁RIDICULOUS 3980 +▁RIGHT 3981 +▁RIGHTS 3982 +▁RIGID 3983 +▁RING 3984 +▁RIPPLE 3985 +▁RISE 3986 +▁RISING 3987 +▁RISK 3988 +▁RISKS 3989 +▁RITUAL 3990 +▁RIVER 3991 +▁RO 3992 +▁ROAD 3993 +▁ROB 3994 +▁ROBERT 3995 +▁ROBOT 3996 +▁ROBOTIC 3997 +▁ROBOTS 3998 +▁ROBUST 3999 +▁ROCK 4000 +▁ROCKET 4001 +▁ROLE 4002 +▁ROLL 4003 +▁ROMAN 4004 +▁ROMANTIC 4005 +▁ROOF 4006 +▁ROOM 4007 +▁ROOT 4008 +▁ROSE 4009 +▁ROUGH 4010 +▁ROUGHLY 4011 +▁ROUND 4012 +▁ROUTE 4013 +▁ROUTINE 4014 +▁ROW 4015 +▁ROYAL 4016 +▁RU 4017 +▁RUBB 4018 +▁RUIN 4019 +▁RULE 4020 +▁RULES 4021 +▁RUN 4022 +▁RUNNING 4023 +▁RURAL 4024 +▁RUSH 4025 +▁RUSSIA 4026 +▁RUSSIAN 4027 +▁RWANDA 4028 +▁S 4029 +▁SA 4030 +▁SACRED 4031 +▁SACRIFICE 4032 +▁SAD 4033 +▁SAFE 4034 +▁SAFETY 4035 +▁SAID 4036 +▁SAIL 4037 +▁SAL 4038 +▁SAME 4039 +▁SAMPLE 4040 +▁SAN 4041 +▁SAND 4042 +▁SANDWICH 4043 +▁SARAH 4044 +▁SAT 4045 +▁SATELLITE 4046 +▁SATISFACTION 4047 +▁SATISFIED 4048 +▁SATISFY 4049 +▁SATURDAY 4050 +▁SAUDI 4051 +▁SAVE 4052 +▁SAVED 4053 +▁SAVING 4054 +▁SAW 4055 +▁SAY 4056 +▁SAYING 4057 +▁SAYS 4058 +▁SC 4059 +▁SCALE 4060 +▁SCAN 4061 +▁SCANN 4062 +▁SCAR 4063 +▁SCARED 4064 +▁SCARY 4065 +▁SCENARIO 4066 +▁SCENE 4067 +▁SCHEDULE 4068 +▁SCHIZOPHRENIA 4069 +▁SCHOLAR 4070 +▁SCHOLARSHIP 4071 +▁SCHOOL 4072 +▁SCHOOLS 4073 +▁SCIENCE 4074 +▁SCIENTIFIC 4075 +▁SCIENTIST 4076 +▁SCIENTISTS 4077 +▁SCORE 4078 +▁SCOTT 4079 +▁SCRAP 4080 +▁SCRATCH 4081 +▁SCREAM 4082 +▁SCREEN 4083 +▁SCREW 4084 +▁SCRIPT 4085 +▁SE 4086 +▁SEA 4087 +▁SEARCH 4088 +▁SEARCHING 4089 +▁SEASON 4090 +▁SEAT 4091 +▁SECOND 4092 +▁SECRET 4093 +▁SECTION 4094 +▁SECTOR 4095 +▁SECULAR 4096 +▁SECURE 4097 +▁SECURITY 4098 +▁SEE 4099 +▁SEEDS 4100 +▁SEEING 4101 +▁SEEK 4102 
+▁SEEM 4103 +▁SEEMED 4104 +▁SEEMINGLY 4105 +▁SEEMS 4106 +▁SEEN 4107 +▁SELECT 4108 +▁SELF 4109 +▁SELL 4110 +▁SELLING 4111 +▁SELVES 4112 +▁SEMESTER 4113 +▁SEMI 4114 +▁SEND 4115 +▁SENIOR 4116 +▁SENSATION 4117 +▁SENSE 4118 +▁SENSITIVE 4119 +▁SENSOR 4120 +▁SENT 4121 +▁SENTENCE 4122 +▁SEPARATE 4123 +▁SEPARATION 4124 +▁SEPTEMBER 4125 +▁SEQUENCE 4126 +▁SERIES 4127 +▁SERIOUS 4128 +▁SERIOUSLY 4129 +▁SERVE 4130 +▁SERVICE 4131 +▁SERVICES 4132 +▁SERVING 4133 +▁SESSION 4134 +▁SET 4135 +▁SETTING 4136 +▁SETTLE 4137 +▁SEVEN 4138 +▁SEVERAL 4139 +▁SEVERE 4140 +▁SEX 4141 +▁SEXUAL 4142 +▁SEXUALITY 4143 +▁SH 4144 +▁SHA 4145 +▁SHADOW 4146 +▁SHAKE 4147 +▁SHAKESPEARE 4148 +▁SHAME 4149 +▁SHAPE 4150 +▁SHARE 4151 +▁SHARED 4152 +▁SHARING 4153 +▁SHARK 4154 +▁SHARP 4155 +▁SHE 4156 +▁SHELTER 4157 +▁SHI 4158 +▁SHIFT 4159 +▁SHIP 4160 +▁SHIRT 4161 +▁SHOCK 4162 +▁SHOES 4163 +▁SHOOT 4164 +▁SHOOTING 4165 +▁SHOP 4166 +▁SHOPPING 4167 +▁SHORE 4168 +▁SHORT 4169 +▁SHOT 4170 +▁SHOULD 4171 +▁SHOULDER 4172 +▁SHOULDN 4173 +▁SHOUT 4174 +▁SHOW 4175 +▁SHOWED 4176 +▁SHOWING 4177 +▁SHOWN 4178 +▁SHOWS 4179 +▁SHRINK 4180 +▁SHUT 4181 +▁SHY 4182 +▁SI 4183 +▁SICK 4184 +▁SIDE 4185 +▁SIGHT 4186 +▁SIGN 4187 +▁SIGNAL 4188 +▁SIGNATURE 4189 +▁SIGNIFICANCE 4190 +▁SIGNIFICANT 4191 +▁SILENCE 4192 +▁SILENT 4193 +▁SILICON 4194 +▁SILLY 4195 +▁SILVER 4196 +▁SIMILAR 4197 +▁SIMPLE 4198 +▁SIMPLY 4199 +▁SIMULATION 4200 +▁SIMULTANEOUS 4201 +▁SINCE 4202 +▁SINGING 4203 +▁SINGLE 4204 +▁SINK 4205 +▁SIR 4206 +▁SISTER 4207 +▁SIT 4208 +▁SITE 4209 +▁SITTING 4210 +▁SITUATION 4211 +▁SIX 4212 +▁SIZE 4213 +▁SKEPTIC 4214 +▁SKI 4215 +▁SKILL 4216 +▁SKILLS 4217 +▁SKIN 4218 +▁SKIP 4219 +▁SKULL 4220 +▁SKY 4221 +▁SLAUGHTER 4222 +▁SLAVE 4223 +▁SLEEP 4224 +▁SLEEVE 4225 +▁SLEPT 4226 +▁SLICE 4227 +▁SLIDE 4228 +▁SLIGHTLY 4229 +▁SLIP 4230 +▁SLOW 4231 +▁SLOWLY 4232 +▁SLUM 4233 +▁SMALL 4234 +▁SMALLER 4235 +▁SMART 4236 +▁SMARTPHONE 4237 +▁SMELL 4238 +▁SMILE 4239 +▁SMILING 4240 +▁SMOKE 4241 +▁SMOKING 4242 +▁SMOOTH 4243 +▁SNAKE 4244 +▁SNAP 4245 +▁SNEAK 4246 +▁SNOW 4247 +▁SO 4248 +▁SOCCER 4249 +▁SOCIAL 4250 +▁SOCIETAL 4251 +▁SOCIETIES 4252 +▁SOCIETY 4253 +▁SOFT 4254 +▁SOFTWARE 4255 +▁SOIL 4256 +▁SOLAR 4257 +▁SOLD 4258 +▁SOLDIER 4259 +▁SOLID 4260 +▁SOLUTION 4261 +▁SOLUTIONS 4262 +▁SOLVE 4263 +▁SOLVED 4264 +▁SOLVING 4265 +▁SOME 4266 +▁SOMEBODY 4267 +▁SOMEDAY 4268 +▁SOMEHOW 4269 +▁SOMEONE 4270 +▁SOMETHING 4271 +▁SOMETIMES 4272 +▁SOMEWHAT 4273 +▁SOMEWHERE 4274 +▁SON 4275 +▁SONG 4276 +▁SOON 4277 +▁SOPHISTICATED 4278 +▁SORRY 4279 +▁SORT 4280 +▁SOUL 4281 +▁SOUND 4282 +▁SOUNDS 4283 +▁SOURCE 4284 +▁SOUTH 4285 +▁SOUTHERN 4286 +▁SOVIET 4287 +▁SP 4288 +▁SPACE 4289 +▁SPACECRAFT 4290 +▁SPAN 4291 +▁SPANISH 4292 +▁SPARK 4293 +▁SPATIAL 4294 +▁SPEAK 4295 +▁SPEAKER 4296 +▁SPEAKING 4297 +▁SPECIAL 4298 +▁SPECIES 4299 +▁SPECIFIC 4300 +▁SPECIFICALLY 4301 +▁SPECTACULAR 4302 +▁SPECTRUM 4303 +▁SPEECH 4304 +▁SPEED 4305 +▁SPELL 4306 +▁SPEND 4307 +▁SPENDING 4308 +▁SPENT 4309 +▁SPHERE 4310 +▁SPIN 4311 +▁SPIRAL 4312 +▁SPIRIT 4313 +▁SPIRITUAL 4314 +▁SPLIT 4315 +▁SPOKE 4316 +▁SPOKEN 4317 +▁SPONSOR 4318 +▁SPONTANEOUS 4319 +▁SPORT 4320 +▁SPORTS 4321 +▁SPOT 4322 +▁SPOUSE 4323 +▁SPRAY 4324 +▁SPREAD 4325 +▁SPRING 4326 +▁SQUARE 4327 +▁SQUEEZE 4328 +▁SQUI 4329 +▁ST 4330 +▁STA 4331 +▁STABLE 4332 +▁STAFF 4333 +▁STAGE 4334 +▁STAKE 4335 +▁STAMP 4336 +▁STAND 4337 +▁STANDARD 4338 +▁STANDING 4339 +▁STANFORD 4340 +▁STAR 4341 +▁STARS 4342 +▁START 4343 +▁STARTED 4344 +▁STARTING 4345 +▁STARTS 4346 +▁STATE 4347 +▁STATEMENT 4348 +▁STATES 4349 +▁STATION 4350 +▁STATISTIC 4351 +▁STATISTICS 4352 +▁STATUS 4353 +▁STAY 4354 +▁STAYED 4355 +▁STEAL 4356 
+▁STEEL 4357 +▁STEM 4358 +▁STEP 4359 +▁STEPPED 4360 +▁STEPS 4361 +▁STEREOTYPE 4362 +▁STEVE 4363 +▁STICK 4364 +▁STIGMA 4365 +▁STILL 4366 +▁STIMUL 4367 +▁STOCK 4368 +▁STOMACH 4369 +▁STONE 4370 +▁STOOD 4371 +▁STOP 4372 +▁STOPPED 4373 +▁STORAGE 4374 +▁STORE 4375 +▁STORIES 4376 +▁STORM 4377 +▁STORY 4378 +▁STORYTELLER 4379 +▁STORYTELLING 4380 +▁STRAIGHT 4381 +▁STRAIGHTFORWARD 4382 +▁STRANGE 4383 +▁STRANGER 4384 +▁STRATEGIES 4385 +▁STRATEGY 4386 +▁STRAW 4387 +▁STREAM 4388 +▁STREET 4389 +▁STREETS 4390 +▁STRENGTH 4391 +▁STRESS 4392 +▁STRETCH 4393 +▁STRICT 4394 +▁STRIKE 4395 +▁STRIKING 4396 +▁STRING 4397 +▁STRIP 4398 +▁STRIVE 4399 +▁STROKE 4400 +▁STRONG 4401 +▁STRONGER 4402 +▁STRUCTURAL 4403 +▁STRUCTURE 4404 +▁STRUGGLE 4405 +▁STRUGGLING 4406 +▁STUCK 4407 +▁STUDENT 4408 +▁STUDENTS 4409 +▁STUDIED 4410 +▁STUDIES 4411 +▁STUDIO 4412 +▁STUDY 4413 +▁STUDYING 4414 +▁STUFF 4415 +▁STUMBL 4416 +▁STUPID 4417 +▁STYLE 4418 +▁SU 4419 +▁SUB 4420 +▁SUBCONSCIOUS 4421 +▁SUBJECT 4422 +▁SUBMIT 4423 +▁SUBSTANCE 4424 +▁SUBSTANTIAL 4425 +▁SUBTLE 4426 +▁SUBURB 4427 +▁SUCCEED 4428 +▁SUCCESS 4429 +▁SUCCESSFUL 4430 +▁SUCH 4431 +▁SUCK 4432 +▁SUDDEN 4433 +▁SUDDENLY 4434 +▁SUFFER 4435 +▁SUFFERING 4436 +▁SUFFICIENT 4437 +▁SUGAR 4438 +▁SUGGEST 4439 +▁SUGGESTION 4440 +▁SUICIDAL 4441 +▁SUICIDE 4442 +▁SUIT 4443 +▁SUM 4444 +▁SUMMER 4445 +▁SUN 4446 +▁SUNDAY 4447 +▁SUPER 4448 +▁SUPERHERO 4449 +▁SUPERMARKET 4450 +▁SUPPLIES 4451 +▁SUPPLY 4452 +▁SUPPORT 4453 +▁SUPPOSE 4454 +▁SUPPOSED 4455 +▁SUPPRESS 4456 +▁SUPREME 4457 +▁SURE 4458 +▁SURF 4459 +▁SURFACE 4460 +▁SURGEON 4461 +▁SURGERY 4462 +▁SURGICAL 4463 +▁SURPRISE 4464 +▁SURPRISED 4465 +▁SURPRISING 4466 +▁SURRENDER 4467 +▁SURROUND 4468 +▁SURROUNDED 4469 +▁SURVEILLANCE 4470 +▁SURVEY 4471 +▁SURVIVAL 4472 +▁SURVIVE 4473 +▁SURVIVING 4474 +▁SURVIVOR 4475 +▁SUSPECT 4476 +▁SUSTAIN 4477 +▁SUSTAINABILITY 4478 +▁SUSTAINABLE 4479 +▁SW 4480 +▁SWALLOW 4481 +▁SWEAT 4482 +▁SWEDEN 4483 +▁SWEET 4484 +▁SWIM 4485 +▁SWIMMING 4486 +▁SWITCH 4487 +▁SWITZERLAND 4488 +▁SYMBOL 4489 +▁SYMPTOM 4490 +▁SYMPTOMS 4491 +▁SYNDROME 4492 +▁SYNTHETIC 4493 +▁SYRIA 4494 +▁SYSTEM 4495 +▁SYSTEMATIC 4496 +▁SYSTEMS 4497 +▁T 4498 +▁TA 4499 +▁TABLE 4500 +▁TABOO 4501 +▁TACKLE 4502 +▁TAIL 4503 +▁TAKE 4504 +▁TAKEN 4505 +▁TAKES 4506 +▁TAKING 4507 +▁TALENT 4508 +▁TALK 4509 +▁TALKED 4510 +▁TALKING 4511 +▁TALL 4512 +▁TANGIBLE 4513 +▁TANK 4514 +▁TAP 4515 +▁TARGET 4516 +▁TASK 4517 +▁TASTE 4518 +▁TATTOO 4519 +▁TAUGHT 4520 +▁TAX 4521 +▁TE 4522 +▁TEA 4523 +▁TEACH 4524 +▁TEACHER 4525 +▁TEACHERS 4526 +▁TEACHES 4527 +▁TEACHING 4528 +▁TEAM 4529 +▁TEARS 4530 +▁TECH 4531 +▁TECHNICAL 4532 +▁TECHNIQUE 4533 +▁TECHNOLOGICAL 4534 +▁TECHNOLOGIES 4535 +▁TECHNOLOGY 4536 +▁TED 4537 +▁TEDX 4538 +▁TEEN 4539 +▁TEENAGE 4540 +▁TEENAGER 4541 +▁TEETH 4542 +▁TELEPHONE 4543 +▁TELESCOPE 4544 +▁TELEVISION 4545 +▁TELL 4546 +▁TELLING 4547 +▁TELLS 4548 +▁TEMP 4549 +▁TEMPERATURE 4550 +▁TEMPORARY 4551 +▁TEN 4552 +▁TEND 4553 +▁TENSION 4554 +▁TERM 4555 +▁TERMINAL 4556 +▁TERMS 4557 +▁TERRIBLE 4558 +▁TERRIBLY 4559 +▁TERRIFIED 4560 +▁TERRIFYING 4561 +▁TERRITORY 4562 +▁TERROR 4563 +▁TERRORISM 4564 +▁TERRORIST 4565 +▁TEST 4566 +▁TESTING 4567 +▁TEXAS 4568 +▁TEXT 4569 +▁TEXTBOOK 4570 +▁TH 4571 +▁THAN 4572 +▁THANK 4573 +▁THAT 4574 +▁THE 4575 +▁THEATER 4576 +▁THEIR 4577 +▁THEM 4578 +▁THEMSELVES 4579 +▁THEN 4580 +▁THEORETICAL 4581 +▁THEORIES 4582 +▁THEORY 4583 +▁THERAPEUTIC 4584 +▁THERAPIST 4585 +▁THERAPY 4586 +▁THERE 4587 +▁THEREFORE 4588 +▁THESE 4589 +▁THEY 4590 +▁THICK 4591 +▁THIN 4592 +▁THING 4593 +▁THINGS 4594 +▁THINK 4595 +▁THINKING 4596 +▁THIRD 4597 +▁THIRTY 4598 +▁THIS 4599 
+▁THOMAS 4600 +▁THOSE 4601 +▁THOUGH 4602 +▁THOUGHT 4603 +▁THOUGHTS 4604 +▁THOUSAND 4605 +▁THOUSANDS 4606 +▁THREAD 4607 +▁THREAT 4608 +▁THREATENED 4609 +▁THREATENING 4610 +▁THREE 4611 +▁THRESHOLD 4612 +▁THRILL 4613 +▁THRIVE 4614 +▁THRIVING 4615 +▁THROAT 4616 +▁THROUGH 4617 +▁THROUGHOUT 4618 +▁THROW 4619 +▁THUMB 4620 +▁THUS 4621 +▁TICKET 4622 +▁TIE 4623 +▁TIGER 4624 +▁TIGHT 4625 +▁TILL 4626 +▁TIM 4627 +▁TIME 4628 +▁TIMES 4629 +▁TINY 4630 +▁TIP 4631 +▁TIRED 4632 +▁TISSUE 4633 +▁TITLE 4634 +▁TO 4635 +▁TODAY 4636 +▁TOGETHER 4637 +▁TOILET 4638 +▁TOLD 4639 +▁TOMATO 4640 +▁TOMORROW 4641 +▁TONGUE 4642 +▁TONIGHT 4643 +▁TOO 4644 +▁TOOK 4645 +▁TOOL 4646 +▁TOOLS 4647 +▁TOP 4648 +▁TOPIC 4649 +▁TORONTO 4650 +▁TORTURE 4651 +▁TOTAL 4652 +▁TOTALLY 4653 +▁TOUCH 4654 +▁TOUGH 4655 +▁TOWARDS 4656 +▁TOWER 4657 +▁TOWN 4658 +▁TOXIC 4659 +▁TR 4660 +▁TRACE 4661 +▁TRACK 4662 +▁TRADE 4663 +▁TRADITION 4664 +▁TRADITIONAL 4665 +▁TRAFFIC 4666 +▁TRAFFICKING 4667 +▁TRAGEDY 4668 +▁TRAGIC 4669 +▁TRAIL 4670 +▁TRAIN 4671 +▁TRAINED 4672 +▁TRAINING 4673 +▁TRANS 4674 +▁TRANSACTION 4675 +▁TRANSCEND 4676 +▁TRANSFER 4677 +▁TRANSFORM 4678 +▁TRANSFORMATION 4679 +▁TRANSGENDER 4680 +▁TRANSITION 4681 +▁TRANSLATE 4682 +▁TRANSLATION 4683 +▁TRANSLATOR 4684 +▁TRANSMIT 4685 +▁TRANSPARENCY 4686 +▁TRANSPARENT 4687 +▁TRANSPLANT 4688 +▁TRANSPORT 4689 +▁TRANSPORTATION 4690 +▁TRAP 4691 +▁TRASH 4692 +▁TRAUMA 4693 +▁TRAUMATIC 4694 +▁TRAVEL 4695 +▁TREASURE 4696 +▁TREAT 4697 +▁TREATED 4698 +▁TREATMENT 4699 +▁TREE 4700 +▁TREES 4701 +▁TREMENDOUS 4702 +▁TREND 4703 +▁TRI 4704 +▁TRIAL 4705 +▁TRIBAL 4706 +▁TRIBE 4707 +▁TRICK 4708 +▁TRIED 4709 +▁TRIGGER 4710 +▁TRILLION 4711 +▁TRIP 4712 +▁TRIVIAL 4713 +▁TRO 4714 +▁TROUBLE 4715 +▁TRUCK 4716 +▁TRUE 4717 +▁TRULY 4718 +▁TRUMP 4719 +▁TRUST 4720 +▁TRUTH 4721 +▁TRY 4722 +▁TRYING 4723 +▁TU 4724 +▁TUBE 4725 +▁TUMOR 4726 +▁TUNE 4727 +▁TUNNEL 4728 +▁TURKEY 4729 +▁TURN 4730 +▁TURNED 4731 +▁TURNING 4732 +▁TURNS 4733 +▁TV 4734 +▁TWEET 4735 +▁TWELVE 4736 +▁TWENTY 4737 +▁TWICE 4738 +▁TWIN 4739 +▁TWIST 4740 +▁TWITTER 4741 +▁TWO 4742 +▁TYPE 4743 +▁TYPES 4744 +▁TYPICAL 4745 +▁TYPICALLY 4746 +▁U 4747 +▁UGLY 4748 +▁UK 4749 +▁ULTIMATE 4750 +▁ULTIMATELY 4751 +▁ULTRA 4752 +▁UN 4753 +▁UNBELIEVABLE 4754 +▁UNCERTAIN 4755 +▁UNCERTAINTY 4756 +▁UNCLE 4757 +▁UNCOMFORTABLE 4758 +▁UNCONDITIONAL 4759 +▁UNCONSCIOUS 4760 +▁UNCOVER 4761 +▁UNDER 4762 +▁UNDERESTIMATE 4763 +▁UNDERGRADUATE 4764 +▁UNDERGROUND 4765 +▁UNDERLYING 4766 +▁UNDERNEATH 4767 +▁UNDERSTAND 4768 +▁UNDERSTANDING 4769 +▁UNDERSTOOD 4770 +▁UNEMPLOYMENT 4771 +▁UNEXPECTED 4772 +▁UNFAIR 4773 +▁UNFORTUNATELY 4774 +▁UNHAPPY 4775 +▁UNHEALTHY 4776 +▁UNIFORM 4777 +▁UNION 4778 +▁UNIQUE 4779 +▁UNIT 4780 +▁UNITED 4781 +▁UNIVERSAL 4782 +▁UNIVERSE 4783 +▁UNIVERSITIES 4784 +▁UNIVERSITY 4785 +▁UNKNOWN 4786 +▁UNLESS 4787 +▁UNLIKE 4788 +▁UNNECESSARY 4789 +▁UNPRECEDENTED 4790 +▁UNTIL 4791 +▁UNUSUAL 4792 +▁UP 4793 +▁UPDATE 4794 +▁UPGRADE 4795 +▁UPON 4796 +▁UPSET 4797 +▁URBAN 4798 +▁URGE 4799 +▁US 4800 +▁USE 4801 +▁USED 4802 +▁USEFUL 4803 +▁USING 4804 +▁USUALLY 4805 +▁UTOPIA 4806 +▁UTTER 4807 +▁V 4808 +▁VA 4809 +▁VACATION 4810 +▁VACCINE 4811 +▁VACUUM 4812 +▁VAGINA 4813 +▁VALID 4814 +▁VALLEY 4815 +▁VALUABLE 4816 +▁VALUE 4817 +▁VALUES 4818 +▁VAN 4819 +▁VARIABLE 4820 +▁VARIATION 4821 +▁VARIET 4822 +▁VARIOUS 4823 +▁VAST 4824 +▁VE 4825 +▁VEGAN 4826 +▁VEGETABLE 4827 +▁VEGETARIAN 4828 +▁VEHICLE 4829 +▁VENTURE 4830 +▁VERSION 4831 +▁VERSUS 4832 +▁VERTICAL 4833 +▁VERY 4834 +▁VESSEL 4835 +▁VETERAN 4836 +▁VI 4837 +▁VIBRANT 4838 +▁VIBRAT 4839 +▁VICIOUS 4840 +▁VICTIM 4841 +▁VICTIMS 4842 +▁VICTOR 4843 +▁VIDEO 4844 
+▁VIETNAM 4845 +▁VIEW 4846 +▁VILLAGE 4847 +▁VIOLENCE 4848 +▁VIOLENT 4849 +▁VIRAL 4850 +▁VIRGIN 4851 +▁VIRTUAL 4852 +▁VIRTUE 4853 +▁VIRUS 4854 +▁VISCERAL 4855 +▁VISIBLE 4856 +▁VISION 4857 +▁VISIT 4858 +▁VISUAL 4859 +▁VITAL 4860 +▁VITAMIN 4861 +▁VO 4862 +▁VOCABULARY 4863 +▁VOCAL 4864 +▁VOICE 4865 +▁VOICES 4866 +▁VOLUME 4867 +▁VOLUNTEER 4868 +▁VOTE 4869 +▁VOTING 4870 +▁VULNERABILITY 4871 +▁VULNERABLE 4872 +▁W 4873 +▁WA 4874 +▁WAGE 4875 +▁WAIT 4876 +▁WAITING 4877 +▁WAK 4878 +▁WAKE 4879 +▁WALK 4880 +▁WALKED 4881 +▁WALKING 4882 +▁WALL 4883 +▁WANNA 4884 +▁WANT 4885 +▁WANTED 4886 +▁WAR 4887 +▁WAREHOUSE 4888 +▁WARM 4889 +▁WARRIOR 4890 +▁WAS 4891 +▁WASH 4892 +▁WASHINGTON 4893 +▁WASN 4894 +▁WASTE 4895 +▁WATCH 4896 +▁WATCHED 4897 +▁WATCHING 4898 +▁WATER 4899 +▁WAVE 4900 +▁WAY 4901 +▁WAYS 4902 +▁WE 4903 +▁WEAK 4904 +▁WEAKNESS 4905 +▁WEALTH 4906 +▁WEAPON 4907 +▁WEAR 4908 +▁WEARING 4909 +▁WEATHER 4910 +▁WEB 4911 +▁WEBSITE 4912 +▁WEEK 4913 +▁WEEKEND 4914 +▁WEEKS 4915 +▁WEIGH 4916 +▁WEIGHT 4917 +▁WEIRD 4918 +▁WELCOME 4919 +▁WELFARE 4920 +▁WELL 4921 +▁WENT 4922 +▁WERE 4923 +▁WEREN 4924 +▁WEST 4925 +▁WESTERN 4926 +▁WHALE 4927 +▁WHAT 4928 +▁WHATEVER 4929 +▁WHATSOEVER 4930 +▁WHEAT 4931 +▁WHEEL 4932 +▁WHEELCHAIR 4933 +▁WHEN 4934 +▁WHENEVER 4935 +▁WHERE 4936 +▁WHETHER 4937 +▁WHI 4938 +▁WHICH 4939 +▁WHILE 4940 +▁WHISPER 4941 +▁WHISTLE 4942 +▁WHITE 4943 +▁WHO 4944 +▁WHOEVER 4945 +▁WHOLE 4946 +▁WHOSE 4947 +▁WHY 4948 +▁WI 4949 +▁WIDE 4950 +▁WIDESPREAD 4951 +▁WIFE 4952 +▁WIKIPEDIA 4953 +▁WILD 4954 +▁WILDLIFE 4955 +▁WILL 4956 +▁WILLIAM 4957 +▁WILLING 4958 +▁WIN 4959 +▁WIND 4960 +▁WINDOW 4961 +▁WINE 4962 +▁WINNING 4963 +▁WINTER 4964 +▁WIRE 4965 +▁WISDOM 4966 +▁WISE 4967 +▁WISH 4968 +▁WITH 4969 +▁WITHDRAW 4970 +▁WITHIN 4971 +▁WITHOUT 4972 +▁WITNESS 4973 +▁WOKE 4974 +▁WOLF 4975 +▁WOLVES 4976 +▁WOMAN 4977 +▁WOMEN 4978 +▁WON 4979 +▁WONDER 4980 +▁WONDERED 4981 +▁WONDERFUL 4982 +▁WONDERING 4983 +▁WOOD 4984 +▁WORD 4985 +▁WORDS 4986 +▁WORE 4987 +▁WORK 4988 +▁WORKED 4989 +▁WORKERS 4990 +▁WORKFORCE 4991 +▁WORKING 4992 +▁WORKPLACE 4993 +▁WORKS 4994 +▁WORKSHOP 4995 +▁WORLD 4996 +▁WORLDVIEW 4997 +▁WORLDWIDE 4998 +▁WORM 4999 +▁WORRIED 5000 +▁WORRIES 5001 +▁WORRY 5002 +▁WORSE 5003 +▁WORSHIP 5004 +▁WORST 5005 +▁WORTH 5006 +▁WOULD 5007 +▁WOULDN 5008 +▁WOUND 5009 +▁WOW 5010 +▁WRAP 5011 +▁WRESTLE 5012 +▁WRITE 5013 +▁WRITER 5014 +▁WRITING 5015 +▁WRITTEN 5016 +▁WRONG 5017 +▁WROTE 5018 +▁X 5019 +▁YA 5020 +▁YARD 5021 +▁YEAH 5022 +▁YEAR 5023 +▁YEARS 5024 +▁YELL 5025 +▁YELLOW 5026 +▁YES 5027 +▁YESTERDAY 5028 +▁YET 5029 +▁YIELD 5030 +▁YO 5031 +▁YOGA 5032 +▁YORK 5033 +▁YOU 5034 +▁YOUNG 5035 +▁YOUNGER 5036 +▁YOUR 5037 +▁YOURSELF 5038 +▁YOURSELVES 5039 +▁YOUTH 5040 +▁YOUTUBE 5041 +▁YU 5042 +▁Z 5043 +▁ZEALAND 5044 +▁ZERO 5045 +▁ZONE 5046 +▁ZOO 5047