# --------------------------------------------------------
# Pre-Training Transformer Decoder for End-to-End ASR Model with Unpaired Speech Data (https://arxiv.org/abs/2203.17113)
# Github source: https://github.com/microsoft/SpeechT5/tree/main/Speech2C
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Based on fairseq code bases
# https://github.com/pytorch/fairseq
# --------------------------------------------------------

from fairseq.models import (
    register_model_architecture,
)
from fairseq.models.transformer_lm import base_lm_architecture


@register_model_architecture(model_name="transformer_lm", arch_name="transformer_lm_t5")
def transformer_lm_t5(args):
    """Register the T5-sized transformer LM architecture used by Speech2C.

    For each hyperparameter below, keep the value already present on
    ``args`` (e.g. set on the command line) and otherwise fill in the
    T5-scale default.  All remaining fields are then delegated to
    fairseq's ``base_lm_architecture``.
    """
    t5_defaults = {
        "decoder_embed_dim": 1280,
        "decoder_ffn_embed_dim": 6144,
        "decoder_layers": 20,
        "decoder_attention_heads": 16,
        "dropout": 0.1,
        "attention_dropout": 0.1,
        "activation_fn": "gelu",
    }
    for name, default in t5_defaults.items():
        # getattr falls back to the default only when the user did not
        # set the attribute, matching fairseq's architecture convention.
        setattr(args, name, getattr(args, name, default))
    base_lm_architecture(args)