from transformers.models.t5.configuration_t5 import T5Config
class MrT5Config(T5Config):
    """Configuration for MrT5: a ``T5Config`` with extra MrT5-specific fields.

    Each extra keyword argument is stored unchanged as an instance attribute
    of the same name. No validation or conversion happens here; how a value
    is interpreted is decided by whatever code reads this config, which is
    not visible in this file. The per-argument notes below are therefore
    limited to defaults, with semantics marked as review assumptions.

    Args:
        *args: Forwarded positionally to ``T5Config.__init__``.
        sigmoid_mask_scale: Defaults to ``-10.0``.
            NOTE(review): presumably a scale applied to a sigmoid gate
            output -- confirm against the model code.
        gate_layer_norm: Defaults to ``True``.
            NOTE(review): presumably toggles layer norm in the delete
            gate -- confirm.
        deletion_threshold: Defaults to ``None``.
        delete_gate_layer: Defaults to ``2``.
            NOTE(review): presumably a layer index for the delete gate --
            confirm.
        use_softmax1: Defaults to ``False``.
        deletion_type: Defaults to ``None``.
        random_deletion_probability: Defaults to ``0.5``.
        fixed_deletion_amount: Defaults to ``0.5``.
        train_language: Defaults to ``"en"``.
        eval_language: Defaults to ``"en"``.
        use_gumbel_noise: Defaults to ``False``.
        **kwargs: Forwarded as keyword arguments to ``T5Config.__init__``.
    """

    model_type = "mrt5"

    def __init__(
        self,
        *args,
        sigmoid_mask_scale=-10.0,
        gate_layer_norm=True,
        deletion_threshold=None,
        delete_gate_layer=2,
        use_softmax1=False,
        deletion_type=None,
        random_deletion_probability=0.5,
        fixed_deletion_amount=0.5,
        train_language="en",
        eval_language="en",
        use_gumbel_noise=False,
        **kwargs,
    ):
        # Initialise the base T5 configuration first; the MrT5-specific
        # fields are attached afterwards, in the same order as before.
        super().__init__(*args, **kwargs)

        self.deletion_threshold = deletion_threshold
        self.sigmoid_mask_scale = sigmoid_mask_scale
        self.gate_layer_norm = gate_layer_norm
        self.use_softmax1 = use_softmax1
        self.deletion_type = deletion_type
        self.random_deletion_probability = random_deletion_probability
        self.fixed_deletion_amount = fixed_deletion_amount
        self.train_language = train_language
        self.eval_language = eval_language
        self.delete_gate_layer = delete_gate_layer
        self.use_gumbel_noise = use_gumbel_noise