# Source: bilma_MX_mean / configuration_bilma.py
# Uploader: guillermoruiz
# Commit: "Upload TFBilma" (fc80b92, verified)
from transformers import PretrainedConfig
class BilmaConfig(PretrainedConfig):
    """Configuration container for the ``bilma`` model type.

    Holds the architecture hyperparameters plus the options that control
    the top of the network (``include_top``, ``add_head``, ``pooling``).
    """

    model_type = "bilma"

    def __init__(
        self,
        weights="MX",
        include_top=True,
        add_head=None,
        pooling=None,
        num_attention_heads: int = 4,
        num_hidden_layers: int = 2,
        seq_max_length: int = 280,
        hidden_size: int = 512,
        vocab_size: int = 29025,
        hidden_dropout_prob: float = 0.1,
        **kwargs,
    ):
        """Validate the options and store them on the config.

        Args:
            weights: Name of the weight set. Must be ``"MX"`` or ``None``.
                When a name is given, the architecture hyperparameters are
                pinned to the fixed values for that weight set and the
                corresponding arguments are ignored. When ``None``, the
                hyperparameter arguments are used as passed.
            include_top: Whether the top of the model is kept. Must be
                falsy in order to use ``add_head`` or ``pooling``.
            add_head: Optional head specification; only allowed when
                ``include_top`` is falsy. Stored as-is.
            pooling: Optional pooling mode, one of ``"mean"``, ``"cls"`` or
                ``"max"``; only allowed when ``include_top`` is falsy.
            num_attention_heads: Number of attention heads.
            num_hidden_layers: Number of hidden layers.
            seq_max_length: Maximum sequence length.
            hidden_size: Hidden dimension.
            vocab_size: Vocabulary size.
            hidden_dropout_prob: Dropout probability.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If ``weights`` is neither ``None`` nor a known name,
                if ``add_head`` or ``pooling`` is given while ``include_top``
                is truthy, or if ``pooling`` is not a supported mode.
        """
        countries = ["MX"]
        poolings = ["mean", "cls", "max"]

        # `None` must pass validation: it selects the custom-architecture
        # path below. Previously it was rejected here, which made that path
        # unreachable and silently discarded user-supplied hyperparameters.
        if weights is not None and weights not in countries:
            raise ValueError(
                f"`weights` must be one of {countries} or None, got {weights}."
            )
        if add_head is not None and include_top:
            raise ValueError("To add a head, 'include_top' must be False")
        if pooling is not None and include_top:
            raise ValueError("To specify a pooling, 'include_top' must be False")
        if pooling is not None and pooling not in poolings:
            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")

        if weights is not None:
            # A named weight set fixes the architecture; override whatever
            # the caller passed so the config always matches those weights.
            num_attention_heads = 4
            num_hidden_layers = 2
            seq_max_length = 280
            hidden_size = 512
            vocab_size = 29025
            hidden_dropout_prob = 0.1

        self.weights = weights
        self.include_top = include_top
        self.add_head = add_head
        self.pooling = pooling
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.seq_max_length = seq_max_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.hidden_dropout_prob = hidden_dropout_prob
        super().__init__(**kwargs)