|
from transformers import PretrainedConfig |
|
|
|
class BilmaConfig(PretrainedConfig):
    """Configuration for models registered under the ``"bilma"`` model type.

    Two modes are supported:

    * ``weights`` names a pretrained weight set (currently only ``"CL"``):
      the architecture hyperparameters are pinned to the fixed values that
      belong to that weight set, and any values passed for them are ignored.
    * ``weights`` is ``None``: the architecture hyperparameters passed by the
      caller are stored as given.
    """

    model_type = "bilma"

    def __init__(
        self,
        weights="CL",
        include_top=True,
        add_head=None,
        pooling=None,
        num_attention_heads: int = 4,
        num_hidden_layers: int = 2,
        seq_max_length: int = 280,
        hidden_size: int = 512,
        vocab_size: int = 29025,
        hidden_dropout_prob: float = 0.1,
        **kwargs,
    ):
        """Validate the options and store them as config attributes.

        Args:
            weights: Identifier of the pretrained weight set, or ``None`` for
                a custom architecture. The only accepted identifier is
                ``"CL"`` (presumably a country code -- confirm with the model
                authors).
            include_top: Whether the model keeps its top. Must be falsy when
                ``add_head`` or ``pooling`` is given.
            add_head: Optional head specification; stored unchanged. Its
                expected format is not visible in this file.
            pooling: Optional pooling mode, one of ``"mean"``, ``"cls"`` or
                ``"max"``.
            num_attention_heads: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 4.
            num_hidden_layers: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 2.
            seq_max_length: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 280.
            hidden_size: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 512.
            vocab_size: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 29025.
            hidden_dropout_prob: Stored as given when ``weights`` is ``None``;
                otherwise pinned to 0.1.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If ``weights`` is neither ``None`` nor a known
                identifier, if ``add_head`` or ``pooling`` is combined with a
                truthy ``include_top``, or if ``pooling`` is not a supported
                mode.
        """
        countries = ["CL"]
        poolings = ["mean", "cls", "max"]

        # ``None`` has to pass validation: it selects the custom-architecture
        # path below. Rejecting it here made that path unreachable.
        if weights is not None and weights not in countries:
            raise ValueError(
                f"`weights` must be one of {countries} or None, got {weights}."
            )
        if add_head is not None and include_top:
            raise ValueError("To add a head, 'include_top' must be False")
        if pooling is not None and include_top:
            raise ValueError("To specify a pooling, 'include_top' must be False")
        if pooling is not None and pooling not in poolings:
            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")

        if weights is not None:
            # A named weight set only fits one architecture, so the
            # hyperparameters are pinned regardless of what was passed in.
            num_attention_heads = 4
            num_hidden_layers = 2
            seq_max_length = 280
            hidden_size = 512
            vocab_size = 29025
            hidden_dropout_prob = 0.1

        self.weights = weights
        self.include_top = include_top
        self.add_head = add_head
        self.pooling = pooling
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.seq_max_length = seq_max_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.hidden_dropout_prob = hidden_dropout_prob

        # Called last, as in the original, so the base class handles the
        # remaining keyword arguments after the attributes above are set.
        super().__init__(**kwargs)