# Copyright 2022 MosaicML Examples authors
# SPDX-License-Identifier: Apache-2.0
from transformers import BertConfig as TransformersBertConfig


class JBertConfig(TransformersBertConfig):
    def __init__(
        self,
        model_max_length: int = 8192,
        attention_probs_dropout_prob: float = 0.0,
        **kwargs,
    ):
        """Configuration class for MosaicBERT.

        Args:
            model_max_length (int): Determines how large an ALiBi tensor the
                model creates at initialization. You can ignore this parameter
                in most cases. Defaults to 8192.
            attention_probs_dropout_prob (float): Attention dropout
                probability. Mosaic BERT turns attention dropout off by
                default, because a nonzero value disables Flash Attention.
                Defaults to 0.0.
        """
        super().__init__(
            attention_probs_dropout_prob=attention_probs_dropout_prob, **kwargs
        )
        self.model_max_length = model_max_length
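

# For context, a rough sketch of the kind of ALiBi bias tensor that
# `model_max_length` sizes. This is an assumption for illustration: the
# symmetric (bidirectional) variant with the standard geometric slopes from
# Press et al.; the actual model code may differ in its details.
import torch


def alibi_bias(num_heads: int, max_len: int) -> torch.Tensor:
    # Per-head slopes: 2^(-8h/n) for heads h = 1..n, as in the ALiBi paper.
    slopes = torch.tensor(
        [2.0 ** (-8.0 * (h + 1) / num_heads) for h in range(num_heads)]
    )
    # Negative absolute distance |i - j| between query position i and key
    # position j, so more distant tokens receive a larger penalty.
    positions = torch.arange(max_len)
    distance = -(positions[None, :] - positions[:, None]).abs()
    # Shape (num_heads, max_len, max_len); added to the raw attention scores.
    return slopes[:, None, None] * distance[None, :, :].float()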
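

# Illustrative usage, not part of the upstream file. A minimal sketch of
# constructing the config: all standard BertConfig fields pass through
# **kwargs, while `model_max_length` is simply stored as an attribute for the
# model code to read when it builds its ALiBi bias.
if __name__ == "__main__":
    config = JBertConfig(model_max_length=8192)
    print(config.model_max_length)              # 8192
    print(config.attention_probs_dropout_prob)  # 0.0 (attention dropout off)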