File size: 924 Bytes

8adb2f8

# Run identity and validation settings.
# NOTE(review): the exact meaning of each key is defined by the training
# script that consumes this file, which is not visible here.
trainer: "gist"
# Name of this run's model (presumably the output/checkpoint name - confirm).
model_name: "bertimbau-335m-mmarco-pairs-gist1-v1"
# Model this run starts from (assumed from the key name).
base_model_name: "bertimbau-335m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-angle20-v3"
# Guide model (presumably used by the "gist" trainer - confirm).
guide_model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-cosent20-v1"
# Booleans are written in canonical lowercase so every YAML loader
# (1.1 and 1.2) resolves them as booleans.
validation_ir: true
validation_ir_corpus_size: 50000
# Disabled alternative: a much smaller validation corpus.
# validation_ir_corpus_size: 500

# Training datasets, one list entry per dataset.
# The path/name/split keys look like Hugging Face `datasets.load_dataset`
# arguments (presumably forwarded by the training script; `alias` appears to
# be a project-local label - confirm against the loader).
# see https://huggingface.co/docs/datasets/v2.18.0/en/about_dataset_load
train_dataset_configs:
  - alias: "mmarco"
    path: "unicamp-dl/mmarco"
    name: "portuguese"
    split: "train"
    # Disabled alternative: only a slice of the train split
    # (Hugging Face split-slicing syntax).
    # split: "train[1000:2000]"

# Example-format flags and optimisation hyperparameters.
# Booleans are written in canonical lowercase (`true`/`false`).
examples_are_triples: false
examples_are_labelled: false
seed: 1
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-5` as the string "1e-5" rather than a float.
learning_rate: 1.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
# batch_size: 100  # 100 fits very tightly (40GB used), could crash on batches of longer texts
batch_size: 85  # 85 uses up to 37.5GB out of 40GB
use_amp: true
epochs: 1
# Disabled alternative: validate only once per epoch.
# validations_per_epoch: 1
validations_per_epoch: 100