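# Evaluation config for a small MPT model, in the style of an LLM Foundry eval YAML.
# Assuming LLM Foundry's eval harness, it would typically be launched with something like:
#   composer eval/eval.py <this_file>.yaml
# The top-level keys below are interpolated into later sections via ${...}.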
max_seq_len: 1024
tokenizer_name: EleutherAI/gpt-neox-20b
seed: 1
precision: amp_fp16
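
# Models to evaluate; each list entry bundles a tokenizer spec and a model config.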
models:
-
  model_name: mpt_test
  tokenizer:
    name: ${tokenizer_name}
    kwargs:
      model_max_length: ${max_seq_len}
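  # Architecture for a small MPT causal LM (roughly GPT-2-small scale, ~125M parameters).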
  model:
    name: mpt_causal_lm
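    # Per LLM Foundry's 'mixed' convention: rank 0 initializes real weights while
    # other ranks use meta tensors; FSDP then shards and syncs them.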
    init_device: mixed
    d_model: 768
    n_heads: 12
    n_layers: 12
    expansion_ratio: 4
    max_seq_len: ${max_seq_len}
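    # 50368 rounds the GPT-NeoX tokenizer's ~50.3k vocab up to a multiple of 64,
    # a common GPU-throughput optimization.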
    vocab_size: 50368
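    # 'triton' selects the triton-based FlashAttention kernel; 'torch' is the
    # more portable fallback if triton is unavailable.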
    attn_config:
      attn_impl: triton
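  # Optional path to a Composer checkpoint to evaluate; left blank here, so the
  # model is evaluated with freshly initialized weights.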
  load_path:

device_eval_batch_size: 16
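
# FSDP settings used to shard the model across GPUs during evaluation.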
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: FULL
  forward_prefetch: True
  limit_all_gathers: True
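
# In-context-learning (ICL) evaluation tasks.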
icl_tasks:
-
  label: jeopardy
  dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl
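  # Zero-shot only; add more values (e.g. [0, 5]) to also run few-shot.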
  num_fewshot: [0]
  icl_task_type: language_modeling
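  # Inserted between each example's context and its gold continuation when
  # building prompts for scoring.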
|
continuation_delimiter: "\nAnswer: " |
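  # Dataset rows carry category labels, so results are also broken out per category.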
  has_categories: true