File size: 1,619 Bytes
cf05c06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{
    "os": "Linux-5.4.0-147-generic-x86_64-with-glibc2.31",
    "python": "3.9.16",
    "heartbeatAt": "2023-07-25T19:40:15.153966",
    "startedAt": "2023-07-25T19:40:14.648957",
    "docker": null,
    "gpu": "NVIDIA A100-SXM4-80GB",
    "gpu_count": 8,
    "cpu_count": 64,
    "cuda": null,
    "args": [
        "--local_rank=0",
        "--train_datasets",
        "bt",
        "--model_name_or_path",
        "cerebras/btlm-3b-8k-base",
        "--max_length",
        "8092",
        "--trust_remote_code",
        "True",
        "--epochs",
        "16",
        "--per_device_train_batch_size",
        "8",
        "--per_device_eval_batch_size",
        "2",
        "--gradient_accumulation_steps",
        "1",
        "--gradient_checkpointing",
        "--learning_rate",
        "4.7e-6",
        "--lr_scheduler_type",
        "cosine",
        "--num_warmup_steps",
        "20",
        "--weight_decay",
        "0.0",
        "--seed",
        "42",
        "--output_dir",
        "/home/paperspace/safe-rlhf/output/sft",
        "--log_type",
        "wandb",
        "--log_project",
        "BT-Training",
        "--zero_stage",
        "2",
        "--bf16",
        "True",
        "--tf32",
        "True"
    ],
    "state": "running",
    "program": "-m safe_rlhf.finetune.__main__",
    "git": {
        "remote": "https://github.com/robertalanm/safe-rlhf",
        "commit": "000436c589d4c9f59193f719f05281e0c8ef6814"
    },
    "email": null,
    "root": "/home/paperspace/safe-rlhf",
    "host": "psiohuej6",
    "username": "paperspace",
    "executable": "/usr/bin/python3.9"
}