Spaces:
Runtime error
Runtime error
File size: 3,573 Bytes
fa6856c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import json
import os
import sys
from itertools import islice
from datasets import load_dataset
from ppo_hh import create_reward_fn
import trlx
from trlx.data.default_configs import (
ILQLConfig,
ModelConfig,
OptimizerConfig,
SchedulerConfig,
TokenizerConfig,
TrainConfig,
TRLConfig,
)
# Baseline ILQL configuration. Model and tokenizer both point at
# EleutherAI/gpt-j-6B here; later code in this file may overwrite fields.
default_config = TRLConfig(
    train=TrainConfig(
        seq_length=1024,
        batch_size=4,
        epochs=100,
        total_steps=20000,
        checkpoint_interval=10000,
        eval_interval=1000,
        pipeline="PromptPipeline",
        trainer="AccelerateILQLTrainer",
        checkpoint_dir="checkpoints/ilql_hh",
    ),
    model=ModelConfig(
        model_path="EleutherAI/gpt-j-6B",
        num_layers_unfrozen=-1,
    ),
    tokenizer=TokenizerConfig(
        tokenizer_path="EleutherAI/gpt-j-6B",
        truncation_side="left",
    ),
    optimizer=OptimizerConfig(
        name="adamw",
        kwargs={
            "lr": 1e-6,
            "betas": (0.9, 0.95),
            "eps": 1.0e-8,
            "weight_decay": 1.0e-6,
        },
    ),
    scheduler=SchedulerConfig(
        name="cosine_annealing",
        # NOTE(review): eta_min equals the optimizer lr above (both 1e-6) and
        # T_max is far larger than total_steps -- confirm this is intended.
        kwargs={"T_max": 1000000000, "eta_min": 1e-6},
    ),
    method=ILQLConfig(
        name="ilqlconfig",
        tau=0.6,
        gamma=0.99,
        cql_scale=0.1,
        awac_scale=1,
        alpha=0.0001,
        beta=0,
        steps_for_target_q_sync=1,
        two_qs=True,
        # Generation settings; "beta" here is a list of two values, unlike
        # the scalar ``beta`` field above.
        gen_kwargs={
            "max_new_tokens": 128,
            "top_k": 20,
            "beta": [1, 4],
            "temperature": 1.0,
        },
    ),
)
# Optional model-size preset, chosen through the CONFIG_NAME environment
# variable. When it is unset or not one of the keys below, default_config is
# left exactly as constructed.
config_name = os.environ.get("CONFIG_NAME")

# Preset name -> ordered overrides, keyed by (config section, field name).
_size_presets = {
    "125M": {
        ("train", "batch_size"): 16,
        ("train", "checkpoint_dir"): "checkpoints/ilql_hh_125M",
        ("model", "model_path"): "EleutherAI/pythia-125m-deduped",
        ("tokenizer", "tokenizer_path"): "EleutherAI/gpt-neox-20b",
    },
    "1B": {
        ("train", "batch_size"): 8,
        ("train", "checkpoint_dir"): "checkpoints/ilql_hh_1B",
        ("model", "model_path"): "EleutherAI/pythia-1.4b-deduped",
        ("tokenizer", "tokenizer_path"): "EleutherAI/gpt-neox-20b",
    },
    "6B": {
        ("train", "batch_size"): 4,
        ("train", "checkpoint_dir"): "checkpoints/ilql_hh_6B",
        ("model", "model_path"): "EleutherAI/pythia-6.9b-deduped",
        ("tokenizer", "tokenizer_path"): "EleutherAI/gpt-neox-20b",
    },
    "20B": {
        ("train", "batch_size"): 1,
        # Only this preset also changes the number of training steps.
        ("train", "total_steps"): 3000,
        ("train", "checkpoint_dir"): "checkpoints/ilql_hh_20B",
        ("model", "model_path"): "EleutherAI/gpt-neox-20b",
        ("tokenizer", "tokenizer_path"): "EleutherAI/gpt-neox-20b",
    },
}

# Apply the selected preset (if any) field by field onto default_config.
for (_section, _field), _value in _size_presets.get(config_name, {}).items():
    setattr(getattr(default_config, _section), _field, _value)
def preprocess(sample):
    """Attach ILQL training pairs and their rewards to one dataset row.

    Reads the ``"prompt"``, ``"chosen"`` and ``"rejected"`` entries of
    ``sample`` and adds two new entries to the same mapping:

    * ``"prompt_output"``: ``[[prompt, chosen], [prompt, rejected]]``
    * ``"reward"``: ``[1, -1]``, aligned with the two pairs above.

    The mapping is modified in place and also returned.
    """
    prompt = sample["prompt"]
    # Chosen first, rejected second -- the order must match the reward list.
    sample["prompt_output"] = [
        [prompt, sample[answer_key]] for answer_key in ("chosen", "rejected")
    ]
    sample["reward"] = [1, -1]
    return sample
def main(hparams=None):
    """Run ILQL training on the ``Dahoas/full-hh-rlhf`` dataset.

    Args:
        hparams: Optional mapping of overrides that is merged into
            ``default_config`` through ``TRLConfig.update``. ``None`` (the
            default) means "no overrides".
    """
    # Use None rather than a literal {} default so that no single dict object
    # is shared between calls.
    if hparams is None:
        hparams = {}
    config = TRLConfig.update(default_config, hparams)

    dataset = load_dataset("Dahoas/full-hh-rlhf").map(preprocess)
    train_split = dataset["train"]

    # Flatten the per-row [chosen, rejected] pairs in a single pass.
    # sum(list_of_lists, []) copies the growing accumulator on every
    # addition, which is quadratic in the number of rows.
    prompts_outputs = [
        pair for row_pairs in train_split["prompt_output"] for pair in row_pairs
    ]
    rewards = [
        reward for row_rewards in train_split["reward"] for reward in row_rewards
    ]

    # Evaluate on the first 280 test rows, keeping the chosen answer alongside
    # each prompt as "original_output".
    eval_prompts = [
        {"prompt": x["prompt"], "original_output": x["chosen"]}
        for x in islice(dataset["test"], 280)
    ]
    reward_fn = create_reward_fn()

    trlx.train(
        samples=prompts_outputs,
        rewards=rewards,
        config=config,
        eval_prompts=eval_prompts,
        # Forward whatever keyword arguments the metric hook receives to the
        # reward function and report the result under the "reward" key.
        metric_fn=lambda **kwargs: {"reward": reward_fn(**kwargs)},
        stop_sequences=["Human:", "human:", "Assistant:", "assistant:"],
    )
if __name__ == "__main__":
    # With no command-line arguments, run with no overrides; otherwise the
    # first argument is parsed as a JSON document of hyperparameter overrides.
    if len(sys.argv) == 1:
        hparams = {}
    else:
        hparams = json.loads(sys.argv[1])
    main(hparams)
|