Initial commit

Files changed (10) hide show

added_tokens.json +3 -0
arguments.json +64 -0
arguments.pkl +3 -0
config.json +41 -0
pytorch_model.bin +3 -0
script.sh +143 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +57 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 32000
+}

arguments.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+    "model_name_or_path": "Ren-Wei/Safe-RLHF-SFT-mistral-7b",
+    "max_length": 512,
+    "trust_remote_code": true,
+    "train_datasets": [
+        [
+            "PKU-SafeRLHF/train",
+            {
+                "proportion": 1.0
+            }
+        ]
+    ],
+    "eval_datasets": [
+        [
+            "PKU-SafeRLHF/test",
+            {
+                "proportion": 1.0
+            }
+        ]
+    ],
+    "loss_type": "sequence-wise",
+    "epochs": 2,
+    "per_device_train_batch_size": 16,
+    "per_device_eval_batch_size": 16,
+    "gradient_accumulation_steps": 1,
+    "gradient_checkpointing": true,
+    "regularization": 0.001,
+    "normalize_score_during_training": false,
+    "normalizer_type": "ExponentialMovingAverage",
+    "normalizer_momentum": 0.9,
+    "lr": 2e-05,
+    "lr_scheduler_type": "cosine",
+    "lr_warmup_ratio": 0.03,
+    "weight_decay": 0.1,
+    "seed": 42,
+    "fp16": true,
+    "bf16": false,
+    "tf32": null,
+    "eval_strategy": "epoch",
+    "eval_interval": 1000000,
+    "need_eval": true,
+    "eval_split_ratio": null,
+    "output_dir": "/home/rwliang/Reinforcement-Learning/FINAL_PROJECT/rl-final/safe-rlhf/output/rm_helpful_mistral_7b",
+    "log_type": "wandb",
+    "log_dir": "/home/rwliang/Reinforcement-Learning/FINAL_PROJECT/rl-final/safe-rlhf/output/rm_helpful_mistral_7b",
+    "log_project": "Safe-RLHF-RM",
+    "log_run_name": "reward-2025-03-14-10-17-41",
+    "save_16bit": true,
+    "save_interval": 1000000,
+    "local_rank": 0,
+    "zero_stage": 3,
+    "offload": "none",
+    "deepspeed": false,
+    "deepspeed_config": null,
+    "deepscale": false,
+    "deepscale_config": null,
+    "global_rank": 0,
+    "device": {
+        "type": "torch.device",
+        "repr": "device(type='cuda', index=0)"
+    },
+    "num_update_steps_per_epoch": 660,
+    "total_training_steps": 1320
+}

arguments.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47fc7f53302a4399cbab7a6bc6ef61b389ec6fd7fb4dc0815a2b44efa3c53efb
+size 1295

config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "Ren-Wei/Safe-RLHF-SFT-mistral-7b",
+  "architectures": [
+    "MistralForScore"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "do_normalize": false,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "mean": [
+    0.029040411114692688
+  ],
+  "model_type": "mistral",
+  "momentum": 0.9,
+  "normalizer_type": "ExponentialMovingAverage",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": 32000,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "score_bias": true,
+  "score_dim": 1,
+  "score_type": "reward",
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "var": [
+    2.804319381713867
+  ],
+  "vocab_size": 32001
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee0e413661a4cefdb7f0ac7e071e2ccc3fe82bee95cd4f2e32a6af2e64ed7d98
+size 14221434706

script.sh ADDED Viewed

	@@ -0,0 +1,143 @@

+#!/usr/bin/env bash
+#
+# Copyright 2023-2024 PKU-Alignment Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+if [ -z "${BASH_VERSION}" ]; then
+	echo "Please use bash to run this script." >&2
+	exit 1
+fi
+set -x
+SCRIPT_DIR="$(cd "$(dirname "$0")" &>/dev/null && pwd)"
+ROOT_DIR="$(dirname "${SCRIPT_DIR}")"
+export PYTHONPATH="${ROOT_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
+export LOGLEVEL="${LOGLEVEL:-WARNING}"
+MODEL_NAME_OR_PATH="PKU-Alignment/alpaca-7b-reproduced"
+OUTPUT_DIR="${ROOT_DIR}/output/rm"
+unset HOSTFILE
+ZERO_STAGE=3 ###
+OFFLOAD="none"
+while [[ "$#" -gt 0 ]]; do
+	arg="$1"
+	shift
+	case "${arg}" in
+		--model_name_or_path)
+			MODEL_NAME_OR_PATH="$1"
+			shift
+			;;
+		--model_name_or_path=*)
+			MODEL_NAME_OR_PATH="${arg#*=}"
+			;;
+		--output_dir)
+			OUTPUT_DIR="$1"
+			shift
+			;;
+		--output_dir=*)
+			OUTPUT_DIR="${arg#*=}"
+			;;
+		--hostfile)
+			HOSTFILE="$1"
+			shift
+			;;
+		--hostfile=*)
+			HOSTFILE="${arg#*=}"
+			;;
+		--zero_stage)
+			ZERO_STAGE="$1"
+			shift
+			;;
+		--zero_stage=*)
+			ZERO_STAGE="${arg#*=}"
+			;;
+		--offload)
+			OFFLOAD="$1"
+			shift
+			;;
+		--offload=*)
+			OFFLOAD="${arg#*=}"
+			;;
+		*)
+			echo "Unknown parameter passed: '${arg}'" >&2
+			exit 1
+			;;
+	esac
+done
+mkdir -p "${OUTPUT_DIR}"
+OUTPUT_DIR="$(cd "${OUTPUT_DIR}" &>/dev/null && pwd)"
+if [[ ! -f "${OUTPUT_DIR}/.gitignore" ]]; then
+	echo '*' >"${OUTPUT_DIR}/.gitignore"
+fi
+cp -f "$0" "${OUTPUT_DIR}/script.sh"
+if [[ -z "${WANDB_API_KEY}" ]]; then
+	export WANDB_MODE="offline"
+fi
+MASTER_PORT_START=10000
+MASTER_PORT_END=65535
+MASTER_PORT="$(
+	comm -23 \
+		<(seq "${MASTER_PORT_START}" "${MASTER_PORT_END}" | sort) \
+		<(ss -Htan | awk '{ print $4 }' | awk -F ':' '{ print $NF }' | sort -u) |
+		shuf | head -n 1
+)"
+DEEPSPEED_ARGS=()
+if [[ -n "${HOSTFILE+x}" ]]; then
+	DEEPSPEED_ARGS+=("--hostfile" "${HOSTFILE}")
+fi
+DEEPSPEED_ARGS+=("--master_port" "${MASTER_PORT}")
+exec 1> >(tee "${OUTPUT_DIR}/stdout.log" >&1) 2> >(tee "${OUTPUT_DIR}/stderr.log" >&2)
+deepspeed "${DEEPSPEED_ARGS[@]}" \
+	--module safe_rlhf.values.reward \
+	--train_datasets PKU-SafeRLHF/train \
+	--eval_datasets PKU-SafeRLHF/test \
+	--model_name_or_path "${MODEL_NAME_OR_PATH}" \
+	--max_length 512 \
+	--trust_remote_code True \
+	--loss_type sequence-wise \
+	--epochs 2 \
+	--per_device_train_batch_size 16 \
+	--per_device_eval_batch_size 16 \
+	--gradient_accumulation_steps 1 \
+	--gradient_checkpointing \
+	--regularization 0.001 \
+	--normalize_score_during_training False \
+	--normalizer_type ExponentialMovingAverage \
+	--normalizer_momentum 0.9 \
+	--learning_rate 2e-5 \
+	--lr_scheduler_type cosine \
+	--lr_warmup_ratio 0.03 \
+	--weight_decay 0.1 \
+	--seed 42 \
+	--need_eval \
+	--eval_strategy epoch \
+	--output_dir "${OUTPUT_DIR}" \
+	--log_type wandb \
+	--log_project Safe-RLHF-RM \
+	--zero_stage "${ZERO_STAGE}" \
+	--offload "${OFFLOAD}" \
+	--fp16 True \
+	--save_16bit
+	# --bf16 True \
+	# --tf32 True

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "LlamaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}