In [1]:
import torch
import torch.nn as nn
import warnings
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from dataloader import StoryPairDataset
from trl import RewardTrainer, RewardConfig
import os

In [2]:
datapath = 'readsy/stories/'
pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'
model_name = 'model/SFTmodels/gemma-2b_sftm3genre10vast/'
base_model = 'model/gemma/gemma-2b/'
mode='m3' if 'm3' in model_name else 'm2'
if 'random' in model_name:
    split_by = 'random'
elif 'time' in model_name:
    split_by = 'time'
else:
    split_by = 'random'
lease_likes = 10
max_seq_length = 2048*2 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
margin = False
save_path = 'model/reward_models/' +model_name.split('/')[-2] + '_rm'
if margin:
    save_path += 'margin'



In [3]:
model = AutoModelForSequenceClassification.from_pretrained('unsloth/gemma-2b', num_labels = 1, load_in_4bit=True)
model = PeftModel.from_pretrained(model, model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Some weights of GemmaForSequenceClassification were not initialized from the model checkpoint at unsloth/gemma-2b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
peft_config = LoraConfig(
    lora_alpha= 16,
    lora_dropout= 0,
    r= 16,
    bias= "none",
    task_type= "SEQ_CLS",
    target_modules=[
    "q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj"],
)
model = get_peft_model(model, peft_config)

In [5]:
training_args = RewardConfig(
    num_train_epochs= 3,
    per_device_train_batch_size= 1,
    gradient_accumulation_steps= 1,
    optim = "adamw_8bit",
    logging_steps= 5,
    save_strategy= "epoch",
    learning_rate= 1e-4, #0 -> test if the model is trainable
    weight_decay= 0.01,
    warmup_steps= 5,
    fp16= not torch.cuda.is_bf16_supported(),
    bf16= torch.cuda.is_bf16_supported(),
    max_grad_norm= 0.3,
    lr_scheduler_type= "cosine",
    disable_tqdm= True,
    #report_to= "wandb",
    dataloader_drop_last= True,
    max_length= 1024*4,
    output_dir = save_path,
)

In [6]:
dataloader = StoryPairDataset(datapath,
                              pairpath,
                              tokenizer,
                              task='rm',
                              used_dataset_size=100,
                              train_test_split=0.1,
                              split_by=split_by,
                              max_len=4096,
                              mode= mode,
                              max_time_window=3600,
                              least_likes= lease_likes,
                              margin= margin)
#map data columns ['chosen_text', 'rejected_text'] into `input_ids_chosen`, `attention_mask_chosen`, `input_ids_rejected` and `attention_mask_rejected` with the tokenizer


the total number of pairs is  100
the number of effective pairs is  84


No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.


Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author',
       'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes',
       'story_text', 'posted_date', 'comments'],
      dtype='object')
the columns of train is  Index(['prompt_id', 'story1_id', 'story2_id', 'time_lag', 'least_likes'], dtype='object')
the first example of train is  prompt_id                                              prompt_0792
story1_id                                                   15ginj
story2_id                                                   h7yder
time_lag                                                    2100.0
least_likes                                                     11
chosen_text      <bos><|im_start|>user\nWrite a story about a c...
rejected_text    <bos><|im_start|>user\nWrite a story about a c...
Name: 0, dtype: object


In [7]:
def preprocess_function(examples):
    chosen_text = examples['chosen_text']
    rejected_text = examples['rejected_text']
    tokenized_input_chosen = tokenizer(chosen_text, truncation=True)
    tokenized_input_rejected = tokenizer(rejected_text, truncation=True)
    examples['input_ids_chosen'] = tokenized_input_chosen['input_ids']
    examples['attention_mask_chosen'] = tokenized_input_chosen['attention_mask']
    examples['input_ids_rejected'] = tokenized_input_rejected['input_ids']
    examples['attention_mask_rejected'] = tokenized_input_rejected['attention_mask']
    return examples

traindata = dataloader.dataset['train'].map(preprocess_function,num_proc=32)
testdata = dataloader.dataset['test'].map(preprocess_function,num_proc=32)


Map (num_proc=32):   0%|          | 0/75 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to tru

Map (num_proc=9):   0%|          | 0/9 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to tru

In [8]:
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from transformers.utils import PaddingStrategy
@dataclass
class RewardDataCollatorWithPadding:
    tokenizer: AutoTokenizer
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    return_tensors: str = "pt"

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        merged_features = []
        for feature in features:
            merged_features.append(
                {
                    "input_ids": feature["input_ids_chosen"],
                    "attention_mask": feature["attention_mask_chosen"],
                }
            )
            merged_features.append(
                {
                    "input_ids": feature["input_ids_rejected"],
                    "attention_mask": feature["attention_mask_rejected"],
                }
            )
        batch = self.tokenizer.pad(
            merged_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=self.return_tensors,
        )
        batch = {
            "input_ids": batch["input_ids"],
            "attention_mask": batch["attention_mask"],
            "return_loss": True,
        }
        return batch

In [16]:
from trl import RewardTrainer
trainer = RewardTrainer(
    model = model,
    args = training_args,
    tokenizer= tokenizer,
    train_dataset= traindata,
    eval_dataset= testdata,
    #peft_config= peft_config
)
trainer.train()

trainer.save_model(save_path)
print('model saved at', save_path)

OutOfMemoryError: CUDA out of memory. Tried to allocate 104.00 MiB. GPU 0 has a total capacity of 23.65 GiB of which 32.69 MiB is free. Process 2450213 has 23.61 GiB memory in use. Of the allocated memory 22.95 GiB is allocated by PyTorch, and 206.85 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
traindata[0]['input_ids_chosen']

In [None]:
basemodel = 'mistralai/Mistral-7B-Instruct-v0.3'
model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels = 1)

In [None]:
model(input_ids = torch.tensor(traindata[0]['input_ids_chosen']),
      attention_mask = torch.tensor(traindata[0]['attention_mask_chosen']),
      return_dict=True)


In [None]:
traindata[0]['input_ids_chosen']

In [None]:
tokenizer(traindata[0]['chosen_text'], truncation=True)

In [None]:
import torch
import torch.nn as nn
import warnings
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from dataloader import StoryPairDataset
from trl import RewardTrainer, RewardConfig
import os
#os.environ["WANDB_PROJECT"] = "<my-amazing-project>"  # name your W&B project
os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # log all model checkpoints


# datapath = 'readsy/stories/'
# pairpath = '../../../work/lawecon/Work/penghao/readsy_story_pairs0407.csv'
# model_name = "../../../work/lawecon/Work/penghao/SFTmodels/gemma-2b_sftm3genre10"
# base_model = '../../../work/lawecon/Work/penghao/gemma/gemma-2b'
mode='m3' if 'm3' in model_name else 'm2'
if 'random' in model_name:
    split_by = 'random'
elif 'time' in model_name:
    split_by = 'time'
else:
    split_by = 'random'
lease_likes = 10
max_seq_length = 2048*2 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
margin = False
save_path = '../../../work/lawecon/Work/penghao/reward_models/' +model_name + '_rm' + 'margin' if margin else '_no_margin'
if margin:
    save_path += 'margin'

model = AutoModelForSequenceClassification.from_pretrained(base_model, load_in_4bit=True)
model = PeftModel.from_pretrained(model, model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model)
#model = nn.Sequential(model, nn.Linear(model.config.hidden_size, 1), nn.Sigmoid())
