# Author: Riddhi Bhagwat
# Purpose: organization of files & debugging reward_eval file
# Revision id: 1893204
import sys
import os
from typing import Any, Dict, List
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, CohereConfig, AutoModel
from accelerate import Accelerator
from tqdm import tqdm
# Add script directory to system path for importing local modules
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))
from eval.utils import jload, jdump
from eval.evaluate_arguments import EvalArguments
# set `device` to "cuda" if a GPU is available. otherwise, defaults to CPU
# Module-level compute-device string ("cuda" or "cpu"), chosen once at import time.
device = "cuda" if torch.cuda.is_available() else "cpu"
def create_model(model_name: str):
    """
    Load a pre-trained reward model and move it onto the detected device.

    Args:
        model_name: Hugging Face hub id or local path of the model.

    Returns:
        The loaded model, in bfloat16, on the module-level `device`.
    """
    # Fix: use the module-level `device` instead of a hard-coded "cuda",
    # which crashed on CPU-only machines despite the detection at import time.
    # NOTE(review): flash_attention_2 itself requires a CUDA GPU + the
    # flash-attn package — confirm before running this on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        num_labels=1,
    ).to(device)
    return model
def create_tokenizer(model_name):
    """
    Load the tokenizer that pairs with `model_name` for encoding text data.

    `token=True` reads the locally stored Hugging Face auth token; the older
    `use_auth_token` keyword is deprecated in recent transformers releases.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
    return tokenizer
def MyAccelerator(mixed_precision: str):
    """
    Thin factory wrapper around `Accelerator` for the requested
    mixed-precision mode (e.g. 'bf16' or 'fp16').
    """
    accelerator = Accelerator(mixed_precision=mixed_precision)
    return accelerator
def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """
    Build a function that maps raw model logits to a reward value.

    Args:
        reward_output_format: '0' / '1' / '1-0' select which softmax
            component(s) form the reward; any other value falls back to
            returning the squeezed raw logits.
        sigmoid: when True, additionally squash the formatted reward
            through a sigmoid.

    Returns:
        Callable taking a logits tensor and returning Python floats/lists.
    """
    def default(x):
        return x.squeeze().cpu().detach().numpy().tolist()

    reward_fn_map = {
        '0': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0].tolist(),
        '1': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1].tolist(),
        '1-0': lambda x: (x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1]
                          - x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0]).tolist(),
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)
    if sigmoid:
        # Fix: apply the sigmoid ON TOP of the selected output format.
        # Previously this branch ignored `reward_output_fn` entirely and
        # sigmoided the raw logits, so the chosen format had no effect.
        return lambda x: torch.sigmoid(torch.as_tensor(reward_output_fn(x))).numpy().tolist()
    return reward_output_fn
def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Score every example in `eval_data_list_dict` with the reward model.

    Each item must contain 'output' plus either 'prompt' or 'instruction';
    a 'reward' key is added to each item in place and the same list is
    returned.

    Args:
        args: needs `reward_output_fmt`, `apply_sigmoid_to_reward`,
            and `per_device_batch_size`.
        model: reward model already on its target device, in eval mode.
        tokenizer: tokenizer paired with `model`.
        eval_data_list_dict: list of example dicts to score.
    """
    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
    rewards_list = []
    # Context manager guarantees the progress bar is closed (was leaked before).
    with tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards") as pbar:
        for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
            batch_list_dict = eval_data_list_dict[idx:idx + args.per_device_batch_size]
            # Build prompt-response pairs; fall back to the instruction
            # template when the batch has no explicit 'prompt' field.
            if 'prompt' in batch_list_dict[0]:
                batch_full_outputs = [f"{l['prompt']} {l['output']}" for l in batch_list_dict]
            else:
                batch_full_outputs = [
                    f"Below is an instruction: {l['instruction']} Response: {l['output']}"
                    for l in batch_list_dict
                ]
            # Tokenize response batch and move it onto the model's device.
            encoded_full_responses = tokenizer(
                batch_full_outputs, return_tensors="pt", padding=True, truncation=True
            ).to(model.device)
            # Generate rewards without gradient tracking.
            with torch.inference_mode():
                reward_outputs = model(**encoded_full_responses)
                rewards = reward_output_fn(reward_outputs.logits)
                # A single-example batch can come back as a bare scalar;
                # normalize so extend() never sees a non-iterable.
                rewards_list.extend(rewards if isinstance(rewards, list) else [rewards])
            pbar.update(len(batch_list_dict))
    # Attach reward scores to the original records.
    for data, reward in zip(eval_data_list_dict, rewards_list):
        data['reward'] = reward
    return eval_data_list_dict
def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Load the reward model and tokenizer, score the dataset, and save results.

    Args:
        args: evaluation arguments (batch size, output format, filenames, ...).
        model_name: Hugging Face hub id or local path of the reward model.
        eval_data_list_dict: list of example dicts to score.

    Returns:
        The scored examples (same list, with 'reward' added to each item).
    """
    model = create_model(model_name)
    tokenizer = create_tokenizer(model_name)
    model.eval()  # inference only — disable dropout etc.
    eval_data = evaluate_data(args, model, tokenizer, eval_data_list_dict)
    # Derive a default result filename from the input path when none is given.
    # splitext strips only the extension, so dotted filenames (e.g.
    # "run.v2.json") keep their full stem — split('.')[0] truncated them.
    result_filename = args.result_filename or (
        f"{os.path.splitext(os.path.basename(args.output_filepath))[0]}_reward_results.json"
    )
    with open(result_filename, "w") as f:
        json.dump(eval_data, f)
    return eval_data
# ONLY FOR TESTING:
if __name__ == '__main__':
    # Minimal smoke-test run: two hand-written examples scored with a
    # fixed reward model; replace the paths/model before real use.
    sample_data = [
        {"prompt": "How are you?", "output": "I'm doing great!"},
        {"prompt": "What's your name?", "output": "Assistant"},
    ]
    args = EvalArguments(
        bfloat16=True,
        reward_output_fmt='1-0',
        apply_sigmoid_to_reward=False,
        per_device_batch_size=8,
        output_filepath='/path/to/your/data.json',
        result_filename=None,
        model_name_or_path="CohereForAI/aya-expanse-8b",
    )
    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=sample_data)