import sys
import os
from typing import Any, Dict, List
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import Accelerator
from tqdm import tqdm
# Add script directory to system path for importing local modules
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))
from eval.utils import jload, jdump
from eval.evaluate_arguments import EvalArguments
# Set `device` to "cuda" if a GPU is available; otherwise, fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
def create_model(model_name: str):
    """
    Loads the pre-trained reward model in bfloat16 with FlashAttention-2
    and moves it onto the selected device.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        num_labels=1,
    ).to(device)
    return model
def create_tokenizer(model_name):
    # Loads the tokenizer that pairs with the model for encoding the text data.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
    # Ensure a padding token exists so that batched encoding with padding=True works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer
def MyAccelerator(mixed_precision: str):
    """
    Accelerator initialization (wrapper) for handling mixed precision.
    """
    return Accelerator(mixed_precision=mixed_precision)
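# Note: this wrapper is currently unused; the accelerator call in
# process_evaluation() below is commented out.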
def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """
    Builds a function that converts raw logits into reward scores according to
    the requested output format ('0', '1', '1-0', or raw logits by default).
    """
    def default(x):
        # Raw logits; cast to float32 first because numpy cannot handle bfloat16.
        return x.squeeze().float().cpu().detach().numpy().tolist()
    reward_fn_map = {
        # Probability of label 0, label 1, or their difference, per example.
        '0': lambda x: x.softmax(dim=-1)[..., 0].float().cpu().detach().numpy().tolist(),
        '1': lambda x: x.softmax(dim=-1)[..., 1].float().cpu().detach().numpy().tolist(),
        '1-0': lambda x: (x.softmax(dim=-1)[..., 1] - x.softmax(dim=-1)[..., 0]).float().cpu().detach().numpy().tolist(),
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)
    if sigmoid:
        # Apply a sigmoid to the raw logits instead of the formats above.
        return lambda x: torch.sigmoid(x).squeeze().float().cpu().detach().numpy().tolist()
    return reward_output_fn
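# Example (sketch, assuming hypothetical two-label logits of shape (batch, 2)):
#   fn = get_reward_output_fn('1-0', sigmoid=False)
#   fn(torch.tensor([[0.1, 0.9]]))  # -> [~0.38], i.e. P(label 1) - P(label 0)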
def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Evaluates the dataset with the reward model and returns the records with an
    added 'reward' field.
    """
    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
    pbar = tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards")
    rewards_list = []
    for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
        batch_list_dict = eval_data_list_dict[idx:idx + args.per_device_batch_size]
        # Create prompt-response pairs.
        if 'prompt' in batch_list_dict[0]:
            batch_full_outputs = [f"{l['prompt']} {l['output']}" for l in batch_list_dict]
        else:
            batch_full_outputs = [
                f"Below is an instruction: {l['instruction']} Response: {l['output']}"
                for l in batch_list_dict
            ]
        # Tokenize the responses and move them to the model's device.
        encoded_full_responses = tokenizer(batch_full_outputs, return_tensors="pt", padding=True, truncation=True)
        encoded_full_responses = encoded_full_responses.to(model.device)
        # Generate rewards.
        with torch.inference_mode():
            reward_outputs = model(**encoded_full_responses)
            rewards = reward_output_fn(reward_outputs.logits)
            # tolist() yields a bare scalar for single-example batches, so wrap it in a list.
            rewards_list.extend(rewards if isinstance(rewards, list) else [rewards])
        pbar.update(len(batch_list_dict))
    pbar.close()
    # Add the reward scores to the original data.
    for i, data in enumerate(eval_data_list_dict):
        data['reward'] = rewards_list[i]
    return eval_data_list_dict
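# Example of a returned record (sketch; the reward value is illustrative):
#   {"prompt": "How are you?", "output": "I'm doing great!", "reward": 0.42}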
def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Main entry point for the evaluation: loads the model and tokenizer, scores
    the data, and writes the results to a JSON file.
    """
    # mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
    # Initialize accelerator and model
    # accelerator = MyAccelerator(mixed_precision)
    model = create_model(model_name)
    tokenizer = create_tokenizer(model_name)
    model.eval()
    eval_data = evaluate_data(args, model, tokenizer, eval_data_list_dict)
    # Fall back to a result filename derived from the input data path.
    result_filename = args.result_filename or f"{os.path.basename(args.output_filepath).split('.')[0]}_reward_results.json"
    with open(result_filename, "w") as f:
        json.dump(eval_data, f)
    return eval_data
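# Example usage from a file (sketch; the path below is a placeholder):
#   with open("/path/to/your/data.json") as f:
#       records = json.load(f)
#   process_evaluation(args, model_name=args.model_name_or_path, eval_data_list_dict=records)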
# ONLY FOR TESTING:
if __name__ == '__main__':
    args = EvalArguments(
        bfloat16=True,
        reward_output_fmt='1-0',
        apply_sigmoid_to_reward=False,
        per_device_batch_size=8,
        output_filepath='/path/to/your/data.json',
        result_filename=None,
        model_name_or_path="CohereForAI/aya-expanse-8b",
    )
    eval_data_list_dict = [
        {"prompt": "How are you?", "output": "I'm doing great!"},
        {"prompt": "What's your name?", "output": "Assistant"},
    ]
    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)