# Reward-model evaluation script.
# (The "Spaces: Sleeping" lines here were Hugging Face Space page-scrape
# residue, not code; replaced with this header.)
import json
import os
import sys
from typing import Any, Dict, List

import torch
from accelerate import Accelerator
from tqdm import tqdm
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, CohereConfig

# Make the parent directory importable so the local `eval` package resolves.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

# These must come AFTER the sys.path manipulation above.
from eval.utils import jload, jdump
from eval.evaluate_arguments import EvalArguments

# Prefer GPU when available; everything downstream reads this module-level flag.
device = "cuda" if torch.cuda.is_available() else "cpu"
def create_model(model_name: str):
    """
    Load the pre-trained reward model and move it onto the selected device.

    Args:
        model_name: Hugging Face model identifier or local checkpoint path.

    Returns:
        The loaded model in bfloat16 with flash-attention 2 enabled.
    """
    # NOTE(review): `num_labels=1` suggests a scalar reward head; confirm a
    # sequence-classification auto-class is not intended instead of causal-LM.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        num_labels=1,
    ).to(device)  # FIX: was hard-coded "cuda", crashing on CPU-only hosts
    return model
def create_tokenizer(model_name):
    """
    Load the tokenizer paired with `model_name` for encoding the text data.

    Args:
        model_name: Hugging Face model identifier or local checkpoint path.

    Returns:
        The matching tokenizer instance.
    """
    # `use_auth_token` is deprecated in recent transformers releases;
    # `token=True` is the supported spelling for authenticated hub access.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
    return tokenizer
def MyAccelerator(mixed_precision: str):
    """
    Thin factory wrapping `Accelerator` construction for mixed precision.

    Args:
        mixed_precision: Precision mode string (e.g. "bf16" or "fp16").

    Returns:
        A configured `Accelerator` instance.
    """
    accelerator = Accelerator(mixed_precision=mixed_precision)
    return accelerator
def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """
    Build a post-processing function mapping model logits to reward scores.

    Args:
        reward_output_format: '0' or '1' selects that softmax component,
            '1-0' their difference; any other value returns raw squeezed
            logits unchanged.
        sigmoid: If True, additionally squash the selected score(s) through
            a sigmoid.

    Returns:
        A callable taking a logits tensor and returning plain Python
        floats / lists.
    """

    def default(logits):
        # Raw logits, detached and converted to a Python scalar/list.
        return logits.squeeze().cpu().detach().numpy().tolist()

    def softmax_component(index):
        # NOTE(review): `probs[index]` picks a class for 1-D input but a row
        # for 2-D (batched) input — assumes un-batched logits; confirm callers.
        def fn(logits):
            probs = logits.squeeze().cpu().detach().softmax(dim=-1).numpy()
            return probs[index].tolist()
        return fn

    def softmax_margin(logits):
        probs = logits.squeeze().cpu().detach().softmax(dim=-1).numpy()
        return (probs[1] - probs[0]).tolist()

    reward_fn_map = {
        '0': softmax_component(0),
        '1': softmax_component(1),
        '1-0': softmax_margin,
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)

    if sigmoid:
        # BUG FIX: the sigmoid branch previously ignored `reward_output_format`
        # and sigmoided the raw logits; apply it to the selected score instead.
        return lambda logits: torch.sigmoid(
            torch.tensor(reward_output_fn(logits))
        ).numpy().tolist()
    return reward_output_fn
def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Score every prompt/response pair with the reward model and attach the
    score to each entry under the 'reward' key.

    Args:
        args: Holds `reward_output_fmt`, `apply_sigmoid_to_reward`, and
            `per_device_batch_size`.
        model: The reward model (already on its device, in eval mode).
        tokenizer: Tokenizer paired with `model`.
        eval_data_list_dict: List of dicts with 'output' plus either
            'prompt' or 'instruction'.

    Returns:
        The same list, mutated in place with a 'reward' score per entry.
    """
    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
    rewards_list = []
    # FIX: context manager guarantees the progress bar is closed on exit.
    with tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards") as pbar:
        for start in range(0, len(eval_data_list_dict), args.per_device_batch_size):
            batch = eval_data_list_dict[start:start + args.per_device_batch_size]
            # Build the text scored by the reward model: either plain
            # "<prompt> <output>" or an instruction-style template.
            if 'prompt' in batch[0]:
                batch_full_outputs = [f"{l['prompt']} {l['output']}" for l in batch]
            else:
                batch_full_outputs = [
                    f"Below is an instruction: {l['instruction']} Response: {l['output']}"
                    for l in batch
                ]
            # Tokenize responses and send to the model's device.
            encoded_full_responses = tokenizer(
                batch_full_outputs, return_tensors="pt", padding=True, truncation=True
            )
            encoded_full_responses = encoded_full_responses.to(model.device)
            # Generate rewards without tracking gradients.
            with torch.inference_mode():
                reward_outputs = model(**encoded_full_responses)
                rewards = reward_output_fn(reward_outputs.logits)
                # FIX: a size-1 batch squeezes to a scalar float, which
                # `extend` would reject — wrap scalars in a list.
                rewards_list.extend(rewards if isinstance(rewards, list) else [rewards])
            pbar.update(len(batch))
    # Attach reward scores to the original entries.
    for data, reward in zip(eval_data_list_dict, rewards_list):
        data['reward'] = reward
    return eval_data_list_dict
def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Load the reward model and tokenizer, score the dataset, and write the
    scored entries to a JSON results file.

    Args:
        args: Evaluation arguments (batch size, output format, filenames).
        model_name: Hugging Face model identifier or local checkpoint path.
        eval_data_list_dict: Entries with 'prompt'/'instruction' and 'output'.

    Returns:
        The scored list (each entry gains a 'reward' key).
    """
    model = create_model(model_name)
    tokenizer = create_tokenizer(model_name)
    model.eval()
    eval_data = evaluate_data(args, model, tokenizer, eval_data_list_dict)
    # FIX: use splitext so dotted basenames ("data.v2.json") keep their stem;
    # split('.')[0] truncated them.
    result_filename = args.result_filename or (
        f"{os.path.splitext(os.path.basename(args.output_filepath))[0]}_reward_results.json"
    )
    with open(result_filename, "w") as f:
        json.dump(eval_data, f)
    return eval_data
# ONLY FOR TESTING:
if __name__ == '__main__':
    # Minimal smoke-test configuration; paths/model are placeholders.
    args = EvalArguments(
        bfloat16=True,
        reward_output_fmt='1-0',
        apply_sigmoid_to_reward=False,
        per_device_batch_size=8,
        output_filepath='/path/to/your/data.json',
        result_filename=None,
        model_name_or_path="CohereForAI/aya-expanse-8b",
    )
    # Two tiny examples in the prompt/output schema.
    eval_data_list_dict = [
        {"prompt": "How are you?", "output": "I'm doing great!"},
        {"prompt": "What's your name?", "output": "Assistant"},
    ]
    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)