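"""
Score prompt/response pairs with a pre-trained reward model.

The script loads a reward model and its tokenizer, batches the evaluation
records, computes a reward score per record, and writes the annotated records
back out as JSON.
"""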
import sys
import os
import json
from typing import Any, Dict, List

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from accelerate import Accelerator
from tqdm import tqdm

# Add script directory to system path for importing local modules
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

from eval.utils import jload, jdump
from eval.evaluate_arguments import EvalArguments


# Set `device` to "cuda" if a GPU is available; otherwise default to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

def create_model(model_name: str):
    """Loads the pre-trained reward model and moves it onto the available device."""
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        num_labels=1,
    ).to(device)
    return model


def create_tokenizer(model_name):
    # Loads the tokenizer that pairs with the model for encoding the text data.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
    # Batched encoding below uses padding=True, so make sure a pad token is set.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer


def MyAccelerator(mixed_precision: str):
    """
    accelerator initialization (wrapper) for handling mixed precision
    """
    return Accelerator(mixed_precision=mixed_precision)
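
# Illustrative usage of the accelerator wrapper (currently disabled in
# process_evaluation below); Accelerator.prepare would return the wrapped model:
#
#   accelerator = MyAccelerator('bf16')
#   model = accelerator.prepare(model)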
    
def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
    """
    Returns a function that maps raw reward-model logits to Python values:
    '0'/'1' give the softmax probability of that class, '1-0' gives their
    difference, and any other format returns the squeezed raw logits.
    """
    def to_probs(x):
        return x.squeeze().cpu().detach().softmax(dim=-1).numpy()

    def default(x):
        return x.squeeze().cpu().detach().numpy().tolist()

    reward_fn_map = {
        '0': lambda x: to_probs(x)[0].tolist(),
        '1': lambda x: to_probs(x)[1].tolist(),
        '1-0': lambda x: (to_probs(x)[1] - to_probs(x)[0]).tolist(),
    }
    reward_output_fn = reward_fn_map.get(reward_output_format, default)
    if sigmoid:
        # Sigmoid of the raw logits; move to CPU before converting to numpy.
        return lambda x: torch.sigmoid(x).squeeze().cpu().detach().numpy().tolist()
    return reward_output_fn
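
# Illustrative example (comments only, not executed): for a two-logit reward
# head, the '1-0' format reduces to softmax(logits)[1] - softmax(logits)[0],
# i.e. the probability margin assigned to class 1:
#
#   fn = get_reward_output_fn('1-0', sigmoid=False)
#   fn(torch.tensor([[0.2, 1.5]]))   # ~0.57 (approx. 0.79 - 0.21)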

def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Evaluate the dataset using the reward model.
    """
    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
    pbar = tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards")
    rewards_list = []

    for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
        batch_list_dict = eval_data_list_dict[idx:idx+args.per_device_batch_size]

        # Create prompt-response pairs; fall back to instruction-style records
        if 'prompt' in batch_list_dict[0]:
            batch_full_outputs = [f"{l['prompt']} {l['output']}" for l in batch_list_dict]
        else:
            batch_full_outputs = [
                f"Below is an instruction: {l['instruction']} Response: {l['output']}"
                for l in batch_list_dict
            ]

        # Tokenize responses and move them to the model's device
        encoded_full_responses = tokenizer(batch_full_outputs, return_tensors="pt", padding=True, truncation=True)
        encoded_full_responses = encoded_full_responses.to(model.device)

        # Generate rewards
        with torch.inference_mode():
            reward_outputs = model(**encoded_full_responses)
            rewards = reward_output_fn(reward_outputs.logits)
            rewards_list.extend(rewards)

        pbar.update(len(batch_list_dict))

    pbar.close()

    # Add the reward scores back onto the original records
    for i, data in enumerate(eval_data_list_dict):
        data['reward'] = rewards_list[i]

    return eval_data_list_dict

def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
    """
    Main function for processing evaluation, takes model name as input.
    """
    # mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
    
    # Initialize accelerator and model
    # accelerator = MyAccelerator(mixed_precision)
    model = create_model(model_name)
    tokenizer = create_tokenizer(model_name)

    model.eval()

    eval_data = evaluate_data(args, model, tokenizer, eval_data_list_dict)

    result_filename = args.result_filename or f"{os.path.basename(args.output_filepath).split('.')[0]}_reward_results.json"
    with open(result_filename, "w") as f:
        json.dump(eval_data, f)

    return eval_data


# ONLY FOR TESTING:
if __name__ == '__main__':
    args = EvalArguments(
        bfloat16=True,
        reward_output_fmt='1-0',
        apply_sigmoid_to_reward=False,
        per_device_batch_size=8,
        output_filepath='/path/to/your/data.json',
        result_filename=None,
        model_name_or_path="CohereForAI/aya-expanse-8b",
    )

    eval_data_list_dict = [
        {"prompt": "How are you?", "output": "I'm doing great!"},
        {"prompt": "What's your name?", "output": "Assistant"},
    ]

    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)
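
    # To score a real dataset instead of the toy records above, the records
    # could be loaded with the repo's jload helper (assumed here to return a
    # list of dicts with 'prompt' or 'instruction' plus 'output' keys):
    #
    #   eval_data_list_dict = jload(args.output_filepath)
    #   process_evaluation(args, model_name=args.model_name_or_path,
    #                      eval_data_list_dict=eval_data_list_dict)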