Riddhi Bhagwat committed on
Commit fbc38c9 · unverified · 2 Parent(s): 30461ca 62b796f

Merge pull request #12 from riddhibhagwat/main


Evaluation Pipeline Updates & Refined Implementation

ml/{eval/data_transform_pipeline.py → dataset_transformer.py} RENAMED
File without changes
ml/dpo_pipeline.py ADDED
@@ -0,0 +1,44 @@
+from transformers import Trainer, TrainingArguments
+from datasets import Dataset
+import torch
+
+def train_dpo_model(model, dataset, learning_rate=5e-5, num_train_epochs=3, per_device_train_batch_size=16):
+    """
+    Trains a model using Direct Preference Optimization (DPO).
+
+    Args:
+        model: The language model to be trained.
+        dataset: The dataset used for training, should be in Hugging Face Dataset format.
+        learning_rate: Learning rate for the optimizer.
+        num_train_epochs: Number of epochs to train.
+        per_device_train_batch_size: Batch size per device during training.
+    """
+    model.train()
+
+    training_args = TrainingArguments(
+        output_dir="./dpo_model",
+        evaluation_strategy="epoch",
+        save_strategy="epoch",
+        learning_rate=learning_rate,
+        per_device_train_batch_size=per_device_train_batch_size,
+        per_device_eval_batch_size=per_device_train_batch_size,
+        num_train_epochs=num_train_epochs,
+        weight_decay=0.01,
+        logging_dir="./logs",
+        logging_steps=100,
+        save_total_limit=2,
+        push_to_hub=False,
+        load_best_model_at_end=True,
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=dataset["train"],
+        eval_dataset=dataset.get("validation", None),
+    )
+
+    trainer.train()
+
+    return model
+
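Note: the helper above wraps the generic Hugging Face Trainer, so no preference-optimization loss is actually applied despite the function name. For reference, a minimal sketch of the same step using trl's DPOTrainer (trl 0.15.2 is pinned in ml/feel.yaml; the wrapper name, beta value, and prompt/chosen/rejected dataset columns are assumptions, not part of this commit):

# Hedged sketch, not part of the commit: DPO via trl's DPOTrainer, assuming a
# tokenizer and a preference dataset with "prompt"/"chosen"/"rejected" columns.
from trl import DPOConfig, DPOTrainer

def train_dpo_with_trl(model, tokenizer, dataset, learning_rate=5e-5,
                       num_train_epochs=3, per_device_train_batch_size=16):
    args = DPOConfig(
        output_dir="./dpo_model",
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        beta=0.1,  # assumed strength of the preference penalty
        logging_steps=100,
    )
    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # trl builds a frozen reference copy when None
        args=args,
        train_dataset=dataset["train"],
        eval_dataset=dataset.get("validation"),
        processing_class=tokenizer,
    )
    trainer.train()
    return model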
ml/eval/.reward_eval.py.swp ADDED
Binary file (20.5 kB).
 
ml/eval/alpaca.py CHANGED
@@ -0,0 +1,43 @@
+import openai
+import os
+from alpaca_eval import run_evaluation
+
+def judge_responses(response1, response2, prompt):
+    """
+    Use OpenAI GPT-4 API to judge two model responses.
+    Returns: "A" if response1 is better, "B" if response2 is better, or "tie".
+    """
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+
+    prompt_text = f"""
+    Given the user prompt: "{prompt}"
+
+    Response A: "{response1}"
+    Response B: "{response2}"
+
+    Which response is better? Reply with 'A', 'B', or 'tie'.
+    """
+
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=[{"role": "system", "content": "You are an expert evaluator."},
+                      {"role": "user", "content": prompt_text}],
+            max_tokens=5
+        )
+        result = response["choices"][0]["message"]["content"].strip().lower()
+        return result if result in ["a", "b", "tie"] else "tie"
+    except Exception as e:
+        print(f"Error in OpenAI API call: {e}")
+        return "tie"
+
+
+
+def alpaca_evaluator(model_name, num_samples=200):
+    results = run_evaluation(
+        model=model_name,
+        num_samples=num_samples,      # fewer samples for quick testing
+        reference_model="gpt-4",      # Compare against GPT-4 (optional)
+    )
+    return results
+
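Note: ml/feel.yaml pins openai 1.65.2, where the legacy openai.ChatCompletion interface used above has been removed. A minimal sketch of the same judging call against the v1 client (an assumption, not part of this commit; the helper name is illustrative):

# Hedged sketch, not part of the commit: the same GPT-4 judgement using the
# openai>=1.0 client API.
from openai import OpenAI

def judge_responses_v1(response1, response2, prompt):
    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    prompt_text = (
        f'Given the user prompt: "{prompt}"\n\n'
        f'Response A: "{response1}"\n\n'
        f'Response B: "{response2}"\n\n'
        "Which response is better? Reply with 'A', 'B', or 'tie'."
    )
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are an expert evaluator."},
                  {"role": "user", "content": prompt_text}],
        max_tokens=5,
    )
    result = completion.choices[0].message.content.strip().lower()
    return result if result in ["a", "b", "tie"] else "tie"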
ml/eval/bt.py CHANGED
@@ -11,9 +11,9 @@ class ScriptArguments:
     """
     Arguments for the Bradley-Terry evaluation script.
     """
-    sft_generations_file: str = '/raid/lingo/jen_ben/HF-RLHF/eval/test/gen_examples_idan_mini.json'
-    kto_generations_file: str = '/raid/lingo/jen_ben/HF-RLHF/eval/test/gen_examples_idan_mini.json'
-    output_file: str = 'bt_results_test_mini.json'
+    old_generations_file: str
+    new_generations_file: str
+    output_file: str = 'bt_results.json'
 
 
 ####################################
@@ -34,63 +34,63 @@ def load_rewards(file_path):
         return json.load(f)
 
 
-def bradley_terry_comparison(sft_rewards, kto_rewards):
+def bradley_terry_comparison(old_rewards, new_rewards):
     """
     Perform Bradley-Terry comparison between two sets of model generations.
 
     Args:
-        sft_rewards (list): List of dictionaries for the SFT model's generations and rewards.
-        kto_rewards (list): List of dictionaries for the KTO model's generations and rewards.
+        old_rewards (list): List of dictionaries for the OLD model's generations and rewards.
+        new_rewards (list): List of dictionaries for the NEW model's generations and rewards.
 
     Returns:
         list: Comparison results including preferred outputs and probabilities.
         dict: Metrics summary including percentage preferred and average probabilities.
     """
     results = []
-    kto_preferred_count = 0
-    sft_preferred_count = 0
+    new_preferred_count = 0
+    old_preferred_count = 0
     probabilities = []
 
-    for ix in range(len(sft_rewards)):
-        sft = sft_rewards[ix]
-        kto = kto_rewards[ix]
+    for ix in range(len(old_rewards)):
+        old = old_rewards[ix]
+        new = new_rewards[ix]
 
         # Ensure prompts match
-        assert sft['prompt'] == kto['prompt'], f"ERROR: Prompts at index {ix} do not match."
+        assert old['prompt'] == new['prompt'], f"ERROR: Prompts at index {ix} do not match."
 
         # Compute Bradley-Terry probability
-        kto_reward = torch.tensor(kto['reward'], dtype=torch.float32)
-        sft_reward = torch.tensor(sft['reward'], dtype=torch.float32)
-        prob_kto_preferred = torch.sigmoid(kto_reward - sft_reward).item()
+        new_reward = torch.tensor(new['reward'], dtype=torch.float32)
+        old_reward = torch.tensor(old['reward'], dtype=torch.float32)
+        prob_new_preferred = torch.sigmoid(new_reward - old_reward).item()
 
-        probabilities.append(prob_kto_preferred)
-        preferred_model = 'kto' if prob_kto_preferred > 0.5 else 'sft'
+        probabilities.append(prob_new_preferred)
+        preferred_model = 'new' if prob_new_preferred > 0.5 else 'old'
 
         # Count preferences
-        if preferred_model == 'kto':
-            kto_preferred_count += 1
+        if preferred_model == 'new':
+            new_preferred_count += 1
         else:
-            sft_preferred_count += 1
+            old_preferred_count += 1
 
         # Log results
         bt_result = {
-            'prompt': sft['prompt'],
-            'sft_output': sft['output'],
-            'kto_output': kto['output'],
-            'sft_reward': sft['reward'],
-            'kto_reward': kto['reward'],
+            'prompt': old['prompt'],
+            'old_output': old['output'],
+            'new_output': new['output'],
+            'old_reward': old['reward'],
+            'new_reward': new['reward'],
             'preferred': preferred_model,
-            'prob_kto_preferred': prob_kto_preferred
+            'prob_new_preferred': prob_new_preferred
         }
        results.append(bt_result)
 
     # Calculate metrics
-    total_examples = len(sft_rewards)
+    total_examples = len(old_rewards)
     metrics = {
         'total_examples': total_examples,
-        'kto_preferred_percentage': 100 * kto_preferred_count / total_examples,
-        'sft_preferred_percentage': 100 * sft_preferred_count / total_examples,
-        'avg_probability_kto_preferred': sum(probabilities) / total_examples
+        'new_preferred_percentage': 100 * new_preferred_count / total_examples,
+        'old_preferred_percentage': 100 * old_preferred_count / total_examples,
+        'avg_probability_new_preferred': sum(probabilities) / total_examples
     }
 
     return results, metrics
@@ -118,9 +118,9 @@ def print_metrics(metrics):
     """
     print("\nEVALUATION METRICS:")
     print(f"Total examples: {metrics['total_examples']}")
-    print(f"Percentage preferred - KTO model: {metrics['kto_preferred_percentage']:.2f}%")
-    print(f"Percentage preferred - SFT model: {metrics['sft_preferred_percentage']:.2f}%")
-    print(f"Average probability of KTO model being preferred: {metrics['avg_probability_kto_preferred']:.4f}")
+    print(f"Percentage preferred - new model: {metrics['new_preferred_percentage']:.2f}%")
+    print(f"Percentage preferred - old model: {metrics['old_preferred_percentage']:.2f}%")
+    print(f"Average probability of new model being preferred: {metrics['avg_probability_new_preferred']:.4f}")
 
 
 ####################################
@@ -128,22 +128,17 @@ def print_metrics(metrics):
 ####################################
 
 def main():
-    # Initialize script arguments
     args = ScriptArguments()
 
-    # Load data
     print("Loading data...")
-    sft_rewards = load_rewards(args.sft_generations_file)
-    kto_rewards = load_rewards(args.kto_generations_file)
+    old_rewards = load_rewards(args.old_generations_file)
+    new_rewards = load_rewards(args.new_generations_file)
 
     # Perform Bradley-Terry comparison
     print("Performing Bradley-Terry comparison...")
-    results, metrics = bradley_terry_comparison(sft_rewards, kto_rewards)
+    results, metrics = bradley_terry_comparison(old_rewards, new_rewards)
 
-    # Save results
     save_results(results, args.output_file)
-
-    # Print metrics
     print_metrics(metrics)
 
@@ -152,55 +147,3 @@ if __name__ == "__main__":
 
 
 
-# import json
-# import torch
-
-# output_file_path = 'bt_results.json'
-# ref_generations_rewards_file_path = 'ref_models_generations_reward_trl-libqwen1.5-1.8b-sft.json'
-# finetuned_generations_rewards_file_path = 'finetuned_models_generations_reward_trl-libqwen1.5-1.8b-sft.json'
-
-# # Open and read JSON files
-# with open(ref_generations_rewards_file_path, 'r') as f:
-#     ref_rewards = json.load(f)
-
-# with open(finetuned_generations_rewards_file_path, 'r') as g:
-#     finetuned_rewards = json.load(g)
-
-# # assert len(ref_rewards) != len(finetuned_rewards), 'ERROR: files are not with the same length.'
-
-# results = []
-# finetuned_preffered = 0
-# for ix in range(len(ref_rewards)):
-#     ref = ref_rewards[ix]
-#     finetuned = finetuned_rewards[ix]
-#     assert ref['prompt'] == finetuned['prompt'], 'ERROR: ref and finetuned prompt are not the same.'
-
-#     # Bradely Terry
-#     finetuned_reward = torch.tensor(finetuned['reward'], dtype=torch.float32)
-#     ref_reward = torch.tensor(ref['reward'], dtype=torch.float32)
-#     prob_finetuned_preferred = torch.sigmoid(finetuned_reward - ref_reward)
-
-
-#     if prob_finetuned_preferred > 0.5:
-#         finetuned_preffered +=1
-#         print(f'example {ix}: finetuned preffered')
-#     else:
-#         print(f'example {ix}: ref preffered')
-
-#     # log results
-#     bt_result = {}
-#     bt_result['prompt'] = ref['prompt']
-#     bt_result['ref_output'] = ref['output']
-#     bt_result['finetuned_output'] = finetuned['output']
-#     bt_result['ref_reward'] = ref['output']
-#     bt_result['finetuned_reward'] = finetuned['output']
-#     bt_result['preffered'] = 'finetuned' if prob_finetuned_preferred > 0.5 else 'ref'
-#     results.append(bt_result)
-
-
-# # save results in json files
-
-# with open(output_file_path, "w") as f:
-#     json.dump(results, f, indent=4)
-
-# print('BT EVALUATION COMPLETED.')
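For reference, the comparison above reduces to the Bradley-Terry preference probability P(new preferred over old) = sigmoid(r_new - r_old). A toy illustration with made-up reward values:

# Toy illustration of the Bradley-Terry probability used in bt.py; rewards are made up.
import torch

r_new, r_old = torch.tensor(1.3), torch.tensor(0.4)
prob_new_preferred = torch.sigmoid(r_new - r_old).item()  # ~0.71, so 'new' is preferred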
ml/eval/evaluate.py DELETED
@@ -1,185 +0,0 @@
-import sys
-import os
-from typing import Any, Dict, List
-
-import torch
-import transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
-from accelerate import Accelerator
-from trl import KTOConfig, KTOTrainer, ModelConfig, get_peft_config, maybe_unpair_preference_dataset, setup_chat_format
-from tqdm import tqdm
-
-# Add script directory to system path for importing local modules
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.dirname(SCRIPT_DIR))
-
-from eval.utils import jload, jdump
-from eval.evaluate_arguments import EvalArguments
-
-
-# set `device` to "cuda" if a GPU is available. otherwise, defaults to CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-def create_model():
-    # loads a specified reward model and sets it to use the GPU ("cuda")
-    # CHANGE FUNCTION DEPENDING OF THE MODEL YOU LOAD
-    model = AutoModelForSequenceClassification.from_pretrained("Skywork/Skywork-Reward-Llama-3.1-8B-v0.2", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", num_labels=1).to("cuda")
-    return model
-
-
-def create_tokenizer():
-    # loads the tokenizer that pairs with the model for encoding the text data
-    tokenizer = AutoTokenizer.from_pretrained("Skywork/Skywork-Reward-Llama-3.1-8B-v0.2", use_auth_token=True)
-    return tokenizer
-
-
-def MyAccelerator(mixed_precision):
-    # wrap `Accelerator` to set up model handling with mixed-precision (to save memory)
-    accelerator = Accelerator(mixed_precision=mixed_precision)
-    return accelerator
-
-
-#####################################
-# Idan's script from here
-#####################################
-
-
-def main():
-
-    # Parse evaluation arguments from `EvalArguments`
-    parser = transformers.HfArgumentParser((EvalArguments, ))
-    args, = parser.parse_args_into_dataclasses()
-
-    # set `mixed_precision` based on `args.bfloat16` (if true use bf16, otherwise fp16)
-    mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
-    args.mixed_precision = mixed_precision
-
-    # initialize `MyAccelerator` with the chosen mixed precision setting
-    accelerator = MyAccelerator(
-        mixed_precision=mixed_precision,
-    )
-
-
-    # load model and tokenizer
-    model = create_model()
-    if 't5' not in args.model_name_or_path:
-        # t5 models where trained with fp32
-        model = accelerator.prepare(model)
-    model.eval()
-
-    tokenizer = create_tokenizer()
-
-    print("Output file path:", args.output_filepath)
-
-    # load LM generations data from `args.output_filepath` + handles cases where it's a single file or directory.
-    filenames = []
-    eval_data_list_dict = []
-    if os.path.isfile(args.output_filepath):
-        print(f'Loading data from {args.output_filepath}...')
-        eval_data_list_dict.append(jload(args.output_filepath))
-        filenames.append(args.output_filepath)
-    elif os.path.isdir(args.output_filepath):
-        print(f'Loading data from {args.output_filepath}...')
-        for filename in os.listdir(args.output_filepath):
-            if filename.endswith('.json'):
-                print(f'Loaded file {filename}')
-                eval_data_list_dict.append(jload(os.path.join(args.output_filepath, filename)))
-                filenames.append(os.path.join(args.output_filepath, filename))
-    else:
-        raise Exception('Output file(s) not found!')
-
-
-    # process each file and call `evaluate_data()` to calculate reward scores
-    for filename, eval_data_dict in zip(filenames, eval_data_list_dict):
-        eval_data = evaluate_data(args, model, tokenizer, eval_data_dict)
-
-        if args.result_filename is None:
-            path_to_result = os.path.basename(filename).split('.json')[0] + f"_reward_{args.model_name_or_path.replace('/', '')}.json"
-        else:
-            path_to_result = args.result_filename
-
-        print(f'Saving results to file {path_to_result}...')
-        jdump(eval_data, path_to_result)
-
-
-def get_reward_output_fn(reward_output_fmt: str, apply_sigmoid_to_reward: bool):
-    # defines the reward output function format based on `reward_output_fmt`
-    if reward_output_fmt is None:
-        reward_output_fn = lambda x: x.squeeze().cpu().detach().numpy().tolist()
-    elif reward_output_fmt == '0':
-        reward_output_fn = lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0].tolist()
-    elif reward_output_fmt == '1':
-        reward_output_fn = lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1].tolist()
-    elif reward_output_fmt == '1-0':
-        reward_output_fn = lambda x: (x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1] - x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0]).tolist()
-    else:
-        raise NotImplementedError(f'Unsupported reward output format: {reward_output_fmt}')
-
-    # Apply sigmoid transformation if `apply_sigmoid_to_reward` is true
-    if apply_sigmoid_to_reward:
-        reward_output_fn = lambda x: torch.sigmoid(torch.tensor(x)).numpy().tolist()
-
-    return reward_output_fn
-
-
-@torch.inference_mode()
-def evaluate_data(args: EvalArguments, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
-    """Given a generated dataset, evaluate it using the reward model
-
-    args: argparse.Namespace, the arguments to use
-    reward_model: reward_model_module.RewardModel, the reward model to use
-    eval_data_list_dict: List[Dict[str, Any]], the generated data to evaluate
-    """
-
-    pbar = tqdm(total=len(eval_data_list_dict), desc="eval")
-    rewards_list = []
-    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
-
-    print('Evaluating reward scores...')
-
-    # Split `eval_data_list_dict` into batches for processing
-    for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
-        if len(eval_data_list_dict) > (idx + args.per_device_batch_size):
-            batch_list_dict = eval_data_list_dict[idx:idx+args.per_device_batch_size]
-        else:
-            batch_list_dict = eval_data_list_dict[idx:]
-
-        # create formatted text from prompts and outputs for tokenization
-        if 'prompt' in batch_list_dict[0]:
-            batch_full_outputs = [l['prompt'] + ' ' + l['output'] for l in batch_list_dict]
-        else:
-            print('Overriding with custom prompt format')
-            prompt_fmt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response: {output}"
-            for l in batch_list_dict:
-                l['output'] = l['output'].split('.')[0] + '.'
-            batch_full_outputs = [prompt_fmt.format_map(l) for l in batch_list_dict]
-
-        # tokenize and send the batched text to the model's device
-        encoded_full_responses = tokenizer(batch_full_outputs, return_tensors="pt", padding=True, truncation=True)
-        encoded_full_responses = encoded_full_responses.to(model.device)  # i added this
-
-        # generate reward scores and stores them in `rewards_list`
-        reward_outputs = model(**encoded_full_responses)
-        rewards = reward_output_fn(reward_outputs.logits)
-        rewards_list.extend(rewards if isinstance(rewards, list) else [rewards])
-
-        # update progress bar after each batch is processed
-        pbar.update(len(batch_list_dict))
-
-    print('Combining reward outputs into outputs...')
-
-    # add calculated rewards to each item in `eval_data_list_dict`
-    for j in range(len(eval_data_list_dict)):
-        eval_data_list_dict[j]['reward'] = rewards_list[j]
-        eval_data_list_dict[j]['reward_model'] = args.model_name_or_path + args.model_pretrained_lora_weights if args.model_pretrained_lora_weights is not None else args.model_name_or_path
-
-    print('Finished evaluating reward scores!')
-
-    print('Mean reward score: ', sum(rewards_list) / len(rewards_list))
-    print('Std reward score: ', torch.tensor(rewards_list).std().item())
-
-    return eval_data_list_dict
-
-
-if __name__ == '__main__':
-    main()
ml/eval/evaluate_arguments.py CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 @dataclass
 class EvalArguments:
     model_name_or_path: str = field(
-        default="mistralai/Mistral-7B-v0.1", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
+        default="CohereForAI/aya-expanse-8b", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
     model_pretrained_lora_weights: str = field(
         default=None, metadata={"help": "Path to a checkpoint directory."})
     output_filepath: str = field(
ml/eval/evaluation_pipeline.py ADDED
@@ -0,0 +1,58 @@
+###########
+# IMPORTS #
+###########
+from reward_eval import process_evaluation
+from generate import generate_files
+from alpaca import alpaca_evaluator, judge_responses
+from bt import bradley_terry_comparison, load_rewards
+from evaluate_arguments import EvalArguments
+import pandas as pd
+import numpy as np
+
+#############
+# EVALUATOR #
+#############
+'''
+Evaluation Pipeline
+
+Parameters:
+    eval_dataset: list of dictionaries that contain the prompt and response in the same form as below:
+        [{"prompt": "How are you?", "output": "I'm doing great!"}, {"prompt": "What's your name?", "output": "Assistant"}]
+    reward_output_filepath: string (must end in .json) that represents the path of the output of the reward score evaluation
+    model: base model that is being evaluated (defaults to starter base model - Aya-23-8B)
+    all_responses: should be a path to a csv file that has all the model's responses and their corresponding prompts with the following
+        format: response1 --> col 1, response2 --> col 2, prompt --> col 3
+
+    language: which language is being used for this model (needs to be a valid FeeLLanguage object once FeeLLanguage class is updated)
+'''
+def evaluator_master_fn(eval_dataset: list[dict],
+                        reward_output_filepath: str,
+                        all_responses: str,
+                        language: str,
+                        new_model,
+                        old_model="CohereForAI/aya-expanse-8b"):
+    # language is a string for now; it will become an object once the FeeLLanguage class adds language-specific
+    # functionalities (it will also store the latest model and make such functions much easier to handle)
+
+    # 1. Reward score evaluation:
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath="new_evaluation",
+                         result_filename=None,
+                         model_name_or_path=new_model)
+    reward_score_result = process_evaluation(args, model_name=new_model, eval_data_list_dict=eval_dataset)
+
+    # 2. Alpaca Eval - Judging Responses
+    judge_df = pd.read_csv(all_responses)
+    judge_df["winner"] = judge_df.apply(lambda r: judge_responses(r["response1"], r["response2"], r["prompt"]), axis=1)  # axis=1 -- apply over rows
+
+    # 3. Alpaca Eval - model comparison
+    alpaca_results = alpaca_evaluator(new_model, num_samples=200)  # adjust num_samples as needed, potentially based on language
+
+    # 4. Bradley Terry Evaluation
+    bt_results = bradley_terry_comparison(load_rewards(old_model), load_rewards(new_model))
+
+    return reward_score_result, judge_df, alpaca_results, bt_results
+
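A hypothetical call to the evaluator_master_fn added above (the file paths and model id below are illustrative assumptions, not part of this commit):

# Hedged usage sketch for evaluator_master_fn; paths and model id are assumptions.
eval_dataset = [
    {"prompt": "How are you?", "output": "I'm doing great!"},
    {"prompt": "What's your name?", "output": "Assistant"},
]
rewards, judged, alpaca_results, bt_results = evaluator_master_fn(
    eval_dataset=eval_dataset,
    reward_output_filepath="reward_scores.json",
    all_responses="responses.csv",  # columns: response1, response2, prompt
    language="en",
    new_model="CohereForAI/aya-expanse-8b",
)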
ml/eval/generate.py CHANGED
@@ -1,6 +1,4 @@
-import torch
 from dataclasses import dataclass
-from accelerate import PartialState
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from trl import ModelConfig, maybe_unpair_preference_dataset, setup_chat_format
@@ -8,15 +6,10 @@ from tqdm import tqdm
 import json
 import os
 import sys
-from pdb import set_trace as st
-
 
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(SCRIPT_DIR))
 
-from dataloaders.data_loader import get_oasst
-
-
 ####################################
 # CONFIGURATION
 ####################################
@@ -96,7 +89,7 @@ def load_oasst_test_dataset():
     """Load and prepare the dataset."""
 
     # Load oasst test dataset
-    test_dataset = get_oasst(split='test')
+    test_dataset = load_dataset(split='test')
     return test_dataset
 
 
@@ -141,7 +134,7 @@ def save_results(results, output_file):
 # MAIN SCRIPT
 ####################################
 
-def main():
+def generate_files():
     # Load model and tokenizer
     print("Loading kto fine-tuned model...")
     kto_model, kto_tokenizer = load_model_and_tokenizer(script_args.kto_model_path, use_auth_token=True)
@@ -166,4 +159,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    generate_files()
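Note: datasets.load_dataset requires a dataset id as its first argument, which the new call above omits. A hedged sketch of the intended loader (the dataset id and split are assumptions based on the surrounding "oasst test dataset" comment, not stated in the commit):

# Hedged sketch, not part of the commit: the dataset id and split are assumed
# stand-ins for the oasst test data referenced in load_oasst_test_dataset().
from datasets import load_dataset

def load_oasst_test_dataset(dataset_id="OpenAssistant/oasst1", split="validation"):
    return load_dataset(dataset_id, split=split)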
ml/eval/generate_sanity_check.py CHANGED
@@ -45,7 +45,7 @@ ref_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda")
 print(f'loaded reference model')
 
-# load a tokenaizer
+# load a tokenizer
 ref_tokenizer = AutoTokenizer.from_pretrained(
     ref_model_args.model_name_or_path, trust_remote_code=ref_model_args.trust_remote_code
 )
ml/eval/reward_eval.py ADDED
@@ -0,0 +1,123 @@
+import sys
+import os
+from typing import Any, Dict, List
+import json
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, CohereConfig, AutoModel
+from accelerate import Accelerator
+from tqdm import tqdm
+
+# Add script directory to system path for importing local modules
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from eval.utils import jload, jdump
+from eval.evaluate_arguments import EvalArguments
+
+
+# set `device` to "cuda" if a GPU is available. otherwise, defaults to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+def create_model(model_name: str):
+    """
+    loads pre-trained reward model and moves it onto device
+    """
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", num_labels=1).to("cuda")
+    return model
+
+
+def create_tokenizer(model_name):
+    # loads the tokenizer that pairs with the model for encoding the text data
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
+    return tokenizer
+
+
+def MyAccelerator(mixed_precision: str):
+    """
+    accelerator initialization (wrapper) for handling mixed precision
+    """
+    return Accelerator(mixed_precision=mixed_precision)
+
+def get_reward_output_fn(reward_output_format: str, sigmoid: bool):
+    def default(x):
+        return x.squeeze().cpu().detach().numpy().tolist()
+    reward_fn_map = {
+        '0': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0].tolist(),
+        '1': lambda x: x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1].tolist(),
+        '1-0': lambda x: (x.squeeze().cpu().detach().softmax(dim=-1).numpy()[1] - x.squeeze().cpu().detach().softmax(dim=-1).numpy()[0]).tolist()
+    }
+    reward_output_fn = reward_fn_map.get(reward_output_format, default)
+    if sigmoid:
+        return lambda x: torch.sigmoid(torch.tensor(x)).numpy().tolist()
+    return reward_output_fn
+
+def evaluate_data(args, model, tokenizer, eval_data_list_dict) -> List[Dict[str, Any]]:
+    """
+    Evaluate the dataset using the reward model.
+    """
+    reward_output_fn = get_reward_output_fn(args.reward_output_fmt, args.apply_sigmoid_to_reward)
+    pbar = tqdm(total=len(eval_data_list_dict), desc="Evaluating Rewards")
+    rewards_list = []
+
+    for idx in range(0, len(eval_data_list_dict), args.per_device_batch_size):
+        batch_list_dict = eval_data_list_dict[idx:idx+args.per_device_batch_size]
+
+        # Create prompt-response pairs
+        batch_full_outputs = [
+            f"{l['prompt']} {l['output']}" for l in batch_list_dict
+        ] if 'prompt' in batch_list_dict[0] else [f"Below is an instruction: {l['instruction']} Response: {l['output']}" for l in batch_list_dict]
+
+        # Tokenize response and send to device
+        encoded_full_responses = tokenizer(batch_full_outputs, return_tensors="pt", padding=True, truncation=True)
+        encoded_full_responses = encoded_full_responses.to(model.device)
+
+        # Generate rewards
+        with torch.inference_mode():
+            reward_outputs = model(**encoded_full_responses)
+            rewards = reward_output_fn(reward_outputs.logits)
+            rewards_list.extend(rewards)
+
+        pbar.update(len(batch_list_dict))
+
+    # Adding reward scores to original data
+    for i, data in enumerate(eval_data_list_dict):
+        data['reward'] = rewards_list[i]
+
+    return eval_data_list_dict
+
+def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
+    """
+    Main function for processing evaluation, takes model name as input.
+    """
+    # mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
+
+    # Initialize accelerator and model
+    # accelerator = MyAccelerator(mixed_precision)
+    model = create_model(model_name)
+    tokenizer = create_tokenizer(model_name)
+
+    model.eval()
+
+    eval_data = evaluate_data(args, model, tokenizer, eval_data_list_dict)
+
+    result_filename = args.result_filename or f"{os.path.basename(args.output_filepath).split('.')[0]}_reward_results.json"
+    with open(result_filename, "w") as f:
+        json.dump(eval_data, f)
+
+    return eval_data
+
+
+# ONLY FOR TESTING:
+if __name__ == '__main__':
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath='/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
+
+
+    eval_data_list_dict = [{"prompt": "How are you?", "output": "I'm doing great!"}, {"prompt": "What's your name?", "output": "Assistant"}]
+
+    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)
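For reference, a toy illustration of the '1-0' reward format used in get_reward_output_fn above: softmax over a two-logit output, then P(class 1) - P(class 0). The logit values are made up:

# Toy illustration of the '1-0' reward format; logits are made up.
import torch

logits = torch.tensor([0.2, 1.0])
probs = logits.softmax(dim=-1)
reward = (probs[1] - probs[0]).item()  # ~0.38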
ml/feel.yaml ADDED
@@ -0,0 +1,176 @@
+name: feel
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - accelerate=1.4.0=pyhd8ed1ab_0
+  - aiohappyeyeballs=2.4.6=pyhd8ed1ab_0
+  - aiohttp=3.11.13=py313ha9b7d5b_0
+  - aiosignal=1.3.2=pyhd8ed1ab_0
+  - annotated-types=0.7.0=pyhd8ed1ab_1
+  - anyio=4.8.0=pyhd8ed1ab_0
+  - attrs=25.1.0=pyh71513ae_0
+  - aws-c-auth=0.8.1=hfc2798a_0
+  - aws-c-cal=0.8.1=hc8a0bd2_3
+  - aws-c-common=0.10.6=h5505292_0
+  - aws-c-compression=0.3.0=hc8a0bd2_5
+  - aws-c-event-stream=0.5.0=h54f970a_11
+  - aws-c-http=0.9.2=h96aa502_4
+  - aws-c-io=0.15.3=haba67d1_6
+  - aws-c-mqtt=0.11.0=h24f418c_12
+  - aws-c-s3=0.7.9=hf37e03c_1
+  - aws-c-sdkutils=0.2.2=hc8a0bd2_0
+  - aws-checksums=0.2.2=hc8a0bd2_4
+  - aws-crt-cpp=0.29.9=ha81f72f_2
+  - aws-sdk-cpp=1.11.489=h0e5014b_0
+  - azure-core-cpp=1.14.0=hd50102c_0
+  - azure-identity-cpp=1.10.0=hc602bab_0
+  - azure-storage-blobs-cpp=12.13.0=h7585a09_1
+  - azure-storage-common-cpp=12.8.0=h9ca1f76_1
+  - azure-storage-files-datalake-cpp=12.12.0=hcdd55da_1
+  - brotli-python=1.1.0=py313h3579c5c_2
+  - bzip2=1.0.8=h99b78c6_7
+  - c-ares=1.34.4=h5505292_0
+  - ca-certificates=2025.1.31=hf0a4a13_0
+  - certifi=2025.1.31=pyhd8ed1ab_0
+  - charset-normalizer=3.4.1=pyhd8ed1ab_0
+  - colorama=0.4.6=pyhd8ed1ab_1
+  - cpython=3.13.2=py313hd8ed1ab_101
+  - datasets=3.3.2=pyhd8ed1ab_0
+  - dill=0.3.8=pyhd8ed1ab_0
+  - distro=1.9.0=pyhd8ed1ab_1
+  - docstring_parser=0.16=pyhd8ed1ab_0
+  - eval_type_backport=0.2.2=pyha770c72_0
+  - exceptiongroup=1.2.2=pyhd8ed1ab_1
+  - filelock=3.17.0=pyhd8ed1ab_0
+  - frozendict=2.4.6=py313h63a2874_0
+  - frozenlist=1.5.0=py313ha9b7d5b_1
+  - fsspec=2024.12.0=pyhd8ed1ab_0
+  - gflags=2.2.2=hf9b8971_1005
+  - glog=0.7.1=heb240a5_0
+  - gmp=6.3.0=h7bae524_2
+  - gmpy2=2.1.5=py313h2cdc120_3
+  - h11=0.14.0=pyhd8ed1ab_1
+  - h2=4.2.0=pyhd8ed1ab_0
+  - hpack=4.1.0=pyhd8ed1ab_0
+  - httpcore=1.0.7=pyh29332c3_1
+  - httpx=0.28.1=pyhd8ed1ab_0
+  - huggingface_hub=0.29.1=pyhd8ed1ab_0
+  - hyperframe=6.1.0=pyhd8ed1ab_0
+  - idna=3.10=pyhd8ed1ab_1
+  - jinja2=3.1.5=pyhd8ed1ab_0
+  - jiter=0.8.2=py313hdde674f_0
+  - krb5=1.21.3=h237132a_0
+  - libabseil=20240722.0=cxx17_h07bc746_4
+  - libarrow=19.0.1=h0945df6_0_cpu
+  - libarrow-acero=19.0.1=hf07054f_0_cpu
+  - libarrow-dataset=19.0.1=hf07054f_0_cpu
+  - libarrow-substrait=19.0.1=h4239455_0_cpu
+  - libblas=3.9.0=31_h10e41b3_openblas
+  - libbrotlicommon=1.1.0=hd74edd7_2
+  - libbrotlidec=1.1.0=hd74edd7_2
+  - libbrotlienc=1.1.0=hd74edd7_2
+  - libcblas=3.9.0=31_hb3479ef_openblas
+  - libcrc32c=1.1.2=hbdafb3b_0
+  - libcurl=8.12.1=h73640d1_0
+  - libcxx=19.1.7=ha82da77_0
+  - libedit=3.1.20250104=pl5321hafb1f1b_0
+  - libev=4.33=h93a5062_2
+  - libevent=2.1.12=h2757513_1
+  - libexpat=2.6.4=h286801f_0
+  - libffi=3.4.2=h3422bc3_5
+  - libgfortran=5.0.0=13_2_0_hd922786_3
+  - libgfortran5=13.2.0=hf226fd6_3
+  - libgoogle-cloud=2.35.0=hdbe95d5_0
+  - libgoogle-cloud-storage=2.35.0=h7081f7f_0
+  - libgrpc=1.67.1=h0a426d6_2
+  - libiconv=1.18=hfe07756_1
+  - liblapack=3.9.0=31_hc9a63f6_openblas
+  - liblzma=5.6.4=h39f12f2_0
+  - libmpdec=4.0.0=h99b78c6_0
+  - libnghttp2=1.64.0=h6d7220d_0
+  - libopenblas=0.3.29=openmp_hf332438_0
+  - libopentelemetry-cpp=1.18.0=h0c05b2d_1
+  - libopentelemetry-cpp-headers=1.18.0=hce30654_1
+  - libparquet=19.0.1=h636d7b7_0_cpu
+  - libprotobuf=5.28.3=h3bd63a1_1
+  - libre2-11=2024.07.02=h07bc746_2
+  - libsqlite=3.49.1=h3f77e49_1
+  - libssh2=1.11.1=h9cc3647_0
+  - libthrift=0.21.0=h64651cc_0
+  - libtorch=2.6.0=cpu_generic_h6adcabc_0
+  - libutf8proc=2.10.0=hda25de7_0
+  - libuv=1.50.0=h5505292_0
+  - libxml2=2.13.6=hce475f1_0
+  - libzlib=1.3.1=h8359307_2
+  - llvm-openmp=19.1.7=hdb05f8b_0
+  - lz4-c=1.10.0=h286801f_1
+  - markdown-it-py=3.0.0=pyhd8ed1ab_1
+  - markupsafe=3.0.2=py313ha9b7d5b_1
+  - mdurl=0.1.2=pyhd8ed1ab_1
+  - mpc=1.3.1=h8f1351a_1
+  - mpfr=4.2.1=hb693164_3
+  - mpmath=1.3.0=pyhd8ed1ab_1
+  - multidict=6.1.0=py313h6347b5a_1
+  - multiprocess=0.70.16=py313h20a7fcf_1
+  - ncurses=6.5=h5e97a16_3
+  - networkx=3.4.2=pyh267e887_2
+  - nlohmann_json=3.11.3=h00cdb27_1
+  - nomkl=1.0=h5ca1d4c_0
+  - numpy=2.2.3=py313h41a2e72_0
+  - openai=1.65.2=pyhd8ed1ab_0
+  - openssl=3.4.1=h81ee809_0
+  - optree=0.14.1=py313h0ebd0e5_0
+  - orc=2.0.3=h0ff2369_2
+  - packaging=24.2=pyhd8ed1ab_2
+  - pandas=2.2.3=py313h47b39a6_1
+  - pip=25.0.1=pyh145f28c_0
+  - prometheus-cpp=1.3.0=h0967b3e_0
+  - propcache=0.2.1=py313ha9b7d5b_1
+  - psutil=7.0.0=py313h90d716c_0
+  - pyarrow=19.0.1=py313h39782a4_0
+  - pyarrow-core=19.0.1=py313hf9431ad_0_cpu
+  - pybind11=2.13.6=pyh1ec8472_2
+  - pybind11-global=2.13.6=pyh415d2e4_2
+  - pydantic=2.10.6=pyh3cfb1c2_0
+  - pydantic-core=2.27.2=py313hdde674f_0
+  - pygments=2.19.1=pyhd8ed1ab_0
+  - pysocks=1.7.1=pyha55dd90_7
+  - python=3.13.2=h81fe080_101_cp313
+  - python-dateutil=2.9.0.post0=pyhff2d567_1
+  - python-tzdata=2025.1=pyhd8ed1ab_0
+  - python-xxhash=3.5.0=py313h90d716c_2
+  - python_abi=3.13=5_cp313
+  - pytorch=2.6.0=cpu_generic_py313_h2e75435_0
+  - pytz=2024.1=pyhd8ed1ab_0
+  - pyyaml=6.0.2=py313ha9b7d5b_2
+  - re2=2024.07.02=h6589ca4_2
+  - readline=8.2=h1d1bf99_2
+  - regex=2024.11.6=py313h90d716c_0
+  - requests=2.32.3=pyhd8ed1ab_1
+  - rich=13.9.4=pyhd8ed1ab_1
+  - safetensors=0.5.3=py313hdde674f_0
+  - setuptools=75.8.2=pyhff2d567_0
+  - shtab=1.7.1=pyhd8ed1ab_1
+  - six=1.17.0=pyhd8ed1ab_0
+  - sleef=3.8=h8391f65_0
+  - snappy=1.2.1=h98b9ce2_1
+  - sniffio=1.3.1=pyhd8ed1ab_1
+  - sympy=1.13.3=pyh2585a3b_105
+  - tk=8.6.13=h5083fa2_1
+  - tokenizers=0.21.0=py313h9a4dfeb_0
+  - tqdm=4.67.1=pyhd8ed1ab_1
+  - transformers=4.49.0=pyhd8ed1ab_0
+  - trl=0.15.2=pyhd8ed1ab_0
+  - typing-extensions=4.12.2=hd8ed1ab_1
+  - typing_extensions=4.12.2=pyha770c72_1
+  - tyro=0.9.1=pyhff2d567_0
+  - tzdata=2025a=h78e105d_0
+  - urllib3=2.2.2=pyhd8ed1ab_0
+  - xxhash=0.8.3=h5505292_0
+  - yaml=0.2.5=h3422bc3_2
+  - yarl=1.18.3=py313ha9b7d5b_1
+  - zlib=1.3.1=h8359307_2
+  - zstd=1.5.7=h6491c7d_1
+prefix: /opt/anaconda3/envs/hf-rlhf