---
library_name: transformers
tags: []
---
training script: https://gist.github.com/notlober/9bf4c3ab6ddeb12ec669ca495653708a
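The gist above is the actual training script. As orientation only, here is a minimal sketch of what GRPO fine-tuning with trl's `GRPOTrainer` can look like; the reward function, prompt mapping, and hyperparameters below are illustrative assumptions, not the configuration from the gist:
```python
# Minimal GRPO sketch (illustrative only; see the gist above for the real script).
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

PROMPT = (
    "A conversation between User and Assistant. The user asks a question, and the Assistant "
    "solves it. The assistant first does reasoning process in the mind and then provides the "
    "user with the answer. The reasoning process and answer are enclosed within <think> </think> "
    "and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> "
    "<answer> answer here </answer>. User: {problem} Assistant:"
)

# NuminaMath CoT provides "problem"/"solution" columns; GRPOTrainer samples from a "prompt" column
dataset = load_dataset("AI-MO/NuminaMath-CoT", split="train").map(
    lambda row: {"prompt": PROMPT.format(problem=row["problem"])}
)

def format_reward(completions, **kwargs):
    # toy reward: 1.0 if the completion contains a closed <answer> block, else 0.0
    return [1.0 if ("<answer>" in c and "</answer>" in c) else 0.0 for c in completions]

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-14B-Instruct-1M",
    reward_funcs=format_reward,
    args=GRPOConfig(output_dir="gemma-grpo", max_completion_length=2048),
    train_dataset=dataset,
)
trainer.train()
```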
inference code:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
#####
max_new_toks = 2048
N_BEAMS = 5
#####
def do_instruct(prompt):
    # R1-style reasoning prompt with <think>/<answer> tags; keep the string unchanged so it matches the training format
    return f"A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first does reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. User: {prompt} Assistant:"

def generate_output_once(prompt):
    message = [
        {"role": "user", "content": do_instruct(prompt)}
    ]
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_new_toks,
        repetition_penalty=1.2,
        num_beam_groups=N_BEAMS,
        num_beams=N_BEAMS,
        diversity_penalty=0.5,
        early_stopping=True,
        do_sample=False  # must stay False: diverse beam search (num_beam_groups > 1) does not support sampling
    )
    return tokenizer.decode(generated_ids[0, model_inputs.shape[1]:], skip_special_tokens=True)

def test_gen(prompt):
    answer_str = generate_output_once(prompt)
    print(f"Answer: {answer_str}")
#####
model = AutoModelForCausalLM.from_pretrained(
    "notbdq/gemma-grpo",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-14B-Instruct-1M")
#####
test_gen("...") # put your prompt here
```
benchmarks: on the 15 AIME validation samples tested so far it was doing better than Qwen2.5-14B-Instruct-1M from the first sample onward, but 75 samples remain untested, so the evaluation script is shared below in case someone wants to run the full benchmark:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

#####
max_new_toks = 2048
N_BEAMS = 5
#####

def do_instruct(prompt):
    # same R1-style reasoning prompt used at inference time
    return f"A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first does reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. User: {prompt} Assistant:"

def generate_output_once_grpo(model, prompt):
    # generation for the GRPO fine-tuned model (reasoning prompt + diverse beam search)
    message = [
        {"role": "user", "content": do_instruct(prompt)}
    ]
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_new_toks,
        repetition_penalty=1.2,
        num_beam_groups=N_BEAMS,
        num_beams=N_BEAMS,
        diversity_penalty=0.5,
        early_stopping=True,
        do_sample=False  # must stay False: diverse beam search does not support sampling
    )
    return tokenizer.decode(generated_ids[0, model_inputs.shape[1]:], skip_special_tokens=True)

def generate_output_once(model, prompt):
    # baseline generation for the unmodified Qwen model (plain prompt, default decoding)
    message = [
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_new_toks
    )
    return tokenizer.decode(generated_ids[0, model_inputs.shape[1]:], skip_special_tokens=True)

def check_model_contain_output(model_output, ground_t_output):
    # crude correctness check: the ground-truth answer string must appear somewhere in the generation
    return ground_t_output in model_output

def extract_answer(text):
    # pulls the content of the <answer> block (not used by the substring check above)
    try:
        return text.split("<answer>")[1].split("</answer>")[0]
    except IndexError:
        return None

def do_eval(debug):
    total_iters = len(eval_dataset)
    wins_reasoning = 0
    wins_qwen = 0
    for l in range(total_iters):
        row = eval_dataset[l]
        problem = row["problem"]
        ground_truth = row["answer"]
        response = generate_output_once_grpo(model, problem)
        response_qwen = generate_output_once(model_qwen, problem)
        reward = check_model_contain_output(response, ground_truth)
        reward_qwen = check_model_contain_output(response_qwen, ground_truth)
        if reward: wins_reasoning += 1
        if reward_qwen: wins_qwen += 1
        # running score over the full dataset; equals the final accuracy after the last sample
        print(f"reasoning model: {wins_reasoning / total_iters:.2%}")
        print(f"qwen model: {wins_qwen / total_iters:.2%}")
        if debug:
            print("qwen:", response_qwen)
            print("reasoning fine tuned:", response)

#####
model = AutoModelForCausalLM.from_pretrained(
    "notbdq/gemma-grpo",
    torch_dtype="auto",
    device_map="auto"
)
model_qwen = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-14B-Instruct-1M",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-14B-Instruct-1M")
eval_dataset = load_dataset("AI-MO/aimo-validation-aime", split="train")
#####
do_eval(debug=False)
```
technique: GRPO applied to Qwen2.5-14B-Instruct-1M with the NuminaMath CoT dataset
hardware: 8x AMD MI300X, trained for roughly 64 steps
current issues:
1. infinite generation when the model hits a hard problem (a possible mitigation is sketched after this list)
2. growing sequence length during training
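For issue 1, one possible mitigation (a sketch, not part of the released code) is to stop decoding as soon as the closing `</answer>` tag appears, via a custom `StoppingCriteria`; this reuses the `model`, `tokenizer`, `do_instruct`, and `max_new_toks` names from the inference snippet above:
```python
# Sketch: stop generation once "</answer>" shows up in the continuation.
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnAnswerClose(StoppingCriteria):
    def __init__(self, tokenizer, prompt_len, stop_text="</answer>"):
        self.tokenizer = tokenizer
        self.prompt_len = prompt_len
        self.stop_text = stop_text

    def __call__(self, input_ids, scores, **kwargs):
        # decode only the newly generated tokens of the first sequence
        new_text = self.tokenizer.decode(input_ids[0, self.prompt_len:], skip_special_tokens=True)
        return self.stop_text in new_text

def generate_with_answer_stop(prompt):
    text = tokenizer.apply_chat_template(
        [{"role": "user", "content": do_instruct(prompt)}],
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    stop = StoppingCriteriaList([StopOnAnswerClose(tokenizer, model_inputs.shape[1])])
    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=max_new_toks,
        stopping_criteria=stop,
    )
    return tokenizer.decode(generated_ids[0, model_inputs.shape[1]:], skip_special_tokens=True)
```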
author: baki
contact: https://x.com/bakiv11771441