This is a remake of the KingNish Reasoning model: a refined and improved version.


pip install -U bitsandbytes

pip install -U transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM

import os

# Two-stage inference example: first generate a "reasoning" trace for the
# user prompt, then feed that trace back in to generate the final answer.

# Hub repository shared by the model and the tokenizer.
# NOTE(review): the page footer refers to "Guilherme34/Reasoning-2.6-fixed";
# confirm which repository this snippet is meant to load.
MODEL_ID = "Guilherme34/Reasoning-2.6"

# Upper bounds on newly generated tokens for each stage.
MAX_REASONING_TOKENS = 1024
MAX_RESPONSE_TOKENS = 512

# SECURITY: an access token was previously hard-coded on this line. Secrets
# must never be committed to source or published in a model card; the leaked
# token should be revoked. Read the token from the environment instead. When
# HF_TOKEN is unset this is None and the library's default credential lookup
# applies.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Use the same credentials for both downloads so they succeed or fail together.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=HF_TOKEN)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)

prompt = "hey, how are you?"
messages = [
    {"role": "user", "content": prompt},
]

# --- Stage 1: generate reasoning ---
# `add_reasoning_prompt` is a flag consumed by this model's chat template
# (it is not a standard `apply_chat_template` parameter).
# NOTE(review): the template is rendered to text and then tokenized again; if
# the template already emits a BOS token, `add_special_tokens=False` may be
# needed here to avoid duplicating it — confirm against the tokenizer config.
reasoning_template = tokenizer.apply_chat_template(
    messages, tokenize=False, add_reasoning_prompt=True
)
reasoning_inputs = tokenizer(reasoning_template, return_tensors="pt").to(model.device)
reasoning_ids = model.generate(**reasoning_inputs, max_new_tokens=MAX_REASONING_TOKENS)
# Skip the prompt tokens so only the newly generated continuation is decoded.
reasoning_prompt_len = reasoning_inputs.input_ids.shape[1]
reasoning_output = tokenizer.decode(
    reasoning_ids[0, reasoning_prompt_len:], skip_special_tokens=True
)

# Uncomment to inspect the intermediate reasoning trace:
# print("REASONING: " + reasoning_output)

# --- Stage 2: generate answer ---
# The reasoning trace is appended as its own "reasoning" turn before asking
# the model for the final response.
messages.append({"role": "reasoning", "content": reasoning_output})
response_template = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
response_inputs = tokenizer(response_template, return_tensors="pt").to(model.device)
response_ids = model.generate(**response_inputs, max_new_tokens=MAX_RESPONSE_TOKENS)
# Again decode only the tokens produced after the prompt.
response_prompt_len = response_inputs.input_ids.shape[1]
response_output = tokenizer.decode(
    response_ids[0, response_prompt_len:], skip_special_tokens=True
)

print("ANSWER: " + response_output)
Downloads last month
12
Safetensors
Model size
1.24B params
Tensor type
F32
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for Guilherme34/Reasoning-2.6-fixed

Quantizations
5 models