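# Score a math conversation with the Qwen/Qwen2.5-Math-RM-72B reward model
# using Hugging Face Transformers.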
import torch
from transformers import AutoModel, AutoTokenizer

model_name = "Qwen/Qwen2.5-Math-RM-72B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer
model = AutoModel.from_pretrained(
    model_name,
    revision="main",  # Hier den Namen des Branches oder Commit-Hash einfügen
    device_map=device,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).eval()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Build the conversation input for the chat model
chat = [
    {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
    {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
]

# Render the conversation into the model's chat template format
conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)

# Tokenize the formatted conversation
input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(model.device)

# Run inference without tracking gradients
with torch.no_grad():
    outputs = model(input_ids=input_ids)

print(outputs[0])  # the reward score for the conversation; adjust if the output format differs
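
# --- Optional extension: ranking candidate answers (a minimal sketch) ---
# Reward models like this one are commonly used to rank candidate responses
# (best-of-n sampling). The helper below assumes outputs[0] holds a
# one-element reward tensor, as suggested by the print above; the helper name
# `score_conversation` and the candidate answers are illustrative additions,
# not part of the original script.

def score_conversation(messages):
    """Return the reward model's scalar score for a chat transcript."""
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False).to(model.device)
    with torch.no_grad():
        out = model(input_ids=ids)
    return out[0].item()  # assumes the first output is a one-element reward tensor

candidates = [
    "Janet sells 16 - 3 - 4 = 9 eggs, so she makes 9 * 2 = \\boxed{18} dollars.",
    "Janet sells 16 - 3 = 13 eggs, so she makes 13 * 2 = \\boxed{26} dollars.",
]
scores = [score_conversation(chat + [{"role": "assistant", "content": c}]) for c in candidates]
print(scores)  # pick the best candidate, e.g. candidates[scores.index(max(scores))]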