import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model name for the smaller variant
model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",         # spread the model across the available devices
    low_cpu_mem_usage=True,    # tries to reduce memory usage while loading
    trust_remote_code=True,
    torch_dtype=torch.float16  # halves memory usage compared to float32
).eval()
# Note: do not call .to(device) here. device_map="auto" already places the
# weights via accelerate, and moving such a model raises a RuntimeError.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Build the conversation for the chat model
chat = [
    {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
    {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"}
]

# Render the conversation with the model's chat template;
# add_generation_prompt=True appends the assistant header so the model
# writes an answer instead of continuing the user turn
conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# Tokenize the input
input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
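# Note: recent transformers releases can do both steps at once; an
# equivalent one-step alternative (same chat list as above) would be:
#   input_ids = tokenizer.apply_chat_template(
#       chat, add_generation_prompt=True, return_tensors="pt"
#   ).to(device)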
# Run inference
with torch.no_grad():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=512,  # max_length would count the prompt tokens too,
                             # leaving little room for the step-by-step answer
        num_return_sequences=1
    )

# Display the output (prompt + generated answer)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
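Since generate() returns the prompt tokens followed by the continuation, the print above echoes the whole templated conversation. A minimal sketch for printing only the model's answer, assuming the same input_ids and outputs as above:

# Slice off the prompt so only the newly generated tokens are decoded
new_tokens = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))

For this GSM8K-style prompt the answer should end with \boxed{18}: Janet has 16 - 3 - 4 = 9 eggs left to sell, and 9 eggs × $2 = $18.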