import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model name for the smaller variant of the Qwen2.5-Math family
model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"
# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load model and tokenizer. Note: with device_map="auto", accelerate already
# places the weights, so an extra .to(device) would conflict and can raise at runtime.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # spread the model across available devices
    low_cpu_mem_usage=True,     # reduce peak CPU memory while loading
    trust_remote_code=True,
    torch_dtype=torch.float16,  # halve the memory footprint
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Build the chat input for the model
chat = [
    {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
    {"role": "user", "content": "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"},
]
# Render the conversation with the model's chat template.
# add_generation_prompt=True appends the assistant header so the model
# actually produces a reply instead of continuing the user turn.
conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
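# For Qwen2.5 chat models this renders ChatML-style turns, roughly
# (illustrative only; the exact template comes from the tokenizer config):
#   <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n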
# Tokenize the rendered prompt (special tokens are already in the template)
input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
# Run inference
with torch.no_grad():
    # max_new_tokens caps only the reply; max_length would also count the prompt
    # and could cut off the step-by-step reasoning
    outputs = model.generate(input_ids=input_ids, max_new_tokens=512, num_return_sequences=1)
# Print the full decoded output (prompt plus generated answer)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
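# Optionally, decode only the newly generated tokens, dropping the echoed prompt.
# A small convenience sketch, not part of the original script:
answer = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(answer)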