dsfdfsghgf committed
Commit 2042c5e · verified · 1 Parent(s): 40a9a05

Update app.py

Files changed (1): app.py (+12 -11)
app.py CHANGED
@@ -1,17 +1,16 @@
 import torch
-from transformers import AutoModel, AutoTokenizer
 
-model_name = "Qwen/Qwen2.5-Math-RM-72B"
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load the model and tokenizer from Hugging Face
+model_name = "Qwen/Qwen2.5-Math-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load model and tokenizer
-model = AutoModel.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    device_map="auto",  # distribute the model across available devices
-    low_cpu_mem_usage=True,
-    trust_remote_code=True,
-).eval()
-
+    device_map="auto",  # distribute the model across available devices
+    trust_remote_code=True
+).to(device).eval()
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -25,10 +24,12 @@ chat = [
 conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)
 
 # Tokenize the input
-input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(model.device)
+input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
 
 # Run inference
 with torch.no_grad():
-    outputs = model(input_ids=input_ids)
+    outputs = model.generate(input_ids=input_ids, max_length=512, num_return_sequences=1)
 
-print(outputs[0])  # adjust depending on the output format
+# Decode and display the output
+response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(response)
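
For context, the hunks above omit the `chat` definition (old lines 18-24 are not shown). Below is a minimal sketch of what the full updated script might look like; the messages in `chat` are hypothetical placeholders, and the sketch drops the committed `.to(device)` call, since `device_map="auto"` already places the weights (moving a model dispatched across several devices can raise an error in recent transformers versions).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-Math-7B-Instruct"

# Load model and tokenizer; device_map="auto" handles device placement,
# so no explicit .to(device) is needed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Hypothetical messages -- the actual chat contents are not part of this diff.
chat = [
    {"role": "system", "content": "You are a helpful math assistant."},
    {"role": "user", "content": "What is 7 * 8?"},
]
conversation_str = tokenizer.apply_chat_template(
    chat, tokenize=False, add_generation_prompt=True
)

input_ids = tokenizer.encode(
    conversation_str, return_tensors="pt", add_special_tokens=False
).to(model.device)

with torch.no_grad():
    # max_new_tokens bounds only the generated continuation; max_length,
    # as used in the commit, also counts the prompt tokens.
    outputs = model.generate(input_ids=input_ids, max_new_tokens=512)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Two deliberate deviations from the committed code, flagged as assumptions: `add_generation_prompt=True` appends the assistant turn marker so the model answers the last user message instead of continuing it, and `max_new_tokens=512` caps only the generated tokens, whereas `max_length=512` counts the prompt against the same budget.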