dsfdfsghgf committed
Commit 2042c5e · verified · 1 Parent(s): 40a9a05

Update app.py

Files changed (1): app.py (+12 -11)
app.py CHANGED
@@ -1,17 +1,16 @@
 import torch
-from transformers import AutoModel, AutoTokenizer
 
-model_name = "Qwen/Qwen2.5-Math-RM-72B"
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load the model and tokenizer from Hugging Face
+model_name = "Qwen/Qwen2.5-Math-7B-Instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load model and tokenizer
-model = AutoModel.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    device_map="auto",  # distribute the model across available devices
-    low_cpu_mem_usage=True,
-    trust_remote_code=True,
-).eval()
-
+    device_map="auto",  # distribute the model across available devices
+    trust_remote_code=True
+).to(device).eval()
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -25,10 +24,12 @@ chat = [
 conversation_str = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)
 
 # Tokenize the input
-input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(model.device)
+input_ids = tokenizer.encode(conversation_str, return_tensors="pt", add_special_tokens=False).to(device)
 
 # Run inference
 with torch.no_grad():
-    outputs = model(input_ids=input_ids)
+    outputs = model.generate(input_ids=input_ids, max_length=512, num_return_sequences=1)
 
-print(outputs[0])  # adjust depending on the output format
+# Decode and display the output
+response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(response)
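
For context, the hunks above omit the `chat` definition (old lines 18-24 are not shown). Below is a minimal sketch of what the full updated script might look like; the messages in `chat` are hypothetical placeholders, and the sketch drops the committed `.to(device)` call, since `device_map="auto"` already places the weights (moving a model dispatched across several devices can raise an error in recent transformers versions).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-Math-7B-Instruct"

# Load model and tokenizer; device_map="auto" handles device placement,
# so no explicit .to(device) is needed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Hypothetical messages -- the actual chat contents are not part of this diff.
chat = [
    {"role": "system", "content": "You are a helpful math assistant."},
    {"role": "user", "content": "What is 7 * 8?"},
]
conversation_str = tokenizer.apply_chat_template(
    chat, tokenize=False, add_generation_prompt=True
)

input_ids = tokenizer.encode(
    conversation_str, return_tensors="pt", add_special_tokens=False
).to(model.device)

with torch.no_grad():
    # max_new_tokens bounds only the generated continuation; max_length,
    # as used in the commit, also counts the prompt tokens.
    outputs = model.generate(input_ids=input_ids, max_new_tokens=512)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Two deliberate deviations from the committed code, flagged as assumptions: `add_generation_prompt=True` appends the assistant turn marker so the model answers the last user message instead of continuing it, and `max_new_tokens=512` caps only the generated tokens, whereas `max_length=512` counts the prompt against the same budget.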