ccm committed on
Commit
708fcdb
·
verified ·
1 Parent(s): 117b141

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -52,12 +52,11 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
52
  )
53
  model.to("cuda") # Move the model to GPU
54
  tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
55
-
56
  llm = transformers.pipeline(
57
  task="text-generation",
58
  model=model,
59
  tokenizer=tokenizer,
60
- device=0, # Ensure the model is loaded on the GPU
61
  )
62
 
63
 
@@ -102,9 +101,7 @@ def reply(message: str, history: list[str]) -> str:
102
 
103
  # Generate a response from the language model
104
  response = llm(
105
- rag_prompt,
106
- max_new_tokens=512,
107
- return_full_text=False,
108
  )
109
 
110
  # Return the generated response
 
52
  )
53
  model.to("cuda") # Move the model to GPU
54
  tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
 
55
  llm = transformers.pipeline(
56
  task="text-generation",
57
  model=model,
58
  tokenizer=tokenizer,
59
+ device="cuda",
60
  )
61
 
62
 
 
101
 
102
  # Generate a response from the language model
103
  response = llm(
104
+ rag_prompt, max_new_tokens=512, return_full_text=False, device="cuda"
 
 
105
  )
106
 
107
  # Return the generated response