Update app.py
app.py
CHANGED
@@ -52,12 +52,11 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 model.to("cuda")  # Move the model to GPU
 tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
-
 llm = transformers.pipeline(
     task="text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=
+    device="cuda",
 )
 
 
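For context, a minimal sketch of how the model and pipeline setup reads after this hunk. The argument to from_pretrained( is elided by the diff context; this sketch assumes the model is loaded from the same "Qwen/Qwen2.5-7B-Instruct-AWQ" checkpoint as the tokenizer.

import transformers

# Load the AWQ-quantized Qwen model and its tokenizer (checkpoint name
# for the model is an assumption; the diff elides that argument).
model = transformers.AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-7B-Instruct-AWQ"
)
model.to("cuda")  # Move the model to GPU
tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")

# pipeline() accepts a pre-loaded model/tokenizer pair; device="cuda"
# keeps the pipeline's input tensors on the same device as the model.
llm = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)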
@@ -102,9 +101,7 @@ def reply(message: str, history: list[str]) -> str:
 
     # Generate a response from the language model
     response = llm(
-        rag_prompt,
-        max_new_tokens=512,
-        return_full_text=False,
+        rag_prompt, max_new_tokens=512, return_full_text=False, device="cuda"
     )
 
     # Return the generated response
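For reference, a sketch of how this call can be driven from the reply() handler named in the hunk header. The rag_prompt assembly below is hypothetical (the real prompt is built from retrieved context elsewhere in app.py), and note that in the transformers API, device is ordinarily fixed when the pipeline is constructed rather than passed per call:

def reply(message: str, history: list[str]) -> str:
    # Hypothetical prompt; the real rag_prompt is assembled from
    # retrieved context elsewhere in app.py.
    rag_prompt = f"Use the retrieved context to answer.\n\nQuestion: {message}"

    # Generate a response from the language model.
    # max_new_tokens caps the reply length; return_full_text=False
    # strips the prompt from the pipeline output.
    response = llm(
        rag_prompt,
        max_new_tokens=512,
        return_full_text=False,
    )

    # The pipeline returns a list with one dict per generated sequence.
    return response[0]["generated_text"]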