Update app.py
app.py
CHANGED
@@ -52,12 +52,11 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 model.to("cuda")  # Move the model to GPU
 tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
-
 llm = transformers.pipeline(
     task="text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=
+    device="cuda",
 )
 
 
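For context, a minimal sketch of how the model and pipeline setup reads after this hunk. The argument to from_pretrained( is elided by the diff context; this sketch assumes the model is loaded from the same "Qwen/Qwen2.5-7B-Instruct-AWQ" checkpoint as the tokenizer.

import transformers

# Load the AWQ-quantized Qwen model and its tokenizer (checkpoint name
# for the model is an assumption; the diff elides that argument).
model = transformers.AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-7B-Instruct-AWQ"
)
model.to("cuda")  # Move the model to GPU
tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")

# pipeline() accepts a pre-loaded model/tokenizer pair; device="cuda"
# keeps the pipeline's input tensors on the same device as the model.
llm = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)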
@@ -102,9 +101,7 @@ def reply(message: str, history: list[str]) -> str:
 
     # Generate a response from the language model
     response = llm(
-        rag_prompt,
-        max_new_tokens=512,
-        return_full_text=False,
+        rag_prompt, max_new_tokens=512, return_full_text=False, device="cuda"
     )
 
     # Return the generated response
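For reference, a sketch of how this call can be driven from the reply() handler named in the hunk header. The rag_prompt assembly below is hypothetical (the real prompt is built from retrieved context elsewhere in app.py), and note that in the transformers API, device is ordinarily fixed when the pipeline is constructed rather than passed per call:

def reply(message: str, history: list[str]) -> str:
    # Hypothetical prompt; the real rag_prompt is assembled from
    # retrieved context elsewhere in app.py.
    rag_prompt = f"Use the retrieved context to answer.\n\nQuestion: {message}"

    # Generate a response from the language model.
    # max_new_tokens caps the reply length; return_full_text=False
    # strips the prompt from the pipeline output.
    response = llm(
        rag_prompt,
        max_new_tokens=512,
        return_full_text=False,
    )

    # The pipeline returns a list with one dict per generated sequence.
    return response[0]["generated_text"]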