Spaces:
Sleeping
Sleeping
Chengxb888
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -22,14 +22,14 @@ def say_hello(msg: Annotated[str, Form()]):
|
|
22 |
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
|
23 |
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
24 |
|
25 |
-
messages = [{"role": "user", "content":
|
26 |
input_text=tokenizer.apply_chat_template(messages, tokenize=False)
|
27 |
print(input_text)
|
28 |
|
29 |
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
30 |
print("output")
|
31 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
32 |
-
outputs = model.generate(inputs, max_new_tokens=
|
33 |
|
34 |
print("complete")
|
35 |
return {"message": tokenizer.decode(outputs[0])}
|
|
|
22 |
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
|
23 |
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
24 |
|
25 |
+
messages = [{"role": "user", "content": msg}]
|
26 |
input_text=tokenizer.apply_chat_template(messages, tokenize=False)
|
27 |
print(input_text)
|
28 |
|
29 |
input_ids = tokenizer(msg, return_tensors="pt").to("cpu")
|
30 |
print("output")
|
31 |
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
32 |
+
outputs = model.generate(inputs, max_new_tokens=256, temperature=0.6, top_p=0.92, do_sample=True)
|
33 |
|
34 |
print("complete")
|
35 |
return {"message": tokenizer.decode(outputs[0])}
|