Qwen
/

Qwen1.5-4B-Chat

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

JustinLin610 committed on Jan 31, 2024

Commit

4cd30db

·

verified ·

1 Parent(s): 7cbf3e3

Update README.md

Files changed (1) hide show

README.md +6 -2

README.md CHANGED Viewed

@@ -46,7 +46,10 @@ Here provides a code snippet with `apply_chat_template` to show you how to load
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
-model = AutoModelForCausalLM.from_pretrained("Qwen2/Qwen2-beta-4B-Chat", device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained("Qwen2/Qwen2-beta-4B-Chat")
 prompt = "Give me a short introduction to large language model."
@@ -55,7 +58,8 @@ messages = [
     {"role": "user", "content": prompt}
 ]
 text = tokenizer.apply_chat_template(
-    messages, tokenize=False,
     add_generation_prompt=True
 )
 model_inputs = tokenizer([text], return_tensors="pt").to(device)

 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen2/Qwen2-beta-4B-Chat",
+    device_map="auto"
+)
 tokenizer = AutoTokenizer.from_pretrained("Qwen2/Qwen2-beta-4B-Chat")
 prompt = "Give me a short introduction to large language model."
     {"role": "user", "content": prompt}
 ]
 text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
     add_generation_prompt=True
 )
 model_inputs = tokenizer([text], return_tensors="pt").to(device)