jerryzh168 committed · Commit a9f7231 · verified · 1 Parent(s): 31f1a7e

Update README.md

Files changed (1):
  1. README.md +19 -2
README.md CHANGED
@@ -53,12 +53,29 @@ tokenizer.push_to_hub(save_to)
 
 # Manual Testing
 prompt = "Hey, are you conscious? Can you talk to me?"
-inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+messages = [
+    {
+        "role": "system",
+        "content": "",
+    },
+    {"role": "user", "content": prompt},
+]
+templated_prompt = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True,
+)
+print("Prompt:", prompt)
+print("Templated prompt:", templated_prompt)
+inputs = tokenizer(
+    templated_prompt,
+    return_tensors="pt",
+).to("cuda")
 generated_ids = quantized_model.generate(**inputs, max_new_tokens=128)
 output_text = tokenizer.batch_decode(
     generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
 )
-print(output_text)
+print("Response:", output_text[0][len(prompt):])
 
 # Local Benchmark
 import torch.utils.benchmark as benchmark
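The change wraps the raw prompt in the tokenizer's chat template before tokenizing, so the quantized model is prompted in the format it was tuned on. One side note (not something this commit changes): `print("Response:", output_text[0][len(prompt):])` strips only the raw prompt from the decoded text, but the decoded text begins with the longer templated prompt, so fragments of the template can leak into the printed response. A sketch of a stricter alternative, reusing the `inputs` and `generated_ids` variables from the snippet above, is to decode only the newly generated token ids:

# Sketch, not part of this commit: decode only the tokens generated
# after the (templated) prompt.
prompt_len = inputs["input_ids"].shape[1]
response = tokenizer.batch_decode(
    generated_ids[:, prompt_len:], skip_special_tokens=True
)[0]
print("Response:", response)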
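The trailing context lines show the README continuing into a local benchmark built on torch.utils.benchmark; the benchmark body itself falls outside this hunk. As a rough illustration only (assumed variables, not the README's actual benchmark code), timing a generate() call with the `benchmark` module imported as in the context line above can look like this:

# Illustrative sketch: time one generate() call using the variables from
# the snippet above. blocked_autorange() chooses the number of runs itself.
timer = benchmark.Timer(
    stmt="quantized_model.generate(**inputs, max_new_tokens=128)",
    globals={"quantized_model": quantized_model, "inputs": inputs},
)
print(timer.blocked_autorange())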