keeeeenw committed · verified
Commit 74040d7 · 1 Parent(s): 05dbb65

Update README.md

Files changed (1)
  1. README.md +21 -7
README.md CHANGED
@@ -49,40 +49,54 @@ Thanks to **Hugging Face**, we now have a streamlined framework to make this pro
 
  ### How to run the code?
 
- ```{python}
+ ```python
+ import torch
  import transformers
  from transformers import TextStreamer
 
  from transformers import AutoTokenizer, AutoModel, LlamaForCausalLM
 
- # use the same tokenizer as MicroLlama
+ device = 'cuda'  # if you don't have a CUDA-supported GPU, change this to 'cpu' or another supported device
+
+ # load tokenizer
  tokenizer = AutoTokenizer.from_pretrained("keeeeenw/Llama-3.2-1B-Instruct-Open-R1-Distill")
+
+ # load model
  model = LlamaForCausalLM.from_pretrained("keeeeenw/Llama-3.2-1B-Instruct-Open-R1-Distill")
+ model.to(device)
 
- # Prompt supported by HuggingFaceH4/Bespoke-Stratos-17k
+ # Set up the prompt. Because the model was instruction-tuned with a similar prompt, it is important to use it.
+ # Change "content" to your actual question.
  messages = [
      {
          "role": "system",
          "content": "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\n\n'} <|end_of_thought|> Each step should include detailed considerations such as analisying questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
      },
-     # question from https://www.reddit.com/r/LocalLLaMA/comments/13zz8y5/what_questions_do_you_ask_llms_to_check_their/
      {"role": "user", "content": "Please provide me instructions on how to steal an egg from my chicken?"},
  ]
  formatted_chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, return_tensors="pt")
  print(formatted_chat)
 
+ # Set up the input tokens
  inputs = tokenizer(formatted_chat, return_tensors="pt", padding=True)
+ inputs = inputs.to(device)
  attention_mask = inputs["attention_mask"]
 
+ # Run inference and stream the output
  streamer = TextStreamer(tokenizer, skip_prompt=True)
- outputs = model.generate(inputs['input_ids'],
+ outputs = model.generate(inputs['input_ids'],
                           streamer=streamer,
                           attention_mask=attention_mask,
                           pad_token_id=tokenizer.eos_token_id,
                           top_k=5,
                           top_p=0.9,
-                          max_new_tokens=131072)
- print(tokenizer.decode(outputs[0]))
+                          max_new_tokens=131072)  # max supported by Llama 3.2 1B
+
+ # Write the output to a file
+ decoded_text = tokenizer.decode(outputs[0])
+ print("Output written to output.txt")
+ with open("output.txt", "w", encoding="utf-8") as f:
+     f.write(decoded_text)
  ```
 
  ### Sample Output