sd-inf committed · Commit 61ef83a · verified · 1 Parent(s): f8717d3

Update README.md

Files changed (1): README.md (+85 -1)
README.md CHANGED
@@ -11,4 +11,88 @@ pipeline_tag: text-generation

Pico v1 is a work-in-progress model. Based on Phi 3.5 Mini, it has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.

When generating an output, Pico produces three sections: a reasoning section, a self-reflection section, and an output section.
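
If you want to handle the three sections separately (for example, to show only the final answer), you can split the response after generation. The tag names in the sketch below (`<reasoning>`, `<reflection>`, `<output>`) are hypothetical placeholders, not documented Pico markers; replace them with whatever delimiters the model actually emits.

```python
import re

def split_sections(text):
    """Split a Pico response into its sections.

    The tag names here are assumptions for illustration only; adjust them
    to the markers Pico actually produces.
    """
    sections = {}
    for name in ("reasoning", "reflection", "output"):
        match = re.search(rf"<{name}>(.*?)</{name}>", text, re.DOTALL)
        if match:
            sections[name] = match.group(1).strip()
    # Fall back to the raw text if no recognizable tags are present.
    return sections or {"output": text.strip()}
```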

Pico v1 struggles with tasks that are not question-oriented (small talk, roleplay, etc.).

Here is an example of how you can use it:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Phi 3.5 chat template (Jinja), shown for reference; build_prompt below applies the same format by hand.
phi3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
    "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|assistant|>\\n' }}"
    "{% endif %}"
)
phi3_template_eos_token = "<|end|>"

def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """
    Build a prompt using the Phi 3.5 template.
    """
    prompt = bos_token
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}\n<|end|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"
    return prompt

def chat_with_model():
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. Please ensure your GPU and CUDA environment are configured correctly.")

    device = torch.device("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append the user's message to the conversation
        conversation.append({"role": "user", "content": user_input})

        # Build the input prompt using the Phi 3.5 template
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the input prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

        # Generate a response
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1024,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt)
        new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
        assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})

if __name__ == "__main__":
    chat_with_model()
```
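
As an alternative to `build_prompt`, the Jinja template defined above (`phi3_template`) can be handed to the tokenizer so that `transformers` formats the conversation via `apply_chat_template`. This is a sketch under the assumption that the tokenizer does not already ship with a suitable chat template; `conversation` is the message list from the example above.

```python
# Optional: let transformers apply the Phi 3.5 template instead of build_prompt.
tokenizer.chat_template = phi3_template  # skip this if the tokenizer already ships a template
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,              # return the formatted string rather than token ids
    add_generation_prompt=True,  # append "<|assistant|>\n" so the model answers as the assistant
)
```

Keeping the format in the tokenizer's chat template avoids drift between the manual string building and the template string.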