sd-inf committed · Commit 61ef83a · verified · 1 Parent(s): f8717d3

Update README.md

Files changed (1): README.md (+85 -1)
README.md CHANGED
@@ -11,4 +11,88 @@ pipeline_tag: text-generation

Pico v1 is a work-in-progress model. Based on Phi 3.5 Mini, it has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.

When generating an output, Pico produces three sections: a reasoning section, a self-reflection section, and an output section.
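
If you want to handle the three sections separately (for example, to show only the final answer), you can split the response after generation. The tag names in the sketch below (`<reasoning>`, `<reflection>`, `<output>`) are hypothetical placeholders, not documented Pico markers; replace them with whatever delimiters the model actually emits.

```python
import re

def split_sections(text):
    """Split a Pico response into its sections.

    The tag names here are assumptions for illustration only; adjust them
    to the markers Pico actually produces.
    """
    sections = {}
    for name in ("reasoning", "reflection", "output"):
        match = re.search(rf"<{name}>(.*?)</{name}>", text, re.DOTALL)
        if match:
            sections[name] = match.group(1).strip()
    # Fall back to the raw text if no recognizable tags are present.
    return sections or {"output": text.strip()}
```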

Pico v1 struggles with tasks that are not question-oriented (small talk, roleplay, etc.).

Here is an example of how you can use it:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Phi 3.5 chat template (Jinja), shown for reference; build_prompt below applies the same format by hand.
phi3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
    "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|assistant|>\\n' }}"
    "{% endif %}"
)
phi3_template_eos_token = "<|end|>"

def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """
    Build a prompt using the Phi 3.5 template.
    """
    prompt = bos_token
    for message in messages:
        prompt += f"<|{message['role']}|>\n{message['content']}\n<|end|>\n"
    if add_generation_prompt:
        prompt += "<|assistant|>\n"
    return prompt

def chat_with_model():
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. Please ensure your GPU and CUDA environment are configured correctly.")

    device = torch.device("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append the user's message to the conversation
        conversation.append({"role": "user", "content": user_input})

        # Build the input prompt using the Phi 3.5 template
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the input prompt
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

        # Generate a response
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1024,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens (everything after the prompt)
        new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
        assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})

if __name__ == "__main__":
    chat_with_model()
```
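
As an alternative to `build_prompt`, the Jinja template defined above (`phi3_template`) can be handed to the tokenizer so that `transformers` formats the conversation via `apply_chat_template`. This is a sketch under the assumption that the tokenizer does not already ship with a suitable chat template; `conversation` is the message list from the example above.

```python
# Optional: let transformers apply the Phi 3.5 template instead of build_prompt.
tokenizer.chat_template = phi3_template  # skip this if the tokenizer already ships a template
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,              # return the formatted string rather than token ids
    add_generation_prompt=True,  # append "<|assistant|>\n" so the model answers as the assistant
)
```

Keeping the format in the tokenizer's chat template avoids drift between the manual string building and the template string.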