Mehdi Challakh
committed on
changed the gguf example too
README.md CHANGED
```diff
@@ -60,7 +60,7 @@ from llama_cpp import Llama
 # Load the GGUF model
 print("Loading model...")
 model = Llama(
-    model_path="stable-cypher-instruct-3b.Q4_K_M.gguf",
+    model_path=r"C:\Users\John\stable-cypher-instruct-3b.Q4_K_M.gguf",
     n_ctx=512,
     n_batch=512,
     n_gpu_layers=-1,  # Use all available GPU layers
@@ -71,20 +71,17 @@ model = Llama(
 )
 
 # Define your question
-
-Except bad performance without it'''
-instruction = "Create a Cypher statement to answer the following question:"
-question = "List the first 3 articles mentioning organizations with a revenue less than 5 million."
+question = "Show me the people who have Python and Cloud skills and have been in the company for at least 3 years."
 
-# Create the full prompt
-full_prompt = f"
+# Create the full prompt (simulating the apply_chat_template function)
+full_prompt = f"<|im_start|>system\nCreate a Cypher statement to answer the following question:<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
 
 # Generate response
 print("Generating response...")
 response = model(
     full_prompt,
     max_tokens=128,
-    stop=["
+    stop=["<|im_end|>", "<|im_start|>"],
     echo=False
 )
 
```
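For reference, here is the updated GGUF example assembled into one runnable script. This is a sketch, not part of the commit: the final `print` line is an addition (llama-cpp-python's `Llama.__call__` returns a completion dict whose generated text lives at `choices[0]["text"]`), and the Windows path is the README's placeholder, not a real location.

```python
from llama_cpp import Llama

# Load the GGUF model (placeholder path from the README; point it at your own file)
print("Loading model...")
model = Llama(
    model_path=r"C:\Users\John\stable-cypher-instruct-3b.Q4_K_M.gguf",
    n_ctx=512,
    n_batch=512,
    n_gpu_layers=-1,  # Use all available GPU layers
)

# Define your question
question = (
    "Show me the people who have Python and Cloud skills "
    "and have been in the company for at least 3 years."
)

# Create the full prompt (simulating the apply_chat_template function)
full_prompt = (
    "<|im_start|>system\n"
    "Create a Cypher statement to answer the following question:<|im_end|>\n"
    f"<|im_start|>user\n{question}<|im_end|>\n"
    "<|im_start|>assistant\n"
)

# Generate response
print("Generating response...")
response = model(
    full_prompt,
    max_tokens=128,
    stop=["<|im_end|>", "<|im_start|>"],  # Stop at ChatML turn boundaries
    echo=False,  # Return only the completion, not the prompt
)

# The call returns a completion dict; the generated Cypher is the choice text
print(response["choices"][0]["text"].strip())
```

The hand-built `full_prompt` reproduces the ChatML layout that `tokenizer.apply_chat_template(..., add_generation_prompt=True)` would emit for a system and a user message, which is why the comment describes it as a simulation of that function.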