Mehdi Challakh
committed on
changed the gguf example too
README.md CHANGED
```diff
@@ -60,7 +60,7 @@ from llama_cpp import Llama
 # Load the GGUF model
 print("Loading model...")
 model = Llama(
-    model_path="stable-cypher-instruct-3b.Q4_K_M.gguf",
+    model_path=r"C:\Users\John\stable-cypher-instruct-3b.Q4_K_M.gguf",
     n_ctx=512,
     n_batch=512,
     n_gpu_layers=-1,  # Use all available GPU layers
@@ -71,20 +71,17 @@ model = Llama(
 )
 
 # Define your question
-
-Except bad performance without it'''
-instruction = "Create a Cypher statement to answer the following question:"
-question = "List the first 3 articles mentioning organizations with a revenue less than 5 million."
+question = "Show me the people who have Python and Cloud skills and have been in the company for at least 3 years."
 
-# Create the full prompt
-full_prompt = f"
+# Create the full prompt (simulating the apply_chat_template function)
+full_prompt = f"<|im_start|>system\nCreate a Cypher statement to answer the following question:<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
 
 # Generate response
 print("Generating response...")
 response = model(
     full_prompt,
     max_tokens=128,
-    stop=["
+    stop=["<|im_end|>", "<|im_start|>"],
     echo=False
 )
 
```
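For reference, here is the updated GGUF example assembled into one runnable script. This is a sketch, not part of the commit: the final `print` line is an addition (llama-cpp-python's `Llama.__call__` returns a completion dict whose generated text lives at `choices[0]["text"]`), and the Windows path is the README's placeholder, not a real location.

```python
from llama_cpp import Llama

# Load the GGUF model (placeholder path from the README; point it at your own file)
print("Loading model...")
model = Llama(
    model_path=r"C:\Users\John\stable-cypher-instruct-3b.Q4_K_M.gguf",
    n_ctx=512,
    n_batch=512,
    n_gpu_layers=-1,  # Use all available GPU layers
)

# Define your question
question = (
    "Show me the people who have Python and Cloud skills "
    "and have been in the company for at least 3 years."
)

# Create the full prompt (simulating the apply_chat_template function)
full_prompt = (
    "<|im_start|>system\n"
    "Create a Cypher statement to answer the following question:<|im_end|>\n"
    f"<|im_start|>user\n{question}<|im_end|>\n"
    "<|im_start|>assistant\n"
)

# Generate response
print("Generating response...")
response = model(
    full_prompt,
    max_tokens=128,
    stop=["<|im_end|>", "<|im_start|>"],  # Stop at ChatML turn boundaries
    echo=False,  # Return only the completion, not the prompt
)

# The call returns a completion dict; the generated Cypher is the choice text
print(response["choices"][0]["text"].strip())
```

The hand-built `full_prompt` reproduces the ChatML layout that `tokenizer.apply_chat_template(..., add_generation_prompt=True)` would emit for a system and a user message, which is why the comment describes it as a simulation of that function.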