Sergidev committed
Commit 6625104
1 Parent(s): 311b51e

Update modules/pmbl.py

Files changed (1):
  1. modules/pmbl.py +4 -3
modules/pmbl.py CHANGED
@@ -102,7 +102,7 @@ class PMBL:
             yield chunk
 
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, offload_kqv=True, flash_attn=True, use_mlock=True)
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, offload_kqv=True, use_mlock=True)
 
         response = llm(
             system_prompt,
@@ -132,6 +132,7 @@ class PMBL:
             return system_prompt_tokens + history_tokens + max_response_tokens
         else:
             return context_ceiling  # Return the maximum context size
+
     def sleep_mode(self):
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
@@ -147,7 +148,7 @@ class PMBL:
         conn.close()
 
     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=2960, n_threads=4, n_gpu_layers=-1, offload_kqv=True, flash_attn=True, use_mlock=True)
+        llm = Llama(model_path=self.model_path, n_ctx=2960, n_threads=4, n_gpu_layers=-1, offload_kqv=True, use_mlock=True)
 
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
 
@@ -155,7 +156,7 @@
             system_prompt,
             max_tokens=12,
             temperature=0,
-            stop=["\\n"],
+            stop=["\n"],
             echo=False
         )
 
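The first change drops flash_attn=True from both Llama constructor calls; the other loader options (full GPU offload via n_gpu_layers=-1, offload_kqv=True, use_mlock=True) are unchanged. Since the same option list now appears at two call sites, below is a minimal sketch of how the loader could be centralized so the flag is toggled in one place. The helper name build_llm is hypothetical and not part of this repository; the sketch assumes the llama-cpp-python package that modules/pmbl.py appears to use.

    from llama_cpp import Llama  # assumed dependency: llama-cpp-python

    def build_llm(model_path, n_ctx, n_threads, flash_attn=False):
        # Hypothetical helper, not in the repo: one place to set loader
        # options for both call sites. flash_attn defaults to False to
        # match this commit; pass True only on builds/hardware where
        # flash attention is actually supported.
        return Llama(
            model_path=model_path,
            n_ctx=n_ctx,            # context window (dynamic for chat, 2960 for topics)
            n_threads=n_threads,    # 8 for chat responses, 4 for topic generation
            n_gpu_layers=-1,        # offload every layer to the GPU
            offload_kqv=True,       # keep the KV cache on the GPU as well
            use_mlock=True,         # lock model memory to avoid swapping
            flash_attn=flash_attn,
        )

With a wrapper like this, re-enabling flash attention later is a one-argument change instead of two parallel edits.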
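The second change fixes the stop condition in generate_topic. In Python source, "\\n" is a two-character string (a backslash followed by the letter n), which a model almost never emits, so generation previously ran to max_tokens instead of stopping at the first line break; "\n" is the actual newline character. A standalone snippet (with an illustrative sample string) showing the difference:

    wrong = "\\n"  # two characters: backslash + 'n'; the old stop string
    right = "\n"   # one character: a real newline; the fixed stop string

    print(len(wrong), repr(wrong))   # -> 2 '\\n'
    print(len(right), repr(right))   # -> 1 '\n'

    # A multi-line completion only stops at the line break when the stop
    # string is the real newline:
    sample = "Weather small talk\nUser asked about the forecast"
    print(wrong in sample)            # False: the old stop string never matched
    print(sample.split(right, 1)[0])  # "Weather small talk"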