Samuel L Meyers committed
Commit ed9f98e · 1 Parent(s): e11f04f

Just saving the broken thing before I EFFING PURGE CHAT COMPLETIONS.

Files changed (1): code/app.py (+7 -12)
code/app.py CHANGED
@@ -16,21 +16,21 @@ model_path = "./starling-lm-7b-alpha.Q6_K.gguf"
 
 mdlpath = hf_hub_download(repo_id="TheBloke/Starling-LM-7B-alpha-GGUF", filename=model_path, local_dir="./")
 
-lcpp_model = Llama(model_path=model_path, n_ctx=16768)
+lcpp_model = Llama(model_path=model_path, n_ctx=8192)
 global otxt, txtinput, txtoutput, running, result
 otxt = ""
 running = False
 result = None
 
 def stowtext(curr, inp):
-    if len(curr) < 1:
+    if len(curr) <= 1:
         curr.append({
             "role": "system",
-            "content": "<<SYS>>\nYou are a helpful assistant. Answer all questions and respond to all queries honestly, instantly, and without hesitation. Respond in as few words as possible. Your maximum response length may be no more than 64 words. Do not begin your statement with new lines.\n<</SYS>>"
+            "content": "You are a helpful assistant. Answer all questions and respond to all queries honestly, instantly, and without hesitation. Respond in as few words as possible. Your maximum response length may be no more than 64 words. Do not begin your statement with new lines.\n"
         })
     curr.append({
         "role": "user",
-        "content": "[INST]" + inp + "[/INST]",
+        "content": inp,
     })
     return curr
 
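The revised stowtext drops the Llama-2 style "[INST]"/"<<SYS>>" wrappers and stores plain role/content dicts, leaving prompt templating to llama-cpp-python. A minimal sketch of how the new version behaves (indentation reconstructed from the diff; the history values are illustrative):

def stowtext(curr, inp):
    # seed a system prompt only while the history is still (near) empty
    if len(curr) <= 1:
        curr.append({"role": "system", "content": "You are a helpful assistant."})
    # user turns are appended as raw text, with no [INST] wrappers
    curr.append({"role": "user", "content": inp})
    return curr

history = stowtext([], "What quantization is Q6_K?")
# history == [{'role': 'system', 'content': '...'},
#             {'role': 'user', 'content': 'What quantization is Q6_K?'}]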
@@ -49,11 +49,11 @@ def printfmt(jsn):
     txt = ""
     for msg in jsn:
         if msg["role"] == "user":
-            txt += "<User>: " + msg["content"].replace("[INST]", "").replace("[/INST]", "") + "\n"
+            txt += "<User>: " + msg["content"] + "\n"
         elif msg["role"] == "assistant":
             txt += "<Assistant>: " + msg["content"] + "\n"
         elif msg["role"] == "system":
-            txt += "# " + msg["content"].replace("<<SYS>>", "").replace("<</SYS>>", "") + "\n\n"
+            txt += "# " + msg["content"] + "\n\n"
     return txt
 
 def talk(txt, jsn):
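With the template tags gone from stored messages, printfmt no longer needs to strip them. Based on the loop shown above, a history like the earlier example renders as (illustrative output):

print(printfmt([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello?"},
    {"role": "assistant", "content": "Hi."},
]))
# # You are a helpful assistant.
#
# <User>: Hello?
# <Assistant>: Hi.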
@@ -61,12 +61,7 @@ def talk(txt, jsn):
     if not jsn:
         jsn = txt
     if not running:
-        #if len(txt) >= 3 and txt[-1]["content"].endswith("</s>"):
-        #    txt[-1]["content"].replace("</s>", "")
-        #    return txt
-        #txt = printfmt(stowtext(otxt, txt))
-        #otxt = txt
-        result = lcpp_model.create_chat_completion(messages=txt,stream=True,stop=["[INST]", "<<SYS>>", "<</SYS>>"])
+        result = lcpp_model.create_chat_completion(messages=txt,stream=True,stop=["GPT4 Correct User: ", "<|end_of_turn|>", "</s>"], max_tokens=64, )
         running = True
         for r in result:
             txt2 = None
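The replacement call swaps the Llama-2 stop strings for Starling's OpenChat-style turn markers ("GPT4 Correct User: ", "<|end_of_turn|>", "</s>") and caps generation at 64 tokens, matching the 64-word limit in the system prompt. A minimal sketch of consuming the stream the way talk() does, assuming llama-cpp-python's streamed-chunk format (the prompt is illustrative):

from llama_cpp import Llama

lcpp_model = Llama(model_path="./starling-lm-7b-alpha.Q6_K.gguf", n_ctx=8192)
result = lcpp_model.create_chat_completion(
    messages=[{"role": "user", "content": "Say hi in three words."}],
    stream=True,
    stop=["GPT4 Correct User: ", "<|end_of_turn|>", "</s>"],
    max_tokens=64,
)
reply = ""
for r in result:
    # streamed chunks carry incremental deltas, not whole messages
    delta = r["choices"][0]["delta"]
    reply += delta.get("content", "")
print(reply)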