None1145 committed on
Commit 79a34ec · verified · 1 Parent(s): e49477a

Update app.py

Files changed (1):
  1. app.py +21 -21
app.py CHANGED
@@ -21,15 +21,10 @@ For more information on `huggingface_hub` Inference API support, please check th
 
 repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
 filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
-
-hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./Models")
-list_files_tree("./Models")
-import time
-time.sleep(10)
-
+hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
+model = f"./Models/{repo_id}/{filename}"
 pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
 
-
 def respond(
     message,
     history: list[tuple[str, str]],
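This hunk replaces the debug download (flat ./Models directory, directory listing, sleep) with a download into a per-repo directory and a model path built from repo_id and filename. Note that hf_hub_download also returns the local path of the fetched file, so the path need not be rebuilt by hand. A minimal standalone sketch of this step, assuming the huggingface_hub and chatglm-cpp packages are installed (the max_length value here is an illustrative assumption; in app.py it is defined earlier):

import chatglm_cpp
from huggingface_hub import hf_hub_download

repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"

# hf_hub_download returns the local path of the downloaded file,
# which can be handed to the pipeline directly.
model = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")

# Load the GGML weights into a chatglm.cpp pipeline.
pipeline = chatglm_cpp.Pipeline(model, max_length=2048)  # 2048 is an assumed value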
@@ -38,30 +33,35 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
+    generation_kwargs = dict(
+        max_length=max_length,
+        max_context_length=max_tokens,
+        do_sample=temperature > 0,
+        top_k=0,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=1.0,
+        stream=True,
+    )
+
+    messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
 
     for val in history:
         if val[0]:
-            messages.append({"role": "user", "content": val[0]})
+            messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
         if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+            messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[1]))
 
-    messages.append({"role": "user", "content": message})
+    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
 
     response = ""
 
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
+    for chunk in pipeline.chat(messages, **generation_kwargs):
+        response += chunk.content
         yield response
 
+    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
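This hunk swaps the remote huggingface_hub client call for local streaming generation with chatglm_cpp: with stream=True, pipeline.chat yields delta chunks whose content is accumulated and re-yielded, so respond stays a generator that Gradio's ChatInterface can consume. A minimal sketch of driving it outside Gradio; the two parameters elided between history and temperature are assumed to be system_message and max_tokens (as their use in the body suggests), and all values are illustrative:

# Each yielded value is the cumulative response so far,
# so the final iteration carries the complete reply.
final = ""
for partial in respond(
    "Hello!",            # message
    [],                  # history
    "You are Theresa.",  # assumed: system_message
    512,                 # assumed: max_tokens
    0.8,                 # temperature
    0.95,                # top_p
):
    final = partial
print(final)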