None1145 committed
Commit 7855bae · verified · 1 Parent(s): 1b493cd

Update app.py

Files changed (1)
app.py +39 -40
app.py CHANGED
@@ -14,17 +14,17 @@ def list_files_tree(directory, indent=""):
         next_indent = indent + ("    " if i == len(items) - 1 else "│   ")
         list_files_tree(item_path, next_indent)
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
-filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
-hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
-model = f"./Models/{repo_id}/{filename}"
-max_length = 8192
-pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
+pipeline = None
+
+def load(repo_id, filename):
+    global pipeline
+    local_dir = f"./Models/{repo_id}"
+    hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
+    model = os.path.join(local_dir, filename)
+    max_length = 8192
+    pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
+    return f"Model {filename} from {repo_id} loaded successfully."
+load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
 
 messages = []
 
@@ -38,11 +38,15 @@ def respond(
 ):
     global messages
 
+    if pipeline is None:
+        yield "Error: No model loaded. Please load a model first."
+        return
+
     response = ""
     yield response
 
     generation_kwargs = dict(
-        max_length=max_length,
+        max_length=8192,
         max_context_length=max_tokens,
         do_sample=temperature > 0,
         top_k=0,
@@ -55,43 +59,38 @@ def respond(
     if messages == []:
         messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
 
-    # for val in history:
-    #     if val[0]:
-    #         messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
-    #     if val[1]:
-    #         messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[0]))
-
     messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
 
-    chunks = []
-
     for chunk in pipeline.chat(messages, **generation_kwargs):
         response += chunk.content
-        chunks.append(chunk)
         yield response
 
     messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
 
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
+with gr.Blocks() as chat:
+    with gr.Row():
+        repo_id_input = gr.Textbox(label="Repo ID", value="None1145/ChatGLM3-6B-Theresa-GGML")
+        filename_input = gr.Textbox(label="Filename", value="ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
+        load_button = gr.Button("Load Model")
+
+    load_status = gr.Textbox(label="Load Status", interactive=False)
+    load_button.click(load, inputs=[repo_id_input, filename_input], outputs=load_status)
+
+    chat_interface = gr.ChatInterface(
+        respond,
+        additional_inputs=[
+            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+            gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-p (nucleus sampling)",
+            ),
+        ],
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    chat.launch()
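
Taken together, the hunks replace the module-level model download with a load() helper that can be re-targeted at runtime: the pipeline global starts as None, a "Load Model" button in a gr.Blocks layout calls load() and reports into a status textbox, and respond() refuses to generate until a model is in place. This also explains the max_length=8192 hardcode in respond(): the old module-level max_length global no longer exists once loading moves inside load(). For reference, the sketch below assembles the post-commit app.py from the hunks. The import block, the respond() parameter list, the body of list_files_tree(), and everything in generation_kwargs after top_k=0 are not visible in this diff and are assumptions inferred from the identifiers the code uses; treat this as an approximation, not the committed file.

# Sketch of app.py after this commit, assembled from the hunks above.
# ASSUMED (not shown in the diff): the imports, the respond() signature,
# and the tail of generation_kwargs (stream=True is assumed, since
# respond() iterates over chunks).
import os

import chatglm_cpp
import gradio as gr
from huggingface_hub import hf_hub_download

# def list_files_tree(directory, indent=""): elided; the hunks show only
# two lines of its body.

pipeline = None  # no model loaded yet; load() populates this global

def load(repo_id, filename):
    global pipeline
    local_dir = f"./Models/{repo_id}"
    hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
    model = os.path.join(local_dir, filename)
    pipeline = chatglm_cpp.Pipeline(model, max_length=8192)
    return f"Model {filename} from {repo_id} loaded successfully."

load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")

messages = []

def respond(message, history, system_message, max_tokens, temperature, top_p):
    global messages

    if pipeline is None:
        yield "Error: No model loaded. Please load a model first."
        return

    response = ""
    yield response

    generation_kwargs = dict(
        max_length=8192,          # hardcoded by this commit; the old global is gone
        max_context_length=max_tokens,
        do_sample=temperature > 0,
        top_k=0,
        top_p=top_p,              # assumed: the hunk is cut off after top_k=0
        temperature=temperature,  # assumed
        stream=True,              # assumed: required for the chunk loop below
    )

    if messages == []:
        messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]

    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))

    for chunk in pipeline.chat(messages, **generation_kwargs):
        response += chunk.content
        yield response

    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))

with gr.Blocks() as chat:
    with gr.Row():
        repo_id_input = gr.Textbox(label="Repo ID", value="None1145/ChatGLM3-6B-Theresa-GGML")
        filename_input = gr.Textbox(label="Filename", value="ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
        load_button = gr.Button("Load Model")

    load_status = gr.Textbox(label="Load Status", interactive=False)
    load_button.click(load, inputs=[repo_id_input, filename_input], outputs=load_status)

    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ],
    )

if __name__ == "__main__":
    chat.launch()

Note that messages remains a module-level global that grows across turns and is shared by every visitor to the Space; this commit guards it behind the pipeline check but does not change that behavior.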