Update app.py
app.py CHANGED
@@ -14,17 +14,17 @@ def list_files_tree(directory, indent=""):
         next_indent = indent + ("    " if i == len(items) - 1 else "│   ")
         list_files_tree(item_path, next_indent)
 
-
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-repo_id
-
-
-
-
-
+pipeline = None
+
+def load(repo_id, filename):
+    global pipeline
+    local_dir = f"./Models/{repo_id}"
+    hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
+    model = os.path.join(local_dir, filename)
+    max_length = 8192
+    pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
+    return f"Model {filename} from {repo_id} loaded successfully."
+load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
 
 messages = []
 
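A side note on the new `load()` helper: `hf_hub_download` returns the local path of the file it fetched (or of the cached copy), so the `os.path.join` step can be folded into it. A minimal sketch of the same pattern, using only the imports the app already relies on (the name `load_pipeline` is illustrative, not part of the commit):

import chatglm_cpp
from huggingface_hub import hf_hub_download

def load_pipeline(repo_id, filename):
    # hf_hub_download returns the path of the file it downloaded (or found cached).
    model_path = hf_hub_download(repo_id=repo_id, filename=filename,
                                 local_dir=f"./Models/{repo_id}")
    # Same 8192-token context window as the commit uses above.
    return chatglm_cpp.Pipeline(model_path, max_length=8192)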
@@ -38,11 +38,15 @@ def respond(
 ):
     global messages
 
+    if pipeline is None:
+        yield "Error: No model loaded. Please load a model first."
+        return
+
     response = ""
     yield response
 
     generation_kwargs = dict(
-        max_length=
+        max_length=8192,
         max_context_length=max_tokens,
         do_sample=temperature > 0,
         top_k=0,
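The hunk header only shows `def respond(`; the full signature never appears in this diff. Judging from the `additional_inputs` wired up in the next hunk and the names used in the body (`message`, `system_message`, `max_tokens`, `temperature`), it presumably follows the stock `gr.ChatInterface` callback shape, where each extra widget's value is passed positionally after the message and history:

def respond(message, history, system_message, max_tokens, temperature, top_p):
    ...  # parameter names are inferred, not shown in this commit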
@@ -55,43 +59,38 @@ def respond(
     if messages == []:
         messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
 
-    # for val in history:
-    #     if val[0]:
-    #         messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
-    #     if val[1]:
-    #         messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[0]))
-
     messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
 
-    chunks = []
-
     for chunk in pipeline.chat(messages, **generation_kwargs):
         response += chunk.content
-        chunks.append(chunk)
         yield response
 
     messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
 
+with gr.Blocks() as chat:
+    with gr.Row():
+        repo_id_input = gr.Textbox(label="Repo ID", value="None1145/ChatGLM3-6B-Theresa-GGML")
+        filename_input = gr.Textbox(label="Filename", value="ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
+        load_button = gr.Button("Load Model")
+
+    load_status = gr.Textbox(label="Load Status", interactive=False)
+    load_button.click(load, inputs=[repo_id_input, filename_input], outputs=load_status)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ),
-    ],
-)
-
+    chat_interface = gr.ChatInterface(
+        respond,
+        additional_inputs=[
+            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+            gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-p (nucleus sampling)",
+            ),
+        ],
+    )
 
 if __name__ == "__main__":
-
+    chat.launch()
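One design consequence worth noting: `respond` keeps conversation state in the module-level `messages` list (the `history`-based replay loop is deleted above), so every visitor to the Space shares a single running conversation. A hypothetical local smoke test of the load-then-chat flow, assuming the inferred `respond` signature sketched earlier:

load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
for partial in respond("Hello!", [], "You are a friendly Chatbot.", 512, 0.7, 0.95):
    pass  # respond streams the growing reply; keep only the last value
print(partial)  # the final accumulated response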