import gradio as gr
import os
from huggingface_hub import InferenceClient
from huggingface_hub import hf_hub_download
import chatglm_cpp
def list_files_tree(directory, indent=""):
    # Print the contents of `directory` as a tree, similar to the Unix `tree` command.
    items = os.listdir(directory)
    for i, item in enumerate(items):
        prefix = "└── " if i == len(items) - 1 else "├── "
        print(indent + prefix + item)
        item_path = os.path.join(directory, item)
        if os.path.isdir(item_path):
            next_indent = indent + ("    " if i == len(items) - 1 else "│   ")
            list_files_tree(item_path, next_indent)
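
# Example usage (after the model download below): list_files_tree("./Models")
# prints the downloaded files as a tree.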
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
model = f"./Models/{repo_id}/{filename}"
max_length = 8192
pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
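# Conversation history is kept in this module-level list and reused across calls
# to respond(), so it persists for the lifetime of the Space process.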
messages = []
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global messages
    print(messages)
    # Generation settings for chatglm.cpp: stream tokens as they are produced and
    # sample only when temperature > 0 (greedy decoding otherwise).
    generation_kwargs = dict(
        max_length=max_length,
        max_context_length=max_tokens,
        do_sample=temperature > 0,
        top_k=0,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        stream=True,
    )
    if messages == []:
        messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
    print(messages)
    # for val in history:
    #     if val[0]:
    #         messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
    #     if val[1]:
    #         messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[1]))
    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
    print(messages)
response = ""
yield response
chunks = []
yield response
for chunk in pipeline.chat(messages, **generation_kwargs):
response += chunk.content
chunks.append(chunk)
yield response
yield response
messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
print(messages)
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()