import gradio as gr
import os
from huggingface_hub import InferenceClient
from huggingface_hub import hf_hub_download
import chatglm_cpp


def list_files_tree(directory, indent=""):
    items = os.listdir(directory)
    for i, item in enumerate(items):
        prefix = "└── " if i == len(items) - 1 else "├── "
        print(indent + prefix + item)
        item_path = os.path.join(directory, item)
        if os.path.isdir(item_path):
            next_indent = indent + ("    " if i == len(items) - 1 else "│   ")
            list_files_tree(item_path, next_indent)
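# Illustrative usage only: list_files_tree is not invoked elsewhere in this
# script. Uncommenting the call below would print the local model cache as a
# tree, assuming ./Models exists after the hf_hub_download call further down.
# list_files_tree("./Models")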
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
model = f"./Models/{repo_id}/{filename}"
max_length = 8192
pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
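# Optional smoke test: a minimal sketch, assuming a non-streaming
# pipeline.chat() call returns a single ChatMessage whose .content holds the
# reply. Left commented out so the Space only generates through respond() below.
# print(
#     pipeline.chat(
#         [chatglm_cpp.ChatMessage(role="user", content="Hello")],
#         max_length=max_length,
#         do_sample=False,
#     ).content
# )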
messages = []


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global messages
    response = ""
    yield response
    # Note: the "Max new tokens" slider value is wired to max_context_length
    # here, while max_length caps the overall sequence length.
    generation_kwargs = dict(
        max_length=max_length,
        max_context_length=max_tokens,
        do_sample=temperature > 0,
        top_k=0,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        stream=True,
    )
    # Seed the conversation with the system prompt on the first call; the
    # module-level `messages` list carries the dialogue across turns.
    if messages == []:
        messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
    # for val in history:
    #     if val[0]:
    #         messages.append(chatglm_cpp.ChatMessage(role="user", content=val[0]))
    #     if val[1]:
    #         messages.append(chatglm_cpp.ChatMessage(role="assistant", content=val[1]))
    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))
    chunks = []
    # Stream the reply, yielding the accumulated text after each chunk.
    for chunk in pipeline.chat(messages, **generation_kwargs):
        response += chunk.content
        chunks.append(chunk)
        yield response
    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()