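"""Gradio chat Space for a ChatGLM3-6B GGML model served with chatglm.cpp.

Downloads a quantized model from the Hugging Face Hub, loads it into a
chatglm_cpp.Pipeline, and exposes a streaming gr.ChatInterface with a
model picker and sampling controls.
"""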
import os
import time

import gradio as gr
from huggingface_hub import hf_hub_download

import chatglm_cpp

# The chatglm.cpp pipeline; populated by load() below.
pipeline = None
def load(repo_id, filename):
    """Download a GGML model file from the Hub and load it into a pipeline."""
    global pipeline
    local_dir = f"./Models/{repo_id}"
    hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
    model_path = os.path.join(local_dir, filename)
    max_length = 8192
    pipeline = chatglm_cpp.Pipeline(model_path, max_length=max_length)
    return f"Model {filename} from {repo_id} loaded successfully."


# Load the default model at startup.
load("None1145/ChatGLM3-6B-Theresa-GGML", "ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
messages = []


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for `message`, keeping history in `messages`."""
    global messages
    if pipeline is None:
        yield "Error: No model loaded. Please load a model first."
        return

    # Brief "typing" placeholder shown while generation starts up.
    response = "..."
    for _ in range(3):
        yield response
        time.sleep(1)
        response += " ..."
    generation_kwargs = dict(
        max_length=8192,
        # Budget for newly generated tokens, per the "Max new tokens" slider
        # (assumes a chatglm-cpp build that accepts max_new_tokens).
        max_new_tokens=max_tokens,
        do_sample=temperature > 0,
        top_k=0,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        stream=True,
    )
    # Seed the conversation with the system prompt on the first turn.
    if not messages:
        messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]
    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))

    # Stream partial responses back to the UI as chunks arrive.
    response = ""
    for chunk in pipeline.chat(messages, **generation_kwargs):
        response += chunk.content
        yield response
    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))
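
# Note: `messages` is module-level state, so chat history is shared across all
# sessions of this Space rather than tracked per user; the `history` argument
# supplied by gr.ChatInterface is accepted but unused.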

with gr.Blocks() as chat:
    # Model picker: download and load any compatible GGML file from the Hub.
    with gr.Row():
        repo_id_input = gr.Textbox(label="Repo ID", value="None1145/ChatGLM3-6B-Theresa-GGML")
        filename_input = gr.Textbox(label="Filename", value="ChatGLM3-6B-Theresa-GGML-Q4_0.bin")
        load_button = gr.Button("Load Model")
    load_status = gr.Textbox(label="Load Status", interactive=False)
    load_button.click(load, inputs=[repo_id_input, filename_input], outputs=load_status)

    # Chat UI with sampling controls exposed as additional inputs.
    chat_interface = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )

if __name__ == "__main__":
    chat.launch()
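
# Usage sketch (not part of the original Space): once the app is running, the
# ChatInterface endpoint can be called with gradio_client; positional arguments
# mirror `respond`'s inputs (message plus the additional inputs).
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")  # assumed local URL
#   result = client.predict(
#       "Hello!",                       # message
#       "You are a friendly Chatbot.",  # system message
#       512,                            # max new tokens
#       0.7,                            # temperature
#       0.95,                           # top-p
#       api_name="/chat",
#   )
#   print(result)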