import os

import chatglm_cpp
import gradio as gr
# InferenceClient is only needed for the commented-out Inference API client below.
from huggingface_hub import InferenceClient, hf_hub_download

def list_files_tree(directory, indent=""):
    items = os.listdir(directory)
    for i, item in enumerate(items):
        prefix = "└── " if i == len(items) - 1 else "β”œβ”€β”€ "
        print(indent + prefix + item)
        item_path = os.path.join(directory, item)
        if os.path.isdir(item_path):
            next_indent = indent + ("    " if i == len(items) - 1 else "β”‚   ")
            list_files_tree(item_path, next_indent)
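
# Usage sketch (hypothetical call, not invoked at startup): dump the model
# cache directory to the logs for debugging.
# list_files_tree("./Models")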

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
model = f"./Models/{repo_id}/{filename}"
max_length = 8192
pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
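
# Optional smoke test (a sketch; assumes `Pipeline.chat` with the default
# stream=False returns a single ChatMessage). Left commented out to keep
# startup fast:
# print(pipeline.chat([chatglm_cpp.ChatMessage(role="user", content="Hi")]).content)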

# Global conversation state. Note: this list is shared by every visitor to the
# app and persists across chats.
messages = []

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global messages

    print(messages)  # debug: conversation state before this turn
    
    generation_kwargs = dict(
        max_length=max_length,
        max_context_length=max_tokens,  # slider value caps the prompt context length
        do_sample=temperature > 0,  # fall back to greedy decoding at temperature 0
        top_k=0,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        stream=True,  # yield tokens incrementally
    )

    # Seed the conversation with the system prompt on the first turn only;
    # later edits to the system-message box do not retroactively apply.
    if not messages:
        messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]

    # Rebuilding state from Gradio's `history` is disabled because the global
    # `messages` list already tracks the full conversation:
    # for user_msg, assistant_msg in history:
    #     if user_msg:
    #         messages.append(chatglm_cpp.ChatMessage(role="user", content=user_msg))
    #     if assistant_msg:
    #         messages.append(chatglm_cpp.ChatMessage(role="assistant", content=assistant_msg))

    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))

    # Stream the reply token by token; Gradio re-renders the chat on each yield.
    response = ""
    for chunk in pipeline.chat(messages, **generation_kwargs):
        response += chunk.content
        yield response

    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))

    print(messages)  # debug: conversation state after this turn
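
# Minimal sketch (hypothetical helper, not part of the original app): since
# `messages` is module-level state, starting a fresh conversation requires
# clearing it, e.g. from a Gradio button callback or the Python console.
def reset_conversation():
    global messages
    messages = []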


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max context tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
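    # Sketch (assumption: running locally rather than on Spaces): pass
    # share=True to get a temporary public URL.
    # demo.launch(share=True)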