import gradio as gr
import os
from huggingface_hub import InferenceClient
from huggingface_hub import hf_hub_download
import chatglm_cpp

def list_files_tree(directory, indent=""):
    """Print a tree view of `directory`, in the style of the `tree` command."""
    items = os.listdir(directory)
    for i, item in enumerate(items):
        prefix = "└── " if i == len(items) - 1 else "├── "
        print(indent + prefix + item)
        item_path = os.path.join(directory, item)
        if os.path.isdir(item_path):
            next_indent = indent + ("    " if i == len(items) - 1 else "│   ")
            list_files_tree(item_path, next_indent)
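
# Example usage (not called at runtime): list_files_tree("./Models") prints the
# layout of the downloaded model directory.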

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
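# A minimal sketch (not used in this app, which runs a local GGML model) of the
# hosted route through the client commented out above, assuming
# huggingface_hub >= 0.22 and an illustrative prompt:
#
# for chunk in client.chat_completion(
#     [{"role": "user", "content": "Hello!"}],
#     max_tokens=256,
#     stream=True,
# ):
#     print(chunk.choices[0].delta.content or "", end="")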

repo_id = "None1145/ChatGLM3-6B-Theresa-GGML"
filename = "ChatGLM3-6B-Theresa-GGML-Q4_0.bin"
hf_hub_download(repo_id=repo_id, filename=filename, local_dir=f"./Models/{repo_id}")
model = f"./Models/{repo_id}/{filename}"
max_length = 8192
pipeline = chatglm_cpp.Pipeline(model, max_length=max_length)
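
# Quick smoke test (commented out): a one-shot, non-streaming generation through
# the same pipeline; the prompt is illustrative.
# reply = pipeline.chat([chatglm_cpp.ChatMessage(role="user", content="Hello!")])
# print(reply.content)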


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    generation_kwargs = dict(
        max_length=max_length,
        # Note: the UI's "Max new tokens" value is passed through as
        # max_context_length, i.e. it caps the prompt context handed to the
        # model rather than the number of generated tokens.
        max_context_length=max_tokens,
        do_sample=temperature > 0,
        top_k=0,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.0,
        stream=True,
    )

    # Rebuild the message list from scratch on every call: system prompt first.
    messages = [chatglm_cpp.ChatMessage(role="system", content=system_message)]

    print(messages)

    # Replay prior turns so the model sees the full conversation history.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append(chatglm_cpp.ChatMessage(role="user", content=user_msg))
        if assistant_msg:
            messages.append(
                chatglm_cpp.ChatMessage(role="assistant", content=assistant_msg)
            )

    messages.append(chatglm_cpp.ChatMessage(role="user", content=message))

    response = ""
    yield response
    chunks = []

    for chunk in pipeline.chat(messages, **generation_kwargs):
        response += chunk.content
        chunks.append(chunk)
        yield response

    messages.append(chatglm_cpp.ChatMessage(role="assistant", content=response))

    print(messages)


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
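
# On older Gradio releases, streaming generator responses require the request
# queue to be enabled explicitly; on recent versions it is on by default.
# demo.queue()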


if __name__ == "__main__":
    demo.launch()