import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import LocalEntryNotFoundError
from llama_cpp import Llama

with open("./config.yml", "r") as f:
    config = yaml.safe_load(f)
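
# For reference, a minimal illustrative config.yml (keys inferred from how
# `config` is used in this file; the repo_id/filename values below are
# examples, not necessarily what this Space ships with):
#
#   hub:
#     repo_id: Nekochu/Luminia-13B-v3-GGUF
#     filename: luminia-13b-v3.Q4_K_M.gguf
#   llama_cpp:   # kwargs forwarded to llama_cpp.Llama()
#     n_ctx: 2048
#     n_threads: 2
#   chat:        # kwargs forwarded to every llm() completion call
#     stop: ["Kullanıcı:"]
#   queue:       # kwargs forwarded to demo.queue()
#     concurrency_count: 1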
while True:
    try:
        load_config = config.copy()
        hub_config = load_config["hub"].copy()
        repo_id = hub_config.pop("repo_id")
        filename = hub_config.pop("filename")
        fp = hf_hub_download(
            repo_id=repo_id, filename=filename, **hub_config
        )
        break
    except LocalEntryNotFoundError as e:
        # hf_hub_download raises LocalEntryNotFoundError for both missing
        # files and network failures; retry only on connection errors.
        if "Connection error" in str(e):
            print(str(e) + ", retrying...")
        else:
            raise

llm = Llama(model_path=fp, **config["llama_cpp"])


def user(message, history):
    history = history or []
    # Append the user's message to the conversation history
    history.append([message, ""])
    return "", history

def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
    history = history or []

    # Render the history in the model's Turkish dialogue format
    # ("Kullanıcı" = user, "Asistan" = assistant).
    messages = system_message.strip() + "\n" + \
               "\n".join(["\n".join(["Kullanıcı: " + item[0], "Asistan: " + item[1]])
                          for item in history])

    # Strip the trailing space after the final "Asistan: " -- some models
    # emit a zero-width space if the prompt ends with a space.
    messages = messages.rstrip(" ")
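    # For example, with history [["Merhaba", ""]] the rendered prompt is:
    #
    #   <system message>
    #   Kullanıcı: Merhaba
    #   Asistan: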

    history[-1][1] = ""
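    # Each streamed chunk from llama_cpp is a completion dict of the form
    # {"choices": [{"text": "<delta>"}], ...}; accumulate the text deltas
    # into the last history entry and yield so the chatbot updates live.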
    for output in llm(
            messages,
            echo=False,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            **config['chat']
    ):
        answer = output['choices'][0]['text']
        history[-1][1] += answer
        # stream the response
        yield history, history

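# rp_chat mirrors chat() but renders the history with role-play control
# tokens (<|system|>, <|user|>, <|model|>) instead of the plain
# "Kullanıcı:"/"Asistan:" dialogue format.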
def rp_chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
    history = history or []

    messages = "<|system|>" + system_message.strip() + "\n" + \
               "\n".join(["\n".join(["<|user|>"+item[0], "<|model|>"+item[1]])
                        for item in history])

    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages[:-1]

    history[-1][1] = ""
    for output in llm(
            messages,
            echo=False,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            **config['chat']
    ):
        answer = output['choices'][0]['text']
        history[-1][1] += answer
        # stream the response
        yield history, history


def clear_chat(chat_history_state, chat_message):
    # Reset the conversation history and clear the message box
    return [], ''


start_message = """
- You are a smart, honest, and helpful assistant.
- You will answer all kinds of questions honestly.
"""

def generate_text_instruct(input_text):
    response = ""
    for output in llm(f"Kullanıcı: {input_text}\nAsistan:",  echo=False, stream=True, **config['chat']):
        answer = output['choices'][0]['text']
        response += answer
        yield response


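# gr.Interface supports generator functions: each value yielded by
# generate_text_instruct replaces the output textbox contents, giving
# token-by-token streaming in the Instruct tab.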
instruct_interface = gr.Interface(
    fn=generate_text_instruct,
    inputs=gr.Textbox(lines=10, label="Enter your input text"),
    outputs=gr.Textbox(label="Output text"),
)


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
                    ### This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) quantized model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
    
                    <details>
                        <summary><a href="https://huggingface.co/spaces/Nekochu/Luminia-13B-v3-GGUF?duplicate=true">Duplicate the Space</a> to skip the queue and run in a private space or to use your own GGUF models, simply update the <a href="https://huggingface.co/spaces/Nekochu/Luminia-13B-v3-GGUF/blob/main/config.yml">config.yml</a></summary>
                        <ul>
                            <li>This Space runs GGUF models with CPU (<strong>free</strong> tier) or GPU support, so it can run larger models on smaller GPUs with less VRAM. <a href="https://github.com/OpenAccess-AI-Collective/ggml-webui">[Contribute]</a></li>
                            <li>This is running on a smaller, shared GPU, so it may take a few seconds to respond.</li>
                        </ul>
                    </details>
                    """)
    with gr.Tab("Chatbot"):
        gr.Markdown("# GGUF Spaces Chatbot Demo")
        chatbot = gr.Chatbot()
        with gr.Row():
            message = gr.Textbox(
                label="What would you like to talk about?",
                placeholder="Ask me something.",
                lines=3,
            )
        with gr.Row():
            submit = gr.Button(value="Send Message", variant="secondary").style(full_width=True)
            roleplay = gr.Button(value="Roleplay", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New Topic", variant="secondary").style(full_width=False)
            stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
        with gr.Row():
            with gr.Column():
                max_tokens = gr.Slider(20, 1000, label="Max Tokens", step=20, value=300)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
                top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                repeat_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)

        system_msg = gr.Textbox(
            start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt, useful for RP", lines=5)

        chat_history_state = gr.State()
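        # "New Topic" needs two handlers: the first resets the history state
        # and the message box, the second blanks the Chatbot display itself.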
        clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
        clear.click(lambda: None, None, chatbot, queue=False)

        submit_click_event = submit.click(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], outputs=[chatbot, chat_history_state], queue=True
        )
        roleplay_click_event = roleplay.click(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=rp_chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], outputs=[chatbot, chat_history_state], queue=True
        )
        # message_submit_event = message.submit(
        #     fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        # ).then(
        #     fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], outputs=[chatbot, chat_history_state], queue=True
        # )
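        # "Stop" cancels the in-flight streaming generators started by the
        # submit and roleplay click events.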
        stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, roleplay_click_event], queue=False)
    with gr.Tab("Instruct"):
        gr.Markdown("# GGUF Spaces Instruct Demo")
        instruct_interface.render()

demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)