File size: 5,923 Bytes
4b1a870
 
 
 
 
 
 
 
 
ad15fea
 
4b1a870
 
571bf3a
4b1a870
7e885fe
 
9e85760
 
 
26d0948
 
 
 
 
 
 
 
 
 
 
 
 
c2a4e8b
9e85760
 
 
0699d9c
 
 
 
 
 
 
 
 
 
 
 
 
9838fcd
a4a5e65
f51457e
 
 
ab2c4ae
f51457e
 
 
 
 
 
9e85760
a4a5e65
9e85760
 
 
f51457e
9e85760
 
 
 
 
 
 
 
 
 
f51457e
9e85760
f51457e
9e85760
 
 
 
f51457e
9e85760
f51457e
 
 
 
 
 
 
9e85760
f51457e
9e85760
f51457e
9e85760
 
 
 
 
 
 
 
 
 
f51457e
 
9e85760
 
 
 
 
f51457e
 
 
a4a5e65
 
 
9e85760
cbc7628
fa23caf
9e85760
7e885fe
26d0948
 
9e85760
 
 
 
 
 
 
 
 
dff70a0
 
 
 
26d0948
 
 
dff70a0
7e885fe
dff70a0
 
 
 
7e885fe
dff70a0
 
548d242
dff70a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad15fea
e8f00c3
dff70a0
 
 
 
 
 
 
 
 
 
879bccf
dff70a0
cfa5ed1
879bccf
c0010d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
import random
from datasets import load_dataset
from huggingface_hub import hf_hub_download

# Download the models: fetch each GGUF quantization from the Hub into ./models
# at import time. respond() later opens them from "models/<file name>".
for gguf_file in (
    "Tanuki-8x8B-dpo-v1.0-IQ4_NL.gguf",
    "Tanuki-8x8B-dpo-v1.0-Q6_K.gguf",
    "Tanuki-8x8B-dpo-v1.0-IQ3_M.gguf",
):
    hf_hub_download(
        repo_id="team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GGUF",
        filename=gguf_file,
        local_dir="./models",
    )



# Module-level cache used by respond(): the loaded Llama instance and the
# model file name it was created from. Both are None until the first request.
llm = llm_model = None

# Load the dataset and print it so the available splits are visible in the logs.
dataset = load_dataset("elyza/ELYZA-tasks-100")
print(dataset)

# Pick the split to use: "train" when present, otherwise fall back to "test".
split_name = "train" if "train" in dataset else "test"

# Draw up to 10 random rows from the chosen split to use as UI examples.
examples_list = list(dataset[split_name])  # materialize the split as a list
# Clamp the sample size: random.sample raises ValueError when asked for more
# items than the population holds.
examples = random.sample(examples_list, min(10, len(examples_list)))
# One single-element list per example (a nested list of example rows).
example_inputs = [[example['input']] for example in examples]


@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    template,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat reply to ``message`` from the selected GGUF model.

    The Llama instance is cached in the module-level ``llm`` / ``llm_model``
    globals and is only rebuilt when no model is loaded yet or when ``model``
    differs from the one currently loaded.

    Args:
        message: The new user message to answer.
        history: Earlier turns; for each entry, index 0 is used as the user
            text and index 1 as the assistant text.
        model: File name of the GGUF model, loaded from ``models/<model>``.
        template: Name of a ``MessagesFormatterType`` member; the lookup by
            name fails if no such member exists.
        system_message: Text used as the agent's system prompt.
        max_tokens: Copied to the sampling settings' ``max_tokens``.
        temperature: Copied to the sampling settings' ``temperature``.
        top_p: Copied to the sampling settings' ``top_p``.
        top_k: Copied to the sampling settings' ``top_k``.
        repeat_penalty: Copied to the sampling settings' ``repeat_penalty``.

    Yields:
        The reply accumulated so far, once per streamed chunk, so each
        yielded string extends the previous one.
    """
    global llm
    global llm_model

    chat_template = MessagesFormatterType[template]

    if llm is None or llm_model != model:
        # Drop the previous instance before constructing the next one.
        # Otherwise the old model stays referenced by the global while the
        # new one is being loaded, so both are held at the same time.
        # Clearing llm_model as well keeps the cache state consistent if the
        # load below raises.
        llm = None
        llm_model = None
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    # Start from the provider defaults and override with the UI values.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the earlier turns into a fresh chat history, user message first.
    messages = BasicChatHistory()
    for turn in history:
        messages.add_message({
            'role': Roles.user,
            'content': turn[0]
        })
        messages.add_message({
            'role': Roles.assistant,
            'content': turn[1]
        })

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    # Yield the growing reply rather than the individual chunks.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

# HTML shown as the interface description: two centered notices followed by
# one link per model file, all pointing at the same Hub repository.
description = (
    '<p align="center">★画面下のAdditional Inputから、使用したいモデルと、チャットテンプレートを選択してください。★</p>\n'
    '<p align="center">★使用時の注意:現在GGUF版は著明な性能低下が確認されていますので、本来の性能が発揮されていません。本来の性能は公式推奨の推論方法ご利用ください★</p>\n'
    "<p><center>\n"
    + "".join(
        '<a href="https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GGUF/"'
        f' target="_blank">{gguf_name}</a><br>\n'
        for gguf_name in (
            "Tanuki-8x8B-dpo-v1.0-IQ4_NL.gguf",
            "Tanuki-8x8B-dpo-v1.0-Q6_K.gguf",
            "Tanuki-8x8B-dpo-v1.0-IQ3_M.gguf",
        )
    )
    + "</center></p>\n"
)

# Chat template names offered in the "Template" dropdown; respond() looks each
# one up by name in MessagesFormatterType.
templates = [
    "MISTRAL",
    "CHATML",
    "VICUNA",
    "LLAMA_2",
    "SYNTHIA",
    "NEURAL_CHAT",
    "SOLAR",
    "OPEN_CHAT",
    "ALPACA",
    "CODE_DS",
    "B22",
    "LLAMA_3",
    "PHI_3",
]

# Extra controls shown with the chat box. Their order matches the parameters
# of respond() that follow (message, history): model, template, system
# message, max tokens, temperature, top-p, top-k, repetition penalty.
_extra_inputs = [
    gr.Dropdown(
        choices=[
            'Tanuki-8x8B-dpo-v1.0-IQ4_NL.gguf',
            'Tanuki-8x8B-dpo-v1.0-Q6_K.gguf',
            'Tanuki-8x8B-dpo-v1.0-IQ3_M.gguf',
        ],
        value="Tanuki-8x8B-dpo-v1.0-IQ4_NL.gguf",
        label="Model",
    ),
    gr.Dropdown(choices=templates, value="ALPACA", label="Template"),
    gr.Textbox(
        value="以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
]

# Chat display widget, created after the extra inputs as in the original call.
_chat_window = gr.Chatbot(scale=1, likeable=False, show_copy_button=True)

# The chat UI itself: respond() produces the replies, and the randomly chosen
# dataset prompts are offered as clickable examples (never cached).
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_inputs,
    examples=example_inputs,
    cache_examples=False,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    title="Chat with various models using llama.cpp",
    description=description,
    chatbot=_chat_window,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()