aixsatoshi committed on
Commit
9e85760
1 Parent(s): f51457e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -121
app.py CHANGED
@@ -11,18 +11,44 @@ from huggingface_hub import hf_hub_download
11
 
12
  # モデルのダウンロード
13
  hf_hub_download(
14
- repo_id="bartowski/gemma-2-9b-it-GGUF",
15
- filename="gemma-2-9b-it-Q5_K_M.gguf",
16
- local_dir="./models"
17
  )
18
 
19
  hf_hub_download(
20
- repo_id="bartowski/Gemma-2-9B-It-SPPO-Iter3-GGUF",
21
- filename="Gemma-2-9B-It-SPPO-Iter3-Q5_K_M.gguf",
22
- local_dir="./models"
23
  )
24
 
25
- # 推論関数
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  @spaces.GPU(duration=120)
27
  def respond(
28
  message,
@@ -34,25 +60,32 @@ def respond(
34
  top_p,
35
  top_k,
36
  repeat_penalty,
37
- ):
38
- chat_template = MessagesFormatterType.GEMMA_2
 
 
 
 
39
 
40
- llm = Llama(
41
- model_path=f"models/{model}",
42
- flash_attn=True,
43
- n_gpu_layers=81,
44
- n_batch=1024,
45
- n_ctx=8192,
46
- )
 
 
 
47
  provider = LlamaCppPythonProvider(llm)
48
-
49
  agent = LlamaCppAgent(
50
- provider,
51
- system_prompt=f"{system_message}",
52
- predefined_messages_formatter_type=chat_template,
53
- debug_output=True
54
  )
55
-
56
  settings = provider.get_provider_default_settings()
57
  settings.temperature = temperature
58
  settings.top_k = top_k
@@ -60,112 +93,107 @@ def respond(
60
  settings.max_tokens = max_tokens
61
  settings.repeat_penalty = repeat_penalty
62
  settings.stream = True
63
-
64
  messages = BasicChatHistory()
65
-
66
- one_shot_prompt = """
67
- あなたは優れた翻訳者です。以下の文章を日本語から英語に翻訳してください。翻訳は文法的に正しく、自然な表現を使用し、文脈に適した内容にしてください。また、専門用語や文化的なニュアンスを正確に伝えるよう心がけてください。
68
-
69
- 期待する品質基準:
70
- 1. 文法的に正確であること。
71
- 2. 自然な英語表現を使用すること。
72
- 3. 文脈に適した訳を提供すること。
73
- 4. 専門用語や文化的なニュアンスを正確に伝えること。
74
-
75
- 以下に翻訳の例を示します。
76
-
77
- 例:
78
- 日本語: 優れた翻訳は、原文の意味を正確に伝えるだけでなく、読み手にとって自然な表現である必要があります。文法的な正確さはもちろんのこと、文化的なニュアンスも重要です。専門用語の正確な訳も求められます。さらに、文脈に適した訳を提供することが、翻訳の品質を高めます。最終的には、読み手にとってわかりやすい訳を目指してください。
79
- 英語: A good translation should not only convey the meaning of the original text accurately but also be expressed in a natural way for the reader. In addition to grammatical accuracy, cultural nuances are important. Accurate translation of technical terms is also required. Furthermore, providing a translation that fits the context enhances the quality of the translation. Ultimately, aim for a translation that is easy for the reader to understand.
80
-
81
- 以下の日本語の文章を英語に翻訳してください:
82
- """
83
- system_message += one_shot_prompt
84
-
85
  for msn in history:
86
- user = {
87
- 'role': Roles.user,
88
- 'content': msn[0]
89
- }
90
- assistant = {
91
- 'role': Roles.assistant,
92
- 'content': msn[1]
93
- }
94
- messages.add_message(user)
95
- messages.add_message(assistant)
96
 
97
  stream = agent.get_chat_response(
98
- message,
99
- llm_sampling_settings=settings,
100
- chat_history=messages,
101
- returns_streaming_generator=True,
102
- print_output=False
103
  )
104
 
105
  outputs = ""
106
  for output in stream:
107
- outputs += output
108
- yield outputs
109
-
110
- # Gradioのインターフェースを作成
111
- def create_interface(model_name, description):
112
- return gr.ChatInterface(
113
- respond,
114
- additional_inputs=[
115
- gr.Textbox(value=model_name, label="Model", interactive=False),
116
- gr.Textbox(value="You are a helpful assistant.", label="System message"),
117
- gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
118
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
119
- gr.Slider(
120
- minimum=0.1,
121
- maximum=1.0,
122
- value=0.95,
123
- step=0.05,
124
- label="Top-p",
125
- ),
126
- gr.Slider(
127
- minimum=0,
128
- maximum=100,
129
- value=40,
130
- step=1,
131
- label="Top-k",
132
- ),
133
- gr.Slider(
134
- minimum=0.0,
135
- maximum=2.0,
136
- value=1.1,
137
- step=0.1,
138
- label="Repetition penalty",
139
- ),
140
- ],
141
- retry_btn="Retry",
142
- undo_btn="Undo",
143
- clear_btn="Clear",
144
- submit_btn="Send",
145
- title=f"Chat with Gemma 2 using llama.cpp - {model_name}",
146
- description=description,
147
- chatbot=gr.Chatbot(
148
- scale=1,
149
- likeable=False,
150
- show_copy_button=True
151
- )
152
- )
153
-
154
- # 各モデルのインターフェース
155
- description_9b = """<p align="center">Gemma-2 9B it Model</p>"""
156
- description_27b = """<p align="center">Gemma-2 9B SPPO it Model</p>"""
157
-
158
- interface_9b = create_interface('gemma-2-9b-it-Q5_K_M.gguf', description_9b)
159
- interface_27b = create_interface('Gemma-2-9B-It-SPPO-Iter3-Q5_K_M.gguf', description_27b)
160
-
161
- # Gradio Blocksで2つのインターフェースを並べて表示
162
- with gr.Blocks() as demo:
163
- #gr.Markdown("# Compare Gemma-2 9B and 27B Models")
164
- with gr.Row():
165
- with gr.Column():
166
- interface_9b.render()
167
- with gr.Column():
168
- interface_27b.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  if __name__ == "__main__":
171
- demo.launch()
 
11
 
12
  # モデルのダウンロード
13
  hf_hub_download(
14
+ repo_id="Aratako/Oumuamua-7b-RP-GGUF",
15
+ filename="Oumuamua-7b-RP_Q4_K_M.gguf",
16
+ local_dir="./models"
17
  )
18
 
19
  hf_hub_download(
20
+ repo_id="bartowski/Oumuamua-7b-instruct-v2-GGUF",
21
+ filename="Oumuamua-7b-instruct-v2-Q4_K_M.gguf",
22
+ local_dir="./models"
23
  )
24
 
25
+ hf_hub_download(
26
+ repo_id="mmnga/umiyuki-Umievo-itr012-Gleipnir-7B-gguf",
27
+ filename="umiyuki-Umievo-itr012-Gleipnir-7B-Q4_K_M.gguf",
28
+ local_dir="./models"
29
+ )
30
+
31
+ hf_hub_download(
32
+ repo_id="Local-Novel-LLM-project/Ninja-V3-GGUF",
33
+ filename="Ninja-V3-Q4_K_M.gguf",
34
+ local_dir="./models"
35
+ )
36
+
37
+ hf_hub_download(
38
+ repo_id="Local-Novel-LLM-project/Kagemusya-7B-v1-GGUF",
39
+ filename="kagemusya-7b-v1Q8_0.gguf",
40
+ local_dir="./models"
41
+ )
42
+
43
+ hf_hub_download(
44
+ repo_id="elyza/Llama-3-ELYZA-JP-8B-GGUF",
45
+ filename="Llama-3-ELYZA-JP-8B-q4_k_m.gguf",
46
+ local_dir="./models"
47
+ )
48
+
49
+ llm = None
50
+ llm_model = None
51
+
52
  @spaces.GPU(duration=120)
53
  def respond(
54
  message,
 
60
  top_p,
61
  top_k,
62
  repeat_penalty,
63
+ template,
64
+ ):
65
+ chat_template = MessagesFormatterType[template]
66
+
67
+ global llm
68
+ global llm_model
69
 
70
+ if llm is None or llm_model != model:
71
+ llm = Llama(
72
+ model_path=f"models/{model}",
73
+ flash_attn=True,
74
+ n_gpu_layers=81,
75
+ n_batch=1024,
76
+ n_ctx=8192,
77
+ )
78
+ llm_model = model
79
+
80
  provider = LlamaCppPythonProvider(llm)
81
+
82
  agent = LlamaCppAgent(
83
+ provider,
84
+ system_prompt=f"{system_message}",
85
+ predefined_messages_formatter_type=chat_template,
86
+ debug_output=True
87
  )
88
+
89
  settings = provider.get_provider_default_settings()
90
  settings.temperature = temperature
91
  settings.top_k = top_k
 
93
  settings.max_tokens = max_tokens
94
  settings.repeat_penalty = repeat_penalty
95
  settings.stream = True
96
+
97
  messages = BasicChatHistory()
98
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  for msn in history:
100
+ user = {
101
+ 'role': Roles.user,
102
+ 'content': msn[0]
103
+ }
104
+ assistant = {
105
+ 'role': Roles.assistant,
106
+ 'content': msn[1]
107
+ }
108
+ messages.add_message(user)
109
+ messages.add_message(assistant)
110
 
111
  stream = agent.get_chat_response(
112
+ message,
113
+ llm_sampling_settings=settings,
114
+ chat_history=messages,
115
+ returns_streaming_generator=True,
116
+ print_output=False
117
  )
118
 
119
  outputs = ""
120
  for output in stream:
121
+ outputs += output
122
+ yield outputs
123
+
124
+ description = """<p align="center">Defaults to Oumuamua-7b-RP (you can switch to other models from additional inputs)</p>
125
+ <p><center>
126
+ <a href="https://huggingface.co/Aratako/Oumuamua-7b-RP-GGUF" target="_blank">[Oumuamua-7b-RP Model]</a>
127
+ <a href="https://huggingface.co/bartowski/Oumuamua-7b-instruct-v2-GGUF" target="_blank">[Oumuamua-7b-instruct-v2 Model]</a>
128
+ <a href="https://huggingface.co/mmnga/umiyuki-Umievo-itr012-Gleipnir-7B-gguf" target="_blank">[Umievo-itr012-Gleipnir-7B Model]</a>
129
+ <a href="https://huggingface.co/Local-Novel-LLM-project/Ninja-V3-GGUF" target="_blank">[Ninja-V3 Model]</a>
130
+ <a href="https://huggingface.co/Local-Novel-LLM-project/Kagemusya-7B-v1-GGUF" target="_blank">[Kagemusya-7B-v1 Model]</a>
131
+ <a href="https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B-GGUF" target="_blank">[Llama-3-ELYZA-JP-8B Model]</a>
132
+ </center></p>
133
+ """
134
+
135
+ templates = [
136
+ "MISTRAL", "CHATML", "VICUNA", "LLAMA_2", "SYNTHIA",
137
+ "NEURAL_CHAT", "SOLAR", "OPEN_CHAT", "ALPACA", "CODE_DS",
138
+ "B22", "LLAMA_3", "PHI_3"
139
+ ]
140
+
141
+ demo = gr.ChatInterface(
142
+ respond,
143
+ additional_inputs=[
144
+ gr.Dropdown([
145
+ 'Oumuamua-7b-RP_Q4_K_M.gguf',
146
+ 'Oumuamua-7b-instruct-v2-Q4_K_M.gguf',
147
+ 'umiyuki-Umievo-itr012-Gleipnir-7B-Q4_K_M.gguf',
148
+ 'Ninja-V3-Q4_K_M.gguf',
149
+ 'kagemusya-7b-v1Q8_0.gguf',
150
+ 'Llama-3-ELYZA-JP-8B-q4_k_m.gguf'
151
+ ],
152
+ value="Oumuamua-7b-RP_Q4_K_M.gguf",
153
+ label="Model"
154
+ ),
155
+ gr.Textbox(value="You are a helpful assistant.", label="System message"),
156
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
157
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
158
+ gr.Slider(
159
+ minimum=0.1,
160
+ maximum=1.0,
161
+ value=0.95,
162
+ step=0.05,
163
+ label="Top-p",
164
+ ),
165
+ gr.Slider(
166
+ minimum=0,
167
+ maximum=100,
168
+ value=40,
169
+ step=1,
170
+ label="Top-k",
171
+ ),
172
+ gr.Slider(
173
+ minimum=0.0,
174
+ maximum=2.0,
175
+ value=1.1,
176
+ step=0.1,
177
+ label="Repetition penalty",
178
+ ),
179
+ gr.Dropdown(
180
+ choices=templates,
181
+ value="LLAMA_3",
182
+ label="Template"
183
+ ),
184
+ ],
185
+ retry_btn="Retry",
186
+ undo_btn="Undo",
187
+ clear_btn="Clear",
188
+ submit_btn="Send",
189
+ title="Chat with various models using llama.cpp",
190
+ description=description,
191
+ chatbot=gr.Chatbot(
192
+ scale=1,
193
+ likeable=False,
194
+ show_copy_button=True
195
+ )
196
+ )
197
 
198
  if __name__ == "__main__":
199
+ demo.launch()