aixsatoshi commited on
Commit
94a3252
1 Parent(s): 821dc90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -129
app.py CHANGED
@@ -22,49 +22,47 @@ hf_hub_download(
22
  local_dir="./models"
23
  )
24
 
25
-
26
  # 推論関数
27
  @spaces.GPU(duration=120)
28
  def respond(
29
- message,
30
- history: list[tuple[str, str]],
31
- model,
32
- system_message,
33
- max_tokens,
34
- temperature,
35
- top_p,
36
- top_k,
37
- repeat_penalty,
38
  ):
39
- chat_template = MessagesFormatterType.GEMMA_2
40
-
41
- llm = Llama(
42
- model_path=f"models/{model}",
43
- flash_attn=True,
44
- n_gpu_layers=81,
45
- n_batch=1024,
46
- n_ctx=8192,
47
- )
48
- provider = LlamaCppPythonProvider(llm)
49
-
50
- agent = LlamaCppAgent(
51
- provider,
52
- system_prompt=f"{system_message}",
53
- predefined_messages_formatter_type=chat_template,
54
- debug_output=True
55
- )
56
-
57
- settings = provider.get_provider_default_settings()
58
- settings.temperature = temperature
59
- settings.top_k = top_k
60
- settings.top_p = top_p
61
- settings.max_tokens = max_tokens
62
- settings.repeat_penalty = repeat_penalty
63
- settings.stream = True
64
-
65
- messages = BasicChatHistory()
66
-
67
- # 1ショットプロンプトを追加
68
  one_shot_prompt = """
69
  あなたは優れた翻訳者です。以下の文章を日本語から英語に翻訳してください。翻訳は文法的に正しく、自然な表現を使用し、文脈に適した内容にしてください。また、専門用語や文化的なニュアンスを正確に伝えるよう心がけてください。
70
 
@@ -80,108 +78,95 @@ def respond(
80
  日本語: 優れた翻訳は、原文の意味を正確に伝えるだけでなく、読み手にとって自然な表現である必要があります。文法的な正確さはもちろんのこと、文化的なニュアンスも重要です。専門用語の正確な訳も求められます。さらに、文脈に適した訳を提供することが、翻訳の品質を高めます。最終的には、読み手にとってわかりやすい訳を目指してください。
81
  英語: A good translation should not only convey the meaning of the original text accurately but also be expressed in a natural way for the reader. In addition to grammatical accuracy, cultural nuances are important. Accurate translation of technical terms is also required. Furthermore, providing a translation that fits the context enhances the quality of the translation. Ultimately, aim for a translation that is easy for the reader to understand.
82
 
 
83
  """
84
-
85
-
86
- system_message = one_shot_prompt + system_message
87
-
88
- for msn in history:
89
- user = {
90
- 'role': Roles.user,
91
- 'content': msn[0]
92
- }
93
- assistant = {
94
- 'role': Roles.assistant,
95
- 'content': msn[1]
96
- }
97
- messages.add_message(user)
98
- messages.add_message(assistant)
99
-
100
- stream = agent.get_chat_response(
101
- message,
102
- llm_sampling_settings=settings,
103
- chat_history=messages,
104
- returns_streaming_generator=True,
105
- print_output=False
106
- )
107
-
108
- outputs = ""
109
- for output in stream:
110
- outputs += output
111
- yield [(message, outputs)]
112
 
113
  # Gradioのインターフェースを作成
114
  def create_interface(model_name, description):
115
- return gr.ChatInterface(
116
- respond,
117
- additional_inputs=[
118
- gr.Textbox(value=model_name, label="Model", interactive=False),
119
- gr.Textbox(value="以下の英語の文章を日本語に翻訳してください:", label="System message"),
120
- gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
121
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
122
- gr.Slider(
123
- minimum=0.1,
124
- maximum=1.0,
125
- value=0.95,
126
- step=0.05,
127
- label="Top-p",
128
- ),
129
- gr.Slider(
130
- minimum=0,
131
- maximum=100,
132
- value=40,
133
- step=1,
134
- label="Top-k",
135
- ),
136
- gr.Slider(
137
- minimum=0.0,
138
- maximum=2.0,
139
- value=1.1,
140
- step=0.1,
141
- label="Repetition penalty",
142
- ),
143
- ],
144
- retry_btn="Retry",
145
- undo_btn="Undo",
146
- clear_btn="Clear",
147
- submit_btn=None, # Remove the individual submit button
148
- title=f"Chat with Gemma 2 using llama.cpp - {model_name}",
149
- description=description,
150
- chatbot=gr.Chatbot(
151
- scale=1,
152
- likeable=False,
153
- show_copy_button=True
154
- )
155
- )
156
 
157
  # 各モデルのインターフェース
158
  description_9b = """<p align="center">Gemma-2 9B it Model</p>"""
159
- description_27b = """<p align="center">Gemma-2-9B-It-SPPO-Iter3 Model</p>"""
160
 
161
  interface_9b = create_interface('gemma-2-9b-it-Q5_K_M.gguf', description_9b)
162
  interface_27b = create_interface('Gemma-2-9B-It-SPPO-Iter3-Q5_K_M.gguf', description_27b)
163
 
164
  # Gradio Blocksで2つのインターフェースを並べて表示
165
  with gr.Blocks() as demo:
166
- #gr.Markdown("# Compare Gemma-2 9B and 27B Models")
167
- with gr.Row():
168
- with gr.Column():
169
- input_field = gr.Textbox(label="Input", interactive=True)
170
- with gr.Column():
171
- interface_9b.render()
172
- with gr.Column():
173
- interface_27b.render()
174
-
175
- submit_btn = gr.Button("Send")
176
-
177
- def send_to_both(input_text):
178
- return input_text, input_text
179
-
180
- submit_btn.click(
181
- fn=send_to_both,
182
- inputs=input_field,
183
- outputs=[interface_9b.chatbot, interface_27b.chatbot]
184
- )
185
 
186
  if __name__ == "__main__":
187
- demo.launch(share=True)
 
22
  local_dir="./models"
23
  )
24
 
 
25
  # 推論関数
26
  @spaces.GPU(duration=120)
27
  def respond(
28
+ message,
29
+ history: list[tuple[str, str]],
30
+ model,
31
+ system_message,
32
+ max_tokens,
33
+ temperature,
34
+ top_p,
35
+ top_k,
36
+ repeat_penalty,
37
  ):
38
+ chat_template = MessagesFormatterType.GEMMA_2
39
+
40
+ llm = Llama(
41
+ model_path=f"models/{model}",
42
+ flash_attn=True,
43
+ n_gpu_layers=81,
44
+ n_batch=1024,
45
+ n_ctx=8192,
46
+ )
47
+ provider = LlamaCppPythonProvider(llm)
48
+
49
+ agent = LlamaCppAgent(
50
+ provider,
51
+ system_prompt=f"{system_message}",
52
+ predefined_messages_formatter_type=chat_template,
53
+ debug_output=True
54
+ )
55
+
56
+ settings = provider.get_provider_default_settings()
57
+ settings.temperature = temperature
58
+ settings.top_k = top_k
59
+ settings.top_p = top_p
60
+ settings.max_tokens = max_tokens
61
+ settings.repeat_penalty = repeat_penalty
62
+ settings.stream = True
63
+
64
+ messages = BasicChatHistory()
65
+
 
66
  one_shot_prompt = """
67
  あなたは優れた翻訳者です。以下の文章を日本語から英語に翻訳してください。翻訳は文法的に正しく、自然な表現を使用し、文脈に適した内容にしてください。また、専門用語や文化的なニュアンスを正確に伝えるよう心がけてください。
68
 
 
78
  日本語: 優れた翻訳は、原文の意味を正確に伝えるだけでなく、読み手にとって自然な表現である必要があります。文法的な正確さはもちろんのこと、文化的なニュアンスも重要です。専門用語の正確な訳も求められます。さらに、文脈に適した訳を提供することが、翻訳の品質を高めます。最終的には、読み手にとってわかりやすい訳を目指してください。
79
  英語: A good translation should not only convey the meaning of the original text accurately but also be expressed in a natural way for the reader. In addition to grammatical accuracy, cultural nuances are important. Accurate translation of technical terms is also required. Furthermore, providing a translation that fits the context enhances the quality of the translation. Ultimately, aim for a translation that is easy for the reader to understand.
80
 
81
+ 以下の日本語の文章を英語に翻訳してください:
82
  """
83
+ system_message += one_shot_prompt
84
+
85
+
86
+ for msn in history:
87
+ user = {
88
+ 'role': Roles.user,
89
+ 'content': msn[0]
90
+ }
91
+ assistant = {
92
+ 'role': Roles.assistant,
93
+ 'content': msn[1]
94
+ }
95
+ messages.add_message(user)
96
+ messages.add_message(assistant)
97
+
98
+ stream = agent.get_chat_response(
99
+ message,
100
+ llm_sampling_settings=settings,
101
+ chat_history=messages,
102
+ returns_streaming_generator=True,
103
+ print_output=False
104
+ )
105
+
106
+ outputs = ""
107
+ for output in stream:
108
+ outputs += output
109
+ yield outputs
 
110
 
111
  # Gradioのインターフェースを作成
112
def create_interface(model_name, description):
    """Build a gr.ChatInterface wired to `respond` for one GGUF model.

    Args:
        model_name: GGUF filename under ./models; shown read-only and passed
            through to `respond` as the `model` additional input.
        description: HTML/markdown rendered under the interface title.

    Returns:
        A configured gr.ChatInterface (not yet rendered/launched).
    """
    # Extra controls are forwarded positionally to `respond` after
    # (message, history): model, system_message, max_tokens, temperature,
    # top_p, top_k, repeat_penalty — order must match respond's signature.
    extra_controls = [
        gr.Textbox(value=model_name, label="Model", interactive=False),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ]
    history_panel = gr.Chatbot(
        scale=1,
        likeable=False,
        show_copy_button=True,
    )
    return gr.ChatInterface(
        respond,
        additional_inputs=extra_controls,
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
        submit_btn="Send",
        title=f"Chat with Gemma 2 using llama.cpp - {model_name}",
        description=description,
        chatbot=history_panel,
    )
154
 
155
# Per-model interface descriptions (HTML shown under each title).
description_9b = """<p align="center">Gemma-2 9B it Model</p>"""
description_27b = """<p align="center">Gemma-2 9B SPPO it Model</p>"""

# One chat interface per GGUF model file.
interface_9b = create_interface('gemma-2-9b-it-Q5_K_M.gguf', description_9b)
interface_27b = create_interface('Gemma-2-9B-It-SPPO-Iter3-Q5_K_M.gguf', description_27b)

# Lay the two interfaces out side by side so the models can be compared.
with gr.Blocks() as demo:
    #gr.Markdown("# Compare Gemma-2 9B and 27B Models")
    with gr.Row():
        with gr.Column():
            interface_9b.render()
        with gr.Column():
            interface_27b.render()

if __name__ == "__main__":
    demo.launch()