aixsatoshi committed
Commit 246cb3f
1 Parent(s): 0dfa748

Update app.py

Files changed (1)
  app.py +122 -122
app.py CHANGED
@@ -25,112 +25,112 @@ hf_hub_download(
 # Inference function
 @spaces.GPU(duration=120)
 def respond(
-    message,
-    history: list[tuple[str, str]],
-    model,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    top_k,
-    repeat_penalty,
+    message,
+    history: list[tuple[str, str]],
+    model,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
 ):
-    chat_template = MessagesFormatterType.GEMMA_2
-
-    llm = Llama(
-        model_path=f"models/{model}",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield outputs
+    chat_template = MessagesFormatterType.GEMMA_2
+
+    llm = Llama(
+        model_path=f"models/{model}",
+        flash_attn=True,
+        n_gpu_layers=81,
+        n_batch=1024,
+        n_ctx=8192,
+    )
+    provider = LlamaCppPythonProvider(llm)
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+
+    for msn in history:
+        user = {
+            'role': Roles.user,
+            'content': msn[0]
+        }
+        assistant = {
+            'role': Roles.assistant,
+            'content': msn[1]
+        }
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
+
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield [(message, outputs)]

 # Create the Gradio interface
 def create_interface(model_name, description):
-    return gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            gr.Textbox(value=model_name, label="Model", interactive=False),
-            gr.Textbox(value="You are a helpful assistant.", label="System message"),
-            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
-            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-            gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-p",
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=100,
-                value=40,
-                step=1,
-                label="Top-k",
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                value=1.1,
-                step=0.1,
-                label="Repetition penalty",
-            ),
-        ],
-        retry_btn="Retry",
-        undo_btn="Undo",
-        clear_btn="Clear",
-        submit_btn=None,  # Remove the individual submit button
-        title=f"Chat with Gemma 2 using llama.cpp - {model_name}",
-        description=description,
-        chatbot=gr.Chatbot(
-            scale=1,
-            likeable=False,
-            show_copy_button=True
-        )
-    )
+    return gr.ChatInterface(
+        respond,
+        additional_inputs=[
+            gr.Textbox(value=model_name, label="Model", interactive=False),
+            gr.Textbox(value="You are a helpful assistant.", label="System message"),
+            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
+            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+            gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-p",
+            ),
+            gr.Slider(
+                minimum=0,
+                maximum=100,
+                value=40,
+                step=1,
+                label="Top-k",
+            ),
+            gr.Slider(
+                minimum=0.0,
+                maximum=2.0,
+                value=1.1,
+                step=0.1,
+                label="Repetition penalty",
+            ),
+        ],
+        retry_btn="Retry",
+        undo_btn="Undo",
+        clear_btn="Clear",
+        submit_btn=None,  # Remove the individual submit button
+        title=f"Chat with Gemma 2 using llama.cpp - {model_name}",
+        description=description,
+        chatbot=gr.Chatbot(
+            scale=1,
+            likeable=False,
+            show_copy_button=True
+        )
+    )

 # Interfaces for each model
 description_9b = """<p align="center">Gemma-2 9B it Model</p>"""
@@ -141,25 +141,25 @@ interface_27b = create_interface('gemma-2-27b-it-Q5_K_M.gguf', description_27b)

 # Display the two interfaces side by side with Gradio Blocks
 with gr.Blocks() as demo:
-    #gr.Markdown("# Compare Gemma-2 9B and 27B Models")
-    with gr.Row():
-        with gr.Column():
-            input_field = gr.Textbox(label="Input", interactive=True)
-        with gr.Column():
-            interface_9b.render()
-        with gr.Column():
-            interface_27b.render()
-
-    submit_btn = gr.Button("Send")
-
-    def send_to_both(input_text):
-        return input_text, input_text
-
-    submit_btn.click(
-        fn=send_to_both,
-        inputs=input_field,
-        outputs=[interface_9b.chatbot, interface_27b.chatbot]
-    )
+    #gr.Markdown("# Compare Gemma-2 9B and 27B Models")
+    with gr.Row():
+        with gr.Column():
+            input_field = gr.Textbox(label="Input", interactive=True)
+        with gr.Column():
+            interface_9b.render()
+        with gr.Column():
+            interface_27b.render()
+
+    submit_btn = gr.Button("Send")
+
+    def send_to_both(input_text):
+        return input_text, input_text
+
+    submit_btn.click(
+        fn=send_to_both,
+        inputs=input_field,
+        outputs=[interface_9b.chatbot, interface_27b.chatbot]
+    )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
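
The two functional changes in this commit are the streaming yield shape, yield [(message, outputs)] (a list of (user, bot) tuples that a tuple-format gr.Chatbot renders directly), and demo.launch(share=True). Below is a minimal, self-contained sketch of that yield pattern; fake_stream is a hypothetical stand-in for the llama-cpp-agent streaming call, not the Space's actual model code.

import gradio as gr

# Hypothetical stand-in for the llama-cpp-agent streaming call.
def fake_stream(message):
    for token in ["Streamed ", "reply ", "to: ", message]:
        yield token

def respond(message):
    # Re-yield the accumulated text on each step as [(user, bot)],
    # the shape a tuple-format gr.Chatbot renders directly.
    outputs = ""
    for token in fake_stream(message):
        outputs += token
        yield [(message, outputs)]

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    chat = gr.Chatbot()
    box.submit(respond, inputs=box, outputs=chat)

if __name__ == "__main__":
    demo.launch()

Each yield replaces the Chatbot value wholesale, so re-yielding the growing list repaints the partial reply as tokens arrive. share=True requests a temporary public gradio.live link when running locally; on Hugging Face Spaces, which are already hosted, Gradio typically ignores it with a warning.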