pabloce committed on
Commit e76be92 · verified · 1 Parent(s): 536cf8a

Update app.py

Files changed (1)
  1. app.py +89 -51
app.py CHANGED
@@ -1,16 +1,21 @@
 import spaces
-import json
-import subprocess
-import time
+import logging
 import gradio as gr
+from huggingface_hub import hf_hub_download
+
 from llama_cpp import Llama
-from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent.providers import LlamaCppPythonProvider
+from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
-from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
-from huggingface_hub import hf_hub_download
-from web_search import WebSearchTool
+from llama_cpp_agent.llm_output_settings import (
+    LlmStructuredOutputSettings,
+    LlmStructuredOutputType,
+)
+from llama_cpp_agent.tools import WebSearchTool
+from llama_cpp_agent.prompt_templates import web_search_system_prompt, research_system_prompt
+from style import css, PLACEHOLDER
+from utils import CitingSources
 
 model_selected = "Mistral-7B-Instruct-v0.3-Q6_K.gguf"
 examples = [
@@ -115,21 +120,17 @@ def write_message_to_user():
 
 @spaces.GPU(duration=120)
 def respond(
-    message,
-    history: list[tuple[str, str]],
-    model,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    top_k,
-    repeat_penalty,
+    message,
+    history: list[tuple[str, str]],
+    model,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
 ):
     chat_template = get_messages_formatter_type(model)
-    model_selected = model
-
-    system_message += f" {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))}"
-
     llm = Llama(
         model_path=f"models/{model}",
         flash_attn=True,
@@ -139,56 +140,92 @@ def respond(
         n_ctx=get_context_by_model(model),
     )
     provider = LlamaCppPythonProvider(llm)
+    logging.info(f"Loaded chat examples: {chat_template}")
+    search_tool = WebSearchTool(
+        llm_provider=provider,
+        message_formatter_type=chat_template,
+        max_tokens_search_results=12000,
+        max_tokens_per_summary=2048,
+    )
 
-    agent = LlamaCppAgent(
+    web_search_agent = LlamaCppAgent(
         provider,
-        system_prompt=f"{system_message}",
+        system_prompt=web_search_system_prompt,
         predefined_messages_formatter_type=chat_template,
-        debug_output=True
+        debug_output=True,
     )
-    search_tool = WebSearchTool(provider, chat_template, get_context_by_model(model))
+
+    answer_agent = LlamaCppAgent(
+        provider,
+        system_prompt=research_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
     settings = provider.get_provider_default_settings()
+    settings.stream = False
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
+
     settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
+    settings.repeat_penalty = repeat_penalty
+
     output_settings = LlmStructuredOutputSettings.from_functions(
-        [search_tool.get_tool(), write_message_to_user])
+        [search_tool.get_tool()]
+    )
+
     messages = BasicChatHistory()
 
     for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
         messages.add_message(user)
         messages.add_message(assistant)
-    result = agent.get_chat_response(message, llm_sampling_settings=settings, structured_output_settings=output_settings,
-                                     chat_history=messages,
-                                     print_output=False)
-    while True:
-        if result[0]["function"] == "write_message_to_user":
-            break
-        else:
-            result = agent.get_chat_response(result[0]["return_value"], role=Roles.tool, chat_history=messages, structured_output_settings=output_settings,
-                                             print_output=False)
-
-    stream = agent.get_chat_response(
-        result[0]["return_value"], role=Roles.tool, llm_sampling_settings=settings, chat_history=messages, returns_streaming_generator=True,
-        print_output=False
+
+    result = web_search_agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        structured_output_settings=output_settings,
+        add_message_to_chat_history=False,
+        add_response_to_chat_history=False,
+        print_output=False,
     )
 
     outputs = ""
-    for output in stream:
-        outputs += output
+
+    settings.stream = True
+    response_text = answer_agent.get_chat_response(
+        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
+        result[0]["return_value"],
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+
+    for text in response_text:
+        outputs += text
         yield outputs
 
+    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+        [CitingSources], LlmStructuredOutputType.object_instance
+    )
+
+    citing_sources = answer_agent.get_chat_response(
+        "Cite the sources you used in your response.",
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=False,
+        structured_output_settings=output_settings,
+        print_output=False,
+    )
+    outputs += "\n\nSources:\n"
+    outputs += "\n".join(citing_sources.sources)
+    yield outputs
+
 
 demo = gr.ChatInterface(
     respond,
@@ -200,7 +237,7 @@ demo = gr.ChatInterface(
             value="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
            label="Model"
         ),
-        gr.Textbox(value="You are a helpful assistant. Use additional available information you have access to when giving a response. Always give detailed and long responses. Format your response, well structured in markdown format.", label="System message"),
+        gr.Textbox(value=web_search_system_prompt, label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
@@ -247,6 +284,7 @@ demo = gr.ChatInterface(
     submit_btn="Send",
     examples = (examples),
     description="Llama-cpp-agent: Chat Web Search DDG Agent",
+    analytics_enabled=False,
     chatbot=gr.Chatbot(scale=1, placeholder=PLACEHOLDER)
 )
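
The citation step at the end of respond() relies on a CitingSources model imported from a local utils module that this commit does not touch. A minimal sketch of what such a model could look like, assuming it is a Pydantic model whose sources field holds the cited URLs; only the sources attribute is implied by citing_sources.sources above, and the docstring and description text are assumptions:

```python
# Hypothetical sketch of CitingSources in utils.py -- not part of this commit.
# Only the `sources` field is implied by `citing_sources.sources` in the diff;
# the docstring and description text are assumptions.
from typing import List

from pydantic import BaseModel, Field


class CitingSources(BaseModel):
    """Structured object the answer agent fills in when asked to cite its sources."""

    sources: List[str] = Field(
        ...,
        description="List of the source URLs used to answer the user's question.",
    )
```

Passing [CitingSources] with LlmStructuredOutputType.object_instance, as the diff does, makes get_chat_response return an instance of the model rather than raw text, which is why the new code can join citing_sources.sources directly into the final reply.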
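respond() also calls get_messages_formatter_type and get_context_by_model, helpers defined elsewhere in app.py and untouched by this diff. A rough sketch of what such helpers might look like, purely to show how the selected GGUF file name could be mapped to a llama-cpp-agent prompt format and an n_ctx value; the enum members are real llama_cpp_agent values, but the name checks and context sizes are illustrative assumptions, not the file's actual logic:

```python
# Hypothetical sketches of helpers referenced in respond() but outside this diff.
# The model-name checks and context sizes below are illustrative assumptions.
from llama_cpp_agent import MessagesFormatterType


def get_messages_formatter_type(model_name: str) -> MessagesFormatterType:
    # Pick the chat template matching the selected GGUF model.
    if "Mistral" in model_name:
        return MessagesFormatterType.MISTRAL
    return MessagesFormatterType.CHATML


def get_context_by_model(model_name: str) -> int:
    # Context window (n_ctx) to allocate for the selected model.
    if "Mistral-7B-Instruct-v0.3" in model_name:
        return 32768
    return 8192
```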