jedick committed on
Commit
f52b66d
Β·
1 Parent(s): 158fae7

Support multiple tool calls for local models

Browse files
Files changed (3) hide show
  1. app.py +5 -4
  2. mods/tool_calling_llm.py +51 -34
  3. prompts.py +11 -2
app.py CHANGED
@@ -382,7 +382,8 @@ with gr.Blocks(
382
  status_text = f"""
383
  πŸ“ Now in **local** mode, using ZeroGPU hardware<br>
384
  βŒ› Response time is about one minute<br>
385
- 🧠 Thinking is enabled for query; add **/think** to enable thinking for answer</br>
 
386
  ✨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
387
  🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
388
  """
@@ -400,8 +401,8 @@ with gr.Blocks(
400
  end = None
401
  info_text = f"""
402
  **Database:** {len(sources)} emails from {start} to {end}.
403
- **Features:** RAG, today's date, hybrid search (dense+sparse), thinking output (local),
404
- multiple retrievals (remote), citations output (remote), chat memory.
405
  **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
406
  """
407
  return info_text
@@ -448,7 +449,7 @@ with gr.Blocks(
448
  gr.Examples(
449
  examples=[[q] for q in multi_tool_questions],
450
  inputs=[input],
451
- label="Multiple retrievals (remote mode)",
452
  elem_id="example-questions",
453
  )
454
  multi_turn_questions = [
 
382
  status_text = f"""
383
  πŸ“ Now in **local** mode, using ZeroGPU hardware<br>
384
  βŒ› Response time is about one minute<br>
385
+ 🧠 Add **/think** to enable thinking for answer<br>
386
+ &emsp;&nbsp; πŸ” Thinking is already enabled for query<br>
387
  ✨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
388
  🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
389
  """
 
401
  end = None
402
  info_text = f"""
403
  **Database:** {len(sources)} emails from {start} to {end}.
404
+ **Features:** RAG, today's date, hybrid search (dense+sparse), multiple retrievals,
405
+ thinking output (local), citations output (remote), chat memory.
406
  **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
407
  """
408
  return info_text
 
449
  gr.Examples(
450
  examples=[[q] for q in multi_tool_questions],
451
  inputs=[input],
452
+ label="Multiple retrievals",
453
  elem_id="example-questions",
454
  )
455
  multi_turn_questions = [
mods/tool_calling_llm.py CHANGED
@@ -43,8 +43,12 @@ DEFAULT_SYSTEM_TEMPLATE = """You have access to the following tools:
43
  You must always select one of the above tools and respond with only a JSON object matching the following schema:
44
 
45
  {{
46
- "tool": <name of the selected tool>,
47
- "tool_input": <parameters for the selected tool, matching the tool's JSON schema>
 
 
 
 
48
  }}
49
  """ # noqa: E501
50
 
@@ -173,52 +177,65 @@ class ToolCallingLLM(BaseChatModel, ABC):
173
  # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
174
  think_text, post_think = extract_think(response_message.content)
175
 
176
- # Parse output for JSON
 
 
 
177
  try:
178
- parsed_json_result = json.loads(post_think)
179
  except json.JSONDecodeError:
180
  # Return entire response if JSON wasn't parsed (or is missing)
181
  return AIMessage(content=response_message.content)
182
 
183
- # Get tool name from output
184
- called_tool_name = (
185
- parsed_json_result["tool"]
186
- if "tool" in parsed_json_result
187
- else parsed_json_result["name"] if "name" in parsed_json_result else None
188
- )
189
-
190
- # Check if tool name is in functions list
191
- called_tool = next(
192
- (fn for fn in functions if fn["function"]["name"] == called_tool_name), None
193
- )
194
- if called_tool is None:
195
- # Issue a warning and return the generated content 20250727 jmd
196
- warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
197
- return AIMessage(content=response_message.content)
198
 
199
- # Get tool arguments from output
200
- called_tool_arguments = (
201
- parsed_json_result["tool_input"]
202
- if "tool_input" in parsed_json_result
203
- else (
204
- parsed_json_result["parameters"]
205
- if "parameters" in parsed_json_result
206
- else {}
 
 
 
 
 
 
 
 
 
 
 
207
  )
208
- )
209
 
210
- # Put together response message
211
- response_message = AIMessage(
212
- content=f"<think>\n{think_text}\n</think>",
213
- tool_calls=[
214
  ToolCall(
215
  name=called_tool_name,
216
  args=called_tool_arguments,
217
  id=f"call_{str(uuid.uuid4()).replace('-', '')}",
218
  )
219
- ],
220
- )
221
 
 
 
 
 
 
 
 
 
 
222
  return response_message
223
 
224
  def _generate(
 
43
  You must always select one of the above tools and respond with only a JSON object matching the following schema:
44
 
45
  {{
46
+ "tool": <name of selected tool 1>,
47
+ "tool_input": <parameters for selected tool 1, matching the tool's JSON schema>
48
+ }},
49
+ {{
50
+ "tool": <name of selected tool 2>,
51
+ "tool_input": <parameters for selected tool 2, matching the tool's JSON schema>
52
  }}
53
  """ # noqa: E501
54
 
 
177
  # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
178
  think_text, post_think = extract_think(response_message.content)
179
 
180
+ print("post_think")
181
+ print(post_think)
182
+
183
+ # Parse output for JSON (support multiple objects separated by commas)
184
  try:
185
+ parsed_json_results = json.loads(f"[{post_think}]")
186
  except json.JSONDecodeError:
187
  # Return entire response if JSON wasn't parsed (or is missing)
188
  return AIMessage(content=response_message.content)
189
 
190
+ tool_calls = []
191
+ for parsed_json_result in parsed_json_results:
192
+ # Get tool name from output
193
+ called_tool_name = (
194
+ parsed_json_result["tool"]
195
+ if "tool" in parsed_json_result
196
+ else (
197
+ parsed_json_result["name"] if "name" in parsed_json_result else None
198
+ )
199
+ )
 
 
 
 
 
200
 
201
+ # Check if tool name is in functions list
202
+ called_tool = next(
203
+ (fn for fn in functions if fn["function"]["name"] == called_tool_name),
204
+ None,
205
+ )
206
+ if called_tool is None:
207
+ # Issue a warning and skip this tool call
208
+ warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
209
+ continue
210
+
211
+ # Get tool arguments from output
212
+ called_tool_arguments = (
213
+ parsed_json_result["tool_input"]
214
+ if "tool_input" in parsed_json_result
215
+ else (
216
+ parsed_json_result["parameters"]
217
+ if "parameters" in parsed_json_result
218
+ else {}
219
+ )
220
  )
 
221
 
222
+ tool_calls.append(
 
 
 
223
  ToolCall(
224
  name=called_tool_name,
225
  args=called_tool_arguments,
226
  id=f"call_{str(uuid.uuid4()).replace('-', '')}",
227
  )
228
+ )
 
229
 
230
+ if not tool_calls:
231
+ # If nothing valid, return original content
232
+ return AIMessage(content=response_message.content)
233
+
234
+ # Put together response message
235
+ response_message = AIMessage(
236
+ content=f"<think>\n{think_text}\n</think>",
237
+ tool_calls=tool_calls,
238
+ )
239
  return response_message
240
 
241
  def _generate(
prompts.py CHANGED
@@ -31,7 +31,7 @@ def query_prompt(chat_model, think=False):
31
  "Do not answer the user's question and do not ask the user for more information. "
32
  # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
33
  f"The emails available for retrieval are from {start} to {end}. "
34
- "For questions about differences or comparison between X and Y, retrieve emails about X and Y. "
35
  "For general summaries, use retrieve_emails(search_query='R'). "
36
  "For specific questions, use retrieve_emails(search_query=<specific topic>). "
37
  "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
@@ -60,7 +60,8 @@ def generate_prompt(chat_model, think=False, with_tools=False):
60
  "Summarize the content of the emails rather than copying the headers. " # Qwen
61
  "You must include inline citations (email senders and dates) in each part of your response. "
62
  "Only answer general questions about R if the answer is in the retrieved emails. "
63
- "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails. "
 
64
  )
65
  if with_tools:
66
  prompt = (
@@ -87,6 +88,10 @@ You have access to the following tools:
87
 
88
  You must always select one of the above tools and respond with only a JSON object matching the following schema:
89
 
 
 
 
 
90
  {{
91
  "tool": <function-name>,
92
  "tool_input": <args-json-object>
@@ -102,6 +107,10 @@ generic_tools_template = """
102
 
103
  You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
104
 
 
 
 
 
105
  {{
106
  "tool": <function-name>,
107
  "tool_input": <args-json-object>
 
31
  "Do not answer the user's question and do not ask the user for more information. "
32
  # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
33
  f"The emails available for retrieval are from {start} to {end}. "
34
+ "For questions about differences or comparison between X and Y, retrieve emails about X and Y using separate tool calls. "
35
  "For general summaries, use retrieve_emails(search_query='R'). "
36
  "For specific questions, use retrieve_emails(search_query=<specific topic>). "
37
  "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
 
60
  "Summarize the content of the emails rather than copying the headers. " # Qwen
61
  "You must include inline citations (email senders and dates) in each part of your response. "
62
  "Only answer general questions about R if the answer is in the retrieved emails. "
63
+ "Your response can include URLs, but make sure they are quoted verbatim from the retrieved emails. " # Qwen
64
+ "Respond with 300 words maximum and 30 lines of code maximum. "
65
  )
66
  if with_tools:
67
  prompt = (
 
88
 
89
  You must always select one of the above tools and respond with only a JSON object matching the following schema:
90
 
91
+ {{
92
+ "tool": <function-name>,
93
+ "tool_input": <args-json-object>
94
+ }},
95
  {{
96
  "tool": <function-name>,
97
  "tool_input": <args-json-object>
 
107
 
108
  You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
109
 
110
+ {{
111
+ "tool": <function-name>,
112
+ "tool_input": <args-json-object>
113
+ }},
114
  {{
115
  "tool": <function-name>,
116
  "tool_input": <args-json-object>