Spaces:
Sleeping
Sleeping
jedick
committed on
Commit
·
f52b66d
1
Parent(s):
158fae7
Support multiple tool calls for local models
Browse files
- app.py +5 -4
- mods/tool_calling_llm.py +51 -34
- prompts.py +11 -2
app.py
CHANGED
@@ -382,7 +382,8 @@ with gr.Blocks(
|
|
382 |
status_text = f"""
|
383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
384 |
β Response time is about one minute<br>
|
385 |
-
π§
|
|
|
386 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
387 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
388 |
"""
|
@@ -400,8 +401,8 @@ with gr.Blocks(
|
|
400 |
end = None
|
401 |
info_text = f"""
|
402 |
**Database:** {len(sources)} emails from {start} to {end}.
|
403 |
-
**Features:** RAG, today's date, hybrid search (dense+sparse),
|
404 |
-
|
405 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
406 |
"""
|
407 |
return info_text
|
@@ -448,7 +449,7 @@ with gr.Blocks(
|
|
448 |
gr.Examples(
|
449 |
examples=[[q] for q in multi_tool_questions],
|
450 |
inputs=[input],
|
451 |
-
label="Multiple retrievals
|
452 |
elem_id="example-questions",
|
453 |
)
|
454 |
multi_turn_questions = [
|
|
|
382 |
status_text = f"""
|
383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
384 |
β Response time is about one minute<br>
|
385 |
+
π§ Add **/think** to enable thinking for answer</br>
|
386 |
+
  π Thinking is already enabled for query<br>
|
387 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
388 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
389 |
"""
|
|
|
401 |
end = None
|
402 |
info_text = f"""
|
403 |
**Database:** {len(sources)} emails from {start} to {end}.
|
404 |
+
**Features:** RAG, today's date, hybrid search (dense+sparse), multiple retrievals,
|
405 |
+
thinking output (local), citations output (remote), chat memory.
|
406 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
407 |
"""
|
408 |
return info_text
|
|
|
449 |
gr.Examples(
|
450 |
examples=[[q] for q in multi_tool_questions],
|
451 |
inputs=[input],
|
452 |
+
label="Multiple retrievals",
|
453 |
elem_id="example-questions",
|
454 |
)
|
455 |
multi_turn_questions = [
|
mods/tool_calling_llm.py
CHANGED
@@ -43,8 +43,12 @@ DEFAULT_SYSTEM_TEMPLATE = """You have access to the following tools:
|
|
43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
44 |
|
45 |
{{
|
46 |
-
"tool": <name of
|
47 |
-
"tool_input": <parameters for
|
|
|
|
|
|
|
|
|
48 |
}}
|
49 |
""" # noqa: E501
|
50 |
|
@@ -173,52 +177,65 @@ class ToolCallingLLM(BaseChatModel, ABC):
|
|
173 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
174 |
think_text, post_think = extract_think(response_message.content)
|
175 |
|
176 |
-
|
|
|
|
|
|
|
177 |
try:
|
178 |
-
|
179 |
except json.JSONDecodeError:
|
180 |
# Return entire response if JSON wasn't parsed (or is missing)
|
181 |
return AIMessage(content=response_message.content)
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
)
|
194 |
-
if called_tool is None:
|
195 |
-
# Issue a warning and return the generated content 20250727 jmd
|
196 |
-
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
197 |
-
return AIMessage(content=response_message.content)
|
198 |
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
)
|
208 |
-
)
|
209 |
|
210 |
-
|
211 |
-
response_message = AIMessage(
|
212 |
-
content=f"<think>\n{think_text}\n</think>",
|
213 |
-
tool_calls=[
|
214 |
ToolCall(
|
215 |
name=called_tool_name,
|
216 |
args=called_tool_arguments,
|
217 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
218 |
)
|
219 |
-
|
220 |
-
)
|
221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
return response_message
|
223 |
|
224 |
def _generate(
|
|
|
43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
44 |
|
45 |
{{
|
46 |
+
"tool": <name of selected tool 1>,
|
47 |
+
"tool_input": <parameters for selected tool 1, matching the tool's JSON schema>
|
48 |
+
}},
|
49 |
+
{{
|
50 |
+
"tool": <name of selected tool 2>,
|
51 |
+
"tool_input": <parameters for selected tool 2, matching the tool's JSON schema>
|
52 |
}}
|
53 |
""" # noqa: E501
|
54 |
|
|
|
177 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
178 |
think_text, post_think = extract_think(response_message.content)
|
179 |
|
180 |
+
print("post_think")
|
181 |
+
print(post_think)
|
182 |
+
|
183 |
+
# Parse output for JSON (support multiple objects separated by commas)
|
184 |
try:
|
185 |
+
parsed_json_results = json.loads(f"[{post_think}]")
|
186 |
except json.JSONDecodeError:
|
187 |
# Return entire response if JSON wasn't parsed (or is missing)
|
188 |
return AIMessage(content=response_message.content)
|
189 |
|
190 |
+
tool_calls = []
|
191 |
+
for parsed_json_result in parsed_json_results:
|
192 |
+
# Get tool name from output
|
193 |
+
called_tool_name = (
|
194 |
+
parsed_json_result["tool"]
|
195 |
+
if "tool" in parsed_json_result
|
196 |
+
else (
|
197 |
+
parsed_json_result["name"] if "name" in parsed_json_result else None
|
198 |
+
)
|
199 |
+
)
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
+
# Check if tool name is in functions list
|
202 |
+
called_tool = next(
|
203 |
+
(fn for fn in functions if fn["function"]["name"] == called_tool_name),
|
204 |
+
None,
|
205 |
+
)
|
206 |
+
if called_tool is None:
|
207 |
+
# Issue a warning and skip this tool call
|
208 |
+
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
209 |
+
continue
|
210 |
+
|
211 |
+
# Get tool arguments from output
|
212 |
+
called_tool_arguments = (
|
213 |
+
parsed_json_result["tool_input"]
|
214 |
+
if "tool_input" in parsed_json_result
|
215 |
+
else (
|
216 |
+
parsed_json_result["parameters"]
|
217 |
+
if "parameters" in parsed_json_result
|
218 |
+
else {}
|
219 |
+
)
|
220 |
)
|
|
|
221 |
|
222 |
+
tool_calls.append(
|
|
|
|
|
|
|
223 |
ToolCall(
|
224 |
name=called_tool_name,
|
225 |
args=called_tool_arguments,
|
226 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
227 |
)
|
228 |
+
)
|
|
|
229 |
|
230 |
+
if not tool_calls:
|
231 |
+
# If nothing valid, return original content
|
232 |
+
return AIMessage(content=response_message.content)
|
233 |
+
|
234 |
+
# Put together response message
|
235 |
+
response_message = AIMessage(
|
236 |
+
content=f"<think>\n{think_text}\n</think>",
|
237 |
+
tool_calls=tool_calls,
|
238 |
+
)
|
239 |
return response_message
|
240 |
|
241 |
def _generate(
|
prompts.py
CHANGED
@@ -31,7 +31,7 @@ def query_prompt(chat_model, think=False):
|
|
31 |
"Do not answer the user's question and do not ask the user for more information. "
|
32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
33 |
f"The emails available for retrieval are from {start} to {end}. "
|
34 |
-
"For questions about differences or comparison between X and Y, retrieve emails about X and Y. "
|
35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
@@ -60,7 +60,8 @@ def generate_prompt(chat_model, think=False, with_tools=False):
|
|
60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
63 |
-
"
|
|
|
64 |
)
|
65 |
if with_tools:
|
66 |
prompt = (
|
@@ -87,6 +88,10 @@ You have access to the following tools:
|
|
87 |
|
88 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
89 |
|
|
|
|
|
|
|
|
|
90 |
{{
|
91 |
"tool": <function-name>,
|
92 |
"tool_input": <args-json-object>
|
@@ -102,6 +107,10 @@ generic_tools_template = """
|
|
102 |
|
103 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
104 |
|
|
|
|
|
|
|
|
|
105 |
{{
|
106 |
"tool": <function-name>,
|
107 |
"tool_input": <args-json-object>
|
|
|
31 |
"Do not answer the user's question and do not ask the user for more information. "
|
32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
33 |
f"The emails available for retrieval are from {start} to {end}. "
|
34 |
+
"For questions about differences or comparison between X and Y, retrieve emails about X and Y using separate tool calls. "
|
35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
|
|
60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
63 |
+
"Your response can include URLs, but make sure they are quoted verbatim from the retrieved emails. " # Qwen
|
64 |
+
"Respond with 300 words maximum and 30 lines of code maximum. "
|
65 |
)
|
66 |
if with_tools:
|
67 |
prompt = (
|
|
|
88 |
|
89 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
90 |
|
91 |
+
{{
|
92 |
+
"tool": <function-name>,
|
93 |
+
"tool_input": <args-json-object>
|
94 |
+
}},
|
95 |
{{
|
96 |
"tool": <function-name>,
|
97 |
"tool_input": <args-json-object>
|
|
|
107 |
|
108 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
109 |
|
110 |
+
{{
|
111 |
+
"tool": <function-name>,
|
112 |
+
"tool_input": <args-json-object>
|
113 |
+
}},
|
114 |
{{
|
115 |
"tool": <function-name>,
|
116 |
"tool_input": <args-json-object>
|