jedick committed
Commit 17ad0bb · 1 Parent(s): 84ccc57

Clean up ToolCallingLLM

Files changed (5):
  1. README.md +1 -1
  2. app.py +26 -26
  3. main.py +1 -1
  4. mods/tool_calling_llm.py +58 -133
  5. prompts.py +11 -9
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: RAG Chat with R-help Emails
+title: "R-help chat: RAG for emails"
 emoji: 🇷🐀💬
 colorFrom: indigo
 colorTo: red
app.py CHANGED
@@ -40,6 +40,27 @@ def cleanup_graph(request: gr.Request):
     print(f"Deleted remote graph for session {request.session_hash}")
 
 
+def append_content(chunk_messages, history, thinking_about):
+    """Append thinking and non-thinking content to chatbot history"""
+    if chunk_messages.content:
+        think_text, post_think = extract_think(chunk_messages.content)
+        # Show thinking content in "metadata" message
+        if think_text:
+            history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=think_text,
+                    metadata={"title": f"🧠 Thinking about {thinking_about}"},
+                )
+            )
+            if not post_think and not chunk_messages.tool_calls:
+                gr.Warning("Response may be incomplete", title="Thinking-only response")
+        # Display non-thinking content
+        if post_think:
+            history.append(gr.ChatMessage(role="assistant", content=post_think))
+    return history
+
+
 def run_workflow(input, history, compute_mode, thread_id, session_hash):
     """The main function to run the chat workflow"""
 
@@ -97,17 +118,8 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         if node == "query":
             # Get the message (AIMessage class in LangChain)
             chunk_messages = chunk["messages"]
-            # Display non-tool-call content
-            if chunk_messages.content:
-                content = chunk_messages.content
-                metadata = None
-                # Show thinking content in "metadata" message
-                if content.startswith("<think>"):
-                    content, _ = extract_think(content)
-                    metadata = {"title": f"🧠 Thinking about query"}
-                history.append(
-                    gr.ChatMessage(role="assistant", content=content, metadata=metadata)
-                )
+            # Append thinking and non-thinking messages (if present)
+            history = append_content(chunk_messages, history, thinking_about="query")
             # Look for tool calls
             if chunk_messages.tool_calls:
                 # Loop over tool calls
@@ -171,27 +183,15 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
             yield history, retrieved_emails, []
 
         if node == "generate":
+            # Append messages (thinking and non-thinking) to history
             chunk_messages = chunk["messages"]
-            # Chat response without citations
-            if chunk_messages.content:
-                content = chunk_messages.content
-                # Show thinking content in "metadata" message
-                think_text, content = extract_think(content)
-                if think_text:
-                    history.append(
-                        gr.ChatMessage(
-                            role="assistant",
-                            content=think_text,
-                            metadata={"title": f"🧠 Thinking about answer"},
-                        )
-                    )
-                history.append(gr.ChatMessage(role="assistant", content=content))
+            history = append_content(chunk_messages, history, thinking_about="answer")
             # None is used for no change to the retrieved emails textbox
             yield history, None, []
 
         if node == "answer_with_citations":
-            chunk_messages = chunk["messages"][0]
             # Parse the message for the answer and citations
+            chunk_messages = chunk["messages"][0]
             try:
                 answer, citations = ast.literal_eval(chunk_messages.content)
             except:
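
Note: the new append_content helper centralizes the think/answer split that was previously duplicated in the "query" and "generate" branches. A minimal sketch of its behavior follows (not part of the commit; it assumes Gradio is installed, is run from the repo root so mods/ is importable, and uses a made-up stand-in for a LangChain AIMessage):

    # Hypothetical walkthrough of the append_content logic
    import gradio as gr
    from mods.tool_calling_llm import extract_think

    class FakeMessage:
        """Stand-in for a LangChain AIMessage (invented for illustration)."""
        content = "<think>\nCheck the retrieved emails first.\n</think>\nUse y ~ x in lm()."
        tool_calls = []

    history = []
    think_text, post_think = extract_think(FakeMessage.content)
    if think_text:
        # Thinking content goes into a collapsible "metadata" bubble
        history.append(
            gr.ChatMessage(
                role="assistant",
                content=think_text,
                metadata={"title": "🧠 Thinking about answer"},
            )
        )
    if post_think:
        # Visible answer text follows as a normal assistant message
        history.append(gr.ChatMessage(role="assistant", content=post_think))
    print([m.content for m in history])
    # -> two messages: the thinking text, then the visible answer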
main.py CHANGED
@@ -41,7 +41,7 @@ model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
     # model_id = "google/gemma-3-12b-it"
-    model_id = "Qwen/Qwen3-14B"
+    model_id = "Qwen/Qwen3-8B"
 
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
mods/tool_calling_llm.py CHANGED
@@ -3,7 +3,6 @@ import json
 import uuid
 import warnings
 from abc import ABC
-from shutil import Error
 from typing import (
     Any,
     AsyncIterator,
@@ -14,8 +13,6 @@ from typing import (
     Sequence,
     Tuple,
     Type,
-    TypedDict,
-    TypeVar,
     Union,
     cast,
 )
@@ -51,29 +48,6 @@ You must always select one of the above tools and respond with only a JSON objec
     }}
 """  # noqa: E501
 
-DEFAULT_RESPONSE_FUNCTION = {
-    "type": "function",
-    "function": {
-        "name": "__conversational_response",
-        "description": (
-            "Respond conversationally if no other tools should be called for a given query."
-        ),
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "response": {
-                    "type": "string",
-                    "description": "Conversational response to the user.",
-                },
-            },
-            "required": ["response"],
-        },
-    },
-}
-
-_BM = TypeVar("_BM", bound=BaseModel)
-_DictOrPydantic = Union[Dict, _BM]
-
 
 def _is_pydantic_class(obj: Any) -> bool:
     """
@@ -91,12 +65,6 @@ def _is_pydantic_object(obj: Any) -> bool:
     return isinstance(obj, BaseModel)
 
 
-class _AllReturnType(TypedDict):
-    raw: BaseMessage
-    parsed: Optional[_DictOrPydantic]
-    parsing_error: Optional[BaseException]
-
-
 def RawJSONDecoder(index):
     class _RawJSONDecoder(json.JSONDecoder):
         end = None
@@ -126,26 +94,6 @@ def parse_json_garbage(s: str) -> Any:
     raise ValueError("Not a valid JSON string")
 
 
-def parse_response(message: BaseMessage) -> str:
-    """Extract `function_call` from `AIMessage`."""
-    if isinstance(message, AIMessage):
-        kwargs = message.additional_kwargs
-        tool_calls = message.tool_calls
-        if len(tool_calls) > 0:
-            tool_call = tool_calls[-1]
-            args = tool_call.get("args")
-            return json.dumps(args)
-        elif "function_call" in kwargs:
-            if "arguments" in kwargs["function_call"]:
-                return kwargs["function_call"]["arguments"]
-            raise ValueError(
-                f"`arguments` missing from `function_call` within AIMessage: {message}"
-            )
-        else:
-            raise ValueError("`tool_calls` missing from AIMessage: {message}")
-    raise ValueError(f"`message` is not an instance of `AIMessage`: {message}")
-
-
 def extract_think(content):
     # Added by Cursor 20250726 jmd
     # Extract content within <think>...</think>
@@ -155,7 +103,15 @@ def extract_think(content):
     if think_match:
         post_think = content[think_match.end() :].lstrip()
     else:
-        post_think = content
+        # Check if content starts with <think> but missing closing tag
+        if content.strip().startswith("<think>"):
+            # Extract everything after <think>
+            think_start = content.find("<think>") + len("<think>")
+            think_text = content[think_start:].strip()
+            post_think = ""
+        else:
+            # No <think> found, so return entire content as post_think
+            post_think = content
     return think_text, post_think
 
 
@@ -226,27 +182,6 @@ class ToolCallingLLM(BaseChatModel, ABC):
     [{'name': 'GetWeather', 'args': {'location': 'Austin, TX'}, 'id': 'call_25ed526917b94d8fa5db3fe30a8cf3c0'}]
     ```
 
-    Structured output:
-    ```
-    from typing import Optional
-
-    from langchain_core.pydantic_v1 import BaseModel, Field
-
-    class Joke(BaseModel):
-        '''Joke to tell user.'''
-
-        setup: str = Field(description="The setup of the joke")
-        punchline: str = Field(description="The punchline to the joke")
-        rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")
-
-    structured_llm = llm.with_structured_output(Joke)
-    structured_llm.invoke("Tell me a joke about cats")
-    ```
-    ```
-    Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to be online!', rating=7)
-    ```
-    See `ToolCallingLLM.with_structured_output()` for more.
-
     Response metadata
     Refer to the documentation of the Chat Model you wish to extend with Tool Calling.
 
@@ -295,20 +230,26 @@ class ToolCallingLLM(BaseChatModel, ABC):
             )
             for fn in functions
         ]
-        if "functions" in kwargs:
-            del kwargs["functions"]
-        if "function_call" in kwargs:
-            functions = [
-                fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
-            ]
-            if not functions:
-                raise ValueError(
-                    "If `function_call` is specified, you must also pass a "
-                    "matching function in `functions`."
-                )
-            del kwargs["function_call"]
+
+        # langchain_openai/chat_models/base.py:
+        # NOTE: Using bind_tools is recommended instead, as the `functions` and
+        # `function_call` request parameters are officially marked as
+        # deprecated by OpenAI.
+
+        # if "functions" in kwargs:
+        #     del kwargs["functions"]
+        # if "function_call" in kwargs:
+        #     functions = [
+        #         fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
+        #     ]
+        #     if not functions:
+        #         raise ValueError(
+        #             "If `function_call` is specified, you must also pass a "
+        #             "matching function in `functions`."
+        #         )
+        #     del kwargs["function_call"]
+
         functions = [convert_to_openai_tool(fn) for fn in functions]
-        functions.append(DEFAULT_RESPONSE_FUNCTION)
         system_message_prompt_template = SystemMessagePromptTemplate.from_template(
             self.tool_system_prompt_template
         )
@@ -324,79 +265,63 @@ class ToolCallingLLM(BaseChatModel, ABC):
     def _process_response(
         self, response_message: BaseMessage, functions: List[Dict]
     ) -> AIMessage:
-        chat_generation_content = response_message.content
-        if not isinstance(chat_generation_content, str):
+        if not isinstance(response_message.content, str):
             raise ValueError("ToolCallingLLM does not support non-string output.")
 
         # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
-        think_text, chat_generation_content = extract_think(chat_generation_content)
+        think_text, post_think = extract_think(response_message.content)
 
+        # Parse output for JSON
         try:
-            parsed_chat_result = json.loads(chat_generation_content)
+            parsed_json_result = json.loads(post_think)
         except json.JSONDecodeError:
             try:
-                parsed_chat_result = parse_json_garbage(chat_generation_content)
+                print("parse_json_garbage for content:")
+                print(post_think)
+                parsed_json_result = parse_json_garbage(post_think)
             except Exception:
-                warnings.warn(f"Failed to parse JSON from {self.model} output")
-                return AIMessage(content=chat_generation_content)
+                # Return entire response if JSON is missing or wasn't parsed
+                return AIMessage(content=response_message.content)
 
-        print("parsed_chat_result")
-        print(parsed_chat_result)
+        print("parsed_json_result")
+        print(parsed_json_result)
 
+        # Get tool name from output
         called_tool_name = (
-            parsed_chat_result["tool"]
-            if "tool" in parsed_chat_result
-            else parsed_chat_result["name"] if "name" in parsed_chat_result else None
+            parsed_json_result["tool"]
+            if "tool" in parsed_json_result
+            else parsed_json_result["name"] if "name" in parsed_json_result else None
        )
+
+        # Check if tool name is in functions list
         called_tool = next(
             (fn for fn in functions if fn["function"]["name"] == called_tool_name), None
         )
-        if (
-            called_tool is None
-            or called_tool["function"]["name"]
-            == DEFAULT_RESPONSE_FUNCTION["function"]["name"]
-            or called_tool["function"]["name"]
-            == DEFAULT_RESPONSE_FUNCTION["function"]["name"][2:]
-        ):
-            if (
-                "tool_input" in parsed_chat_result
-                and "response" in parsed_chat_result["tool_input"]
-            ):
-                response = parsed_chat_result["tool_input"]["response"]
-            elif (
-                "parameters" in parsed_chat_result
-                and "response" in parsed_chat_result["parameters"]
-            ):
-                response = parsed_chat_result["parameters"]["response"]
-            elif "response" in parsed_chat_result:
-                response = parsed_chat_result["response"]
-            else:
-                # raise ValueError(
-                #     f"Failed to parse a response from {self.model} output: "  # type: ignore[attr-defined]
-                #     # Keep this commented for privacy in deployed app 20250727 jmd
-                #     # f"{chat_generation_content}"
-                # )
-                # Change to warning and return the generated content 20250727 jmd
-                warnings.warn(f"Failed to parse a response from {self.model} output")
-                response = chat_generation_content
-            return AIMessage(content=response)
+        if called_tool is None:
+            # Issue a warning and return the generated content 20250727 jmd
+            warnings.warn(
+                f"Tool {called_tool} called from {self.model} output not in functions list"
+            )
+            return AIMessage(content=response_message.content)
 
+        # Get tool arguments from output
         called_tool_arguments = (
-            parsed_chat_result["tool_input"]
-            if "tool_input" in parsed_chat_result
+            parsed_json_result["tool_input"]
+            if "tool_input" in parsed_json_result
             else (
-                parsed_chat_result["parameters"]
-                if "parameters" in parsed_chat_result
+                parsed_json_result["parameters"]
+                if "parameters" in parsed_json_result
                 else {}
            )
        )
 
+        # Put together response message
         response_message_with_functions = AIMessage(
             content=f"<think>\n{think_text}\n</think>",
             tool_calls=[
                 ToolCall(
                     name=called_tool_name,
-                    args=called_tool_arguments if called_tool_arguments else {},
+                    args=called_tool_arguments,
                     id=f"call_{str(uuid.uuid4()).replace('-', '')}",
                 )
             ],
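
Note: after this cleanup, _process_response follows a single path: strip the <think> block, parse the remaining text as JSON, and wrap the result in an AIMessage carrying a ToolCall. A rough sketch of that path (the raw model output below is invented for illustration; run from the repo root so mods/ is importable):

    # Sketch of the parse path: model text -> extract_think -> json.loads -> ToolCall
    import json
    import uuid
    from langchain_core.messages import AIMessage, ToolCall
    from mods.tool_calling_llm import extract_think

    raw = (
        '<think>\nNeed emails about lm().\n</think>\n'
        '{"tool": "retrieve_emails", "tool_input": {"search_query": "lm formulas"}}'
    )
    think_text, post_think = extract_think(raw)
    parsed = json.loads(post_think)
    message = AIMessage(
        # Thinking text is preserved in the message content
        content=f"<think>\n{think_text}\n</think>",
        tool_calls=[
            ToolCall(
                name=parsed["tool"],
                args=parsed.get("tool_input", {}),
                id=f"call_{uuid.uuid4().hex}",
            )
        ],
    )
    print(message.tool_calls[0]["name"])  # retrieve_emails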
prompts.py CHANGED
@@ -15,17 +15,19 @@ def query_prompt(compute_mode):
 
     query_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
-        "Do not ask the user for more information, but retrieve emails from the R-help mailing list archives."
+        "You are a helpful chatbot designed to get information about R programming from the R-help mailing list archives."
+        "Write a search query to retrieve emails relevant to the user's question."
+        "Do not answer the user's question and do not ask the user for more information."
         # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval."
         f"The emails available for retrieval are from {start} to {end}."
-        "Write a search query based on the user's question, but do not answer the question just yet."
         "For questions about differences or comparison between X and Y, retrieve emails about X and Y."
         "For general summaries, use retrieve_emails(search_query='R')."
         "For specific questions, use retrieve_emails(search_query=<specific topic>)."
         "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
         "For questions about months, use 3-letter abbreviations (Jan..Dec) for the 'month' argument."
-        "Even if retrieved emails are already available, you should retrieve *more* emails to answer the most recent question."  # Qwen
+        "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question."  # Qwen
+        # "You must perform the search yourself. Do not tell the user how to retrieve emails."  # Qwen
+        "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question."  # Qwen
         # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
     )
     # A sanity check that we don't have unassigned variables
@@ -40,16 +42,16 @@ def generate_prompt(with_tools=True, think=True):
     """Return system prompt for generate step"""
     generate_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
-        "Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query."
+        "You are a helpful chatbot designed to answer questions about R programming based on the R-help mailing list archives."
+        "Summarize the retrieved emails to answer the user's question or query."
         "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them."
         "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
         "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
-        "Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails."
+        "Example: For a question about using lm(), take examples of lm() from the retrieved emails to answer the user's question."
         # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
         "Summarize the content of the emails rather than copying the headers."  # Qwen
-        "Include inline citations (email senders and dates) in your response."
-        "Only answer general questions about R if the answer is given in the retrieved emails."
+        "You must include inline citations (email senders and dates) in each part of your response."
+        "Only answer general questions about R if the answer is in the retrieved emails."
         "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
     )
     if with_tools:
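
Note: the query prompt steers the model toward a retrieve_emails tool whose real definition lives elsewhere in the repo. A hypothetical sketch of the signature implied by the prompt text (argument names taken from the prompt; the actual tool may differ):

    # Hypothetical signature inferred from the prompt; placeholder body only
    from typing import List, Optional

    def retrieve_emails(
        search_query: str,
        start_year: Optional[int] = None,
        end_year: Optional[int] = None,
        month: Optional[str] = None,  # 3-letter abbreviation, Jan..Dec
    ) -> List[str]:
        """Search the R-help mailing list archives and return matching emails."""
        return []  # placeholder: the real implementation queries the vector store

    # Usage patterns named in the prompt:
    retrieve_emails(search_query="R")  # general summaries
    retrieve_emails(search_query="lm formulas")  # specific questions
    retrieve_emails(search_query="ggplot2", start_year=2023, end_year=2024)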