jedick committed
Commit 84ccc57 · Parent: 1130c52

Display thinking output

Files changed (2):
  1. app.py +34 -15
  2. mods/tool_calling_llm.py +33 -7
app.py CHANGED
@@ -6,6 +6,7 @@ from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
 from main import openai_model, model_id
 from util import get_sources, get_start_end_months
+from mods.tool_calling_llm import extract_think
 import requests
 import zipfile
 import shutil
@@ -16,6 +17,8 @@ import torch
 import uuid
 import ast
 import os
+import re
+
 
 # Setup environment variables
 load_dotenv(dotenv_path=".env", override=True)
@@ -71,7 +74,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         graph_instances[compute_mode][session_hash] = graph
         print(f"Set {compute_mode} graph for session {session_hash}")
         # Notify when model finishes loading
-        gr.Success(f"{compute_mode}", duration=4, title=f"Model loaded")
+        gr.Success(f"{compute_mode}", duration=4, title=f"Model loaded!")
 
     print(f"Using thread_id: {thread_id}")
 
@@ -94,6 +97,17 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         if node == "query":
             # Get the message (AIMessage class in LangChain)
             chunk_messages = chunk["messages"]
+            # Display non-tool-call content
+            if chunk_messages.content:
+                content = chunk_messages.content
+                metadata = None
+                # Show thinking content in "metadata" message
+                if content.startswith("<think>"):
+                    content, _ = extract_think(content)
+                    metadata = {"title": f"🧠 Thinking about query"}
+                history.append(
+                    gr.ChatMessage(role="assistant", content=content, metadata=metadata)
+                )
             # Look for tool calls
             if chunk_messages.tool_calls:
                 # Loop over tool calls
@@ -114,11 +128,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
                         metadata={"title": f"🔍 Running tool {tool_call['name']}"},
                     )
                 )
-            if chunk_messages.content:
-                # Display response made instead of or in addition to a tool call
-                history.append(
-                    gr.ChatMessage(role="assistant", content=chunk_messages.content)
-                )
             yield history, [], []
 
         if node == "retrieve_emails":
@@ -165,9 +174,18 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
             chunk_messages = chunk["messages"]
             # Chat response without citations
             if chunk_messages.content:
-                history.append(
-                    gr.ChatMessage(role="assistant", content=chunk_messages.content)
-                )
+                content = chunk_messages.content
+                # Show thinking content in "metadata" message
+                think_text, content = extract_think(content)
+                if think_text:
+                    history.append(
+                        gr.ChatMessage(
+                            role="assistant",
+                            content=think_text,
+                            metadata={"title": f"🧠 Thinking about answer"},
+                        )
+                    )
+                history.append(gr.ChatMessage(role="assistant", content=content))
             # None is used for no change to the retrieved emails textbox
             yield history, None, []
 
@@ -267,7 +285,7 @@ with gr.Blocks(
         render=False,
     )
     data_error = gr.Textbox(
-        value="App is unavailable because data could not be loaded. Try reloading the page, then contact the maintainer if the problem persists.",
+        value="Email database is missing. Try reloading the page, then contact the maintainer if the problem persists.",
         lines=1,
         label="Error downloading or extracting data",
         visible=False,
@@ -343,7 +361,7 @@
     ## 🇷🤖💬 R-help-chat
 
     **Chat with the [R-help mailing list archives](https://stat.ethz.ch/pipermail/r-help/).**
-    An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
+    An LLM turns your question into a search query, including year ranges and months, and generates an answer from the retrieved emails.
     You can ask follow-up questions with the chat history as context.
     ➡️ To clear the history and start a new chat, press the 🗑️ clear button.
     **_Answers may be incorrect._**
@@ -361,7 +379,8 @@ with gr.Blocks(
         if compute_mode == "local":
             status_text = f"""
             📍 Now in **local** mode, using ZeroGPU hardware<br>
-            ⌛ Response time is about 1 minute<br>
+            ⌛ Response time is about one minute<br>
+            🧠 Thinking is enabled for query; add **/think** to enable thinking for answer</br>
             ✨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
@@ -379,8 +398,8 @@ with gr.Blocks(
             end = None
         info_text = f"""
         **Database:** {len(sources)} emails from {start} to {end}.
-        **Features:** RAG, today's date, hybrid search (dense+sparse), query analysis,
-        multiple retrievals per turn (remote mode), answer with citations (remote mode), chat memory.
+        **Features:** RAG, today's date, hybrid search (dense+sparse), thinking display (local),
+        multiple retrievals per turn (remote), answer with citations (remote), chat memory.
         **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
         """
         return info_text
@@ -410,7 +429,7 @@ with gr.Blocks(
     example_questions = [
         # "What is today's date?",
         "Summarize emails from the last two months",
-        "How to use plotmath?",
+        "Advice on using plotmath /think",
         "When was has.HLC mentioned?",
         "Who reported installation problems in 2023-2024?",
     ]
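The display logic added above follows one pattern in both nodes: split the <think>...</think> reasoning out of the raw model output, show it as a collapsible "metadata" message, then show the visible answer. A minimal sketch of the answer-node flow, outside the streaming loop (the append_with_thinking helper is illustrative, not part of the commit; extract_think is the function added to mods/tool_calling_llm.py):

import gradio as gr
from mods.tool_calling_llm import extract_think

def append_with_thinking(history, raw_content):
    # Hypothetical helper: split "<think>...</think>" reasoning from the answer
    think_text, answer = extract_think(raw_content)
    if think_text:
        # Messages with metadata={"title": ...} render as collapsible blocks
        history.append(
            gr.ChatMessage(
                role="assistant",
                content=think_text,
                metadata={"title": "🧠 Thinking about answer"},
            )
        )
    if answer:
        history.append(gr.ChatMessage(role="assistant", content=answer))
    return history

# Example: one raw model output becomes two chat messages
history = append_with_thinking(
    [], "<think>\nThe user asked about plotmath.\n</think>\nUse expression() in plot titles."
)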
mods/tool_calling_llm.py CHANGED
@@ -1,6 +1,7 @@
 import re
 import json
 import uuid
+import warnings
 from abc import ABC
 from shutil import Error
 from typing import (
@@ -145,6 +146,19 @@ def parse_response(message: BaseMessage) -> str:
     raise ValueError(f"`message` is not an instance of `AIMessage`: {message}")
 
 
+def extract_think(content):
+    # Added by Cursor 20250726 jmd
+    # Extract content within <think>...</think>
+    think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
+    think_text = think_match.group(1).strip() if think_match else ""
+    # Extract text after </think>
+    if think_match:
+        post_think = content[think_match.end() :].lstrip()
+    else:
+        post_think = content
+    return think_text, post_think
+
+
 class ToolCallingLLM(BaseChatModel, ABC):
     """ToolCallingLLM mixin to enable tool calling features on non tool calling models.
 
@@ -239,7 +253,7 @@ class ToolCallingLLM(BaseChatModel, ABC):
     """ # noqa: E501
 
     tool_system_prompt_template: str = DEFAULT_SYSTEM_TEMPLATE
-    # Suffix to add to the system prompt that is not templated (variable names are not interpreted)
+    # Suffix to add to the system prompt that is not templated 20250717 jmd
     system_message_suffix: str = ""
 
     override_bind_tools: bool = True
@@ -301,7 +315,7 @@ class ToolCallingLLM(BaseChatModel, ABC):
         system_message = system_message_prompt_template.format(
             tools=json.dumps(functions, indent=2)
         )
-        # Add extra context after the formatted system message
+        # Add extra context after the formatted system message 20250717 jmd
         system_message = SystemMessage(
             system_message.content + self.system_message_suffix
         )
@@ -313,14 +327,22 @@ class ToolCallingLLM(BaseChatModel, ABC):
         chat_generation_content = response_message.content
         if not isinstance(chat_generation_content, str):
             raise ValueError("ToolCallingLLM does not support non-string output.")
+
+        # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
+        think_text, chat_generation_content = extract_think(chat_generation_content)
+
         try:
             parsed_chat_result = json.loads(chat_generation_content)
         except json.JSONDecodeError:
             try:
                 parsed_chat_result = parse_json_garbage(chat_generation_content)
             except Exception:
+                warnings.warn(f"Failed to parse JSON from {self.model} output")
                 return AIMessage(content=chat_generation_content)
 
+        print("parsed_chat_result")
+        print(parsed_chat_result)
+
         called_tool_name = (
             parsed_chat_result["tool"]
             if "tool" in parsed_chat_result
@@ -349,10 +371,14 @@ class ToolCallingLLM(BaseChatModel, ABC):
             elif "response" in parsed_chat_result:
                 response = parsed_chat_result["response"]
             else:
-                raise ValueError(
-                    f"Failed to parse a response from {self.model} output: "  # type: ignore[attr-defined]
-                    f"{chat_generation_content}"
-                )
+                # raise ValueError(
+                #     f"Failed to parse a response from {self.model} output: "  # type: ignore[attr-defined]
+                #     # Keep this commented for privacy in deployed app 20250727 jmd
+                #     # f"{chat_generation_content}"
+                # )
+                # Change to warning and return the generated content 20250727 jmd
+                warnings.warn(f"Failed to parse a response from {self.model} output")
+                response = chat_generation_content
             return AIMessage(content=response)
 
         called_tool_arguments = (
@@ -366,7 +392,7 @@ class ToolCallingLLM(BaseChatModel, ABC):
         )
 
         response_message_with_functions = AIMessage(
-            content="",
+            content=f"<think>\n{think_text}\n</think>",
             tool_calls=[
                 ToolCall(
                     name=called_tool_name,
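Since extract_think now runs on every model response before JSON parsing, its no-op behavior matters: content without a think block must pass through unchanged, or tool-call parsing would break. A standalone check of the function as committed:

import re

def extract_think(content):
    # Copied verbatim from the addition to mods/tool_calling_llm.py above
    think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
    think_text = think_match.group(1).strip() if think_match else ""
    if think_match:
        post_think = content[think_match.end() :].lstrip()
    else:
        post_think = content
    return think_text, post_think

# With a think block, reasoning and the remainder are separated
assert extract_think('<think>\nplan\n</think>\n{"tool": "search"}') == (
    "plan",
    '{"tool": "search"}',
)
# Without one, content passes through untouched
assert extract_think('{"tool": "search"}') == ("", '{"tool": "search"}')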