Spaces: Running on Zero

jedick committed
Commit · 17ad0bb
Parent(s): 84ccc57

Clean up ToolCallingLLM

Files changed:
- README.md +1 -1
- app.py +26 -26
- main.py +1 -1
- mods/tool_calling_llm.py +58 -133
- prompts.py +11 -9
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: "R-help chat: RAG for emails"
 emoji: 🍷🤖💬
 colorFrom: indigo
 colorTo: red
app.py
CHANGED
@@ -40,6 +40,27 @@ def cleanup_graph(request: gr.Request):
     print(f"Deleted remote graph for session {request.session_hash}")
 
 
+def append_content(chunk_messages, history, thinking_about):
+    """Append thinking and non-thinking content to chatbot history"""
+    if chunk_messages.content:
+        think_text, post_think = extract_think(chunk_messages.content)
+        # Show thinking content in "metadata" message
+        if think_text:
+            history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=think_text,
+                    metadata={"title": f"🧠 Thinking about {thinking_about}"},
+                )
+            )
+            if not post_think and not chunk_messages.tool_calls:
+                gr.Warning("Response may be incomplete", title="Thinking-only response")
+        # Display non-thinking content
+        if post_think:
+            history.append(gr.ChatMessage(role="assistant", content=post_think))
+    return history
+
+
 def run_workflow(input, history, compute_mode, thread_id, session_hash):
     """The main function to run the chat workflow"""
 
@@ -97,17 +118,8 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         if node == "query":
             # Get the message (AIMessage class in LangChain)
             chunk_messages = chunk["messages"]
-            #
-
-            content = chunk_messages.content
-            metadata = None
-            # Show thinking content in "metadata" message
-            if content.startswith("<think>"):
-                content, _ = extract_think(content)
-                metadata = {"title": f"🧠 Thinking about query"}
-            history.append(
-                gr.ChatMessage(role="assistant", content=content, metadata=metadata)
-            )
+            # Append thinking and non-thinking messages (if present)
+            history = append_content(chunk_messages, history, thinking_about="query")
             # Look for tool calls
             if chunk_messages.tool_calls:
                 # Loop over tool calls
@@ -171,27 +183,15 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
             yield history, retrieved_emails, []
 
         if node == "generate":
+            # Append messages (thinking and non-thinking) to history
             chunk_messages = chunk["messages"]
-
-            if chunk_messages.content:
-                content = chunk_messages.content
-                # Show thinking content in "metadata" message
-                think_text, content = extract_think(content)
-                if think_text:
-                    history.append(
-                        gr.ChatMessage(
-                            role="assistant",
-                            content=think_text,
-                            metadata={"title": f"🧠 Thinking about answer"},
-                        )
-                    )
-                history.append(gr.ChatMessage(role="assistant", content=content))
+            history = append_content(chunk_messages, history, thinking_about="answer")
             # None is used for no change to the retrieved emails textbox
             yield history, None, []
 
         if node == "answer_with_citations":
-            chunk_messages = chunk["messages"][0]
             # Parse the message for the answer and citations
+            chunk_messages = chunk["messages"][0]
             try:
                 answer, citations = ast.literal_eval(chunk_messages.content)
             except:
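The net effect of this diff is that both the query and generate branches now route model output through a single helper. A minimal sketch of how `append_content` behaves, assuming gradio is installed and `append_content`/`extract_think` are importable as defined above; the `FakeMessage` stand-in and sample text are illustrative, not from the repo:

```python
# Sketch only: FakeMessage stands in for LangChain's AIMessage.
from dataclasses import dataclass, field

@dataclass
class FakeMessage:
    content: str
    tool_calls: list = field(default_factory=list)

history = []
msg = FakeMessage("<think>Need lm() threads</think>Searching the archives.")
# history = append_content(msg, history, thinking_about="query")
# Expected result: one gr.ChatMessage titled "🧠 Thinking about query" holding
# the think text, then a plain assistant message "Searching the archives."
```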
main.py
CHANGED
@@ -41,7 +41,7 @@ model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
     # model_id = "google/gemma-3-12b-it"
-    model_id = "Qwen/Qwen3-
+    model_id = "Qwen/Qwen3-8B"
 
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
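The surrounding logic is a plain environment-variable override; condensed, it amounts to the following sketch (main.py itself uses an explicit `is None` check rather than `or`):

```python
import os

# Prefer MODEL_ID from the environment; otherwise fall back to the default.
# Note: `or` also treats an empty MODEL_ID as unset, unlike the `is None` test.
model_id = os.getenv("MODEL_ID") or "Qwen/Qwen3-8B"
```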
mods/tool_calling_llm.py
CHANGED
@@ -3,7 +3,6 @@ import json
 import uuid
 import warnings
 from abc import ABC
-from shutil import Error
 from typing import (
     Any,
     AsyncIterator,
@@ -14,8 +13,6 @@ from typing import (
     Sequence,
     Tuple,
     Type,
-    TypedDict,
-    TypeVar,
     Union,
     cast,
 )
@@ -51,29 +48,6 @@ You must always select one of the above tools and respond with only a JSON object
 }}
 """  # noqa: E501
 
-DEFAULT_RESPONSE_FUNCTION = {
-    "type": "function",
-    "function": {
-        "name": "__conversational_response",
-        "description": (
-            "Respond conversationally if no other tools should be called for a given query."
-        ),
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "response": {
-                    "type": "string",
-                    "description": "Conversational response to the user.",
-                },
-            },
-            "required": ["response"],
-        },
-    },
-}
-
-_BM = TypeVar("_BM", bound=BaseModel)
-_DictOrPydantic = Union[Dict, _BM]
-
 
 def _is_pydantic_class(obj: Any) -> bool:
     """
@@ -91,12 +65,6 @@ def _is_pydantic_object(obj: Any) -> bool:
     return isinstance(obj, BaseModel)
 
 
-class _AllReturnType(TypedDict):
-    raw: BaseMessage
-    parsed: Optional[_DictOrPydantic]
-    parsing_error: Optional[BaseException]
-
-
 def RawJSONDecoder(index):
     class _RawJSONDecoder(json.JSONDecoder):
         end = None
@@ -126,26 +94,6 @@ def parse_json_garbage(s: str) -> Any:
     raise ValueError("Not a valid JSON string")
 
 
-def parse_response(message: BaseMessage) -> str:
-    """Extract `function_call` from `AIMessage`."""
-    if isinstance(message, AIMessage):
-        kwargs = message.additional_kwargs
-        tool_calls = message.tool_calls
-        if len(tool_calls) > 0:
-            tool_call = tool_calls[-1]
-            args = tool_call.get("args")
-            return json.dumps(args)
-        elif "function_call" in kwargs:
-            if "arguments" in kwargs["function_call"]:
-                return kwargs["function_call"]["arguments"]
-            raise ValueError(
-                f"`arguments` missing from `function_call` within AIMessage: {message}"
-            )
-        else:
-            raise ValueError("`tool_calls` missing from AIMessage: {message}")
-    raise ValueError(f"`message` is not an instance of `AIMessage`: {message}")
-
-
 def extract_think(content):
     # Added by Cursor 20250726 jmd
     # Extract content within <think>...</think>
@@ -155,7 +103,15 @@ def extract_think(content):
     if think_match:
         post_think = content[think_match.end() :].lstrip()
     else:
-        post_think = content
+        # Check if content starts with <think> but missing closing tag
+        if content.strip().startswith("<think>"):
+            # Extract everything after <think>
+            think_start = content.find("<think>") + len("<think>")
+            think_text = content[think_start:].strip()
+            post_think = ""
+        else:
+            # No <think> found, so return entire content as post_think
+            post_think = content
     return think_text, post_think
 
 
@@ -226,27 +182,6 @@ class ToolCallingLLM(BaseChatModel, ABC):
         [{'name': 'GetWeather', 'args': {'location': 'Austin, TX'}, 'id': 'call_25ed526917b94d8fa5db3fe30a8cf3c0'}]
         ```
 
-    Structured output:
-        ```
-        from typing import Optional
-
-        from langchain_core.pydantic_v1 import BaseModel, Field
-
-        class Joke(BaseModel):
-            '''Joke to tell user.'''
-
-            setup: str = Field(description="The setup of the joke")
-            punchline: str = Field(description="The punchline to the joke")
-            rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")
-
-        structured_llm = llm.with_structured_output(Joke)
-        structured_llm.invoke("Tell me a joke about cats")
-        ```
-        ```
-        Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to be online!', rating=7)
-        ```
-        See `ToolCallingLLM.with_structured_output()` for more.
-
     Response metadata
         Refer to the documentation of the Chat Model you wish to extend with Tool Calling.
 
@@ -295,20 +230,26 @@ class ToolCallingLLM(BaseChatModel, ABC):
             )
             for fn in functions
         ]
-        if "functions" in kwargs:
-            del kwargs["functions"]
-        if "function_call" in kwargs:
-            functions = [
-                fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
-            ]
-            if not functions:
-                raise ValueError(
-                    "If `function_call` is specified, you must also pass a "
-                    "matching function in `functions`."
-                )
-            del kwargs["function_call"]
+
+        # langchain_openai/chat_models/base.py:
+        # NOTE: Using bind_tools is recommended instead, as the `functions` and
+        # `function_call` request parameters are officially marked as
+        # deprecated by OpenAI.
+
+        # if "functions" in kwargs:
+        #     del kwargs["functions"]
+        # if "function_call" in kwargs:
+        #     functions = [
+        #         fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
+        #     ]
+        #     if not functions:
+        #         raise ValueError(
+        #             "If `function_call` is specified, you must also pass a "
+        #             "matching function in `functions`."
+        #         )
+        #     del kwargs["function_call"]
+
         functions = [convert_to_openai_tool(fn) for fn in functions]
-        functions.append(DEFAULT_RESPONSE_FUNCTION)
         system_message_prompt_template = SystemMessagePromptTemplate.from_template(
             self.tool_system_prompt_template
         )
@@ -324,79 +265,63 @@ class ToolCallingLLM(BaseChatModel, ABC):
     def _process_response(
         self, response_message: BaseMessage, functions: List[Dict]
     ) -> AIMessage:
-        chat_generation_content = response_message.content
-        if not isinstance(chat_generation_content, str):
+        if not isinstance(response_message.content, str):
             raise ValueError("ToolCallingLLM does not support non-string output.")
 
         # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
-        think_text, post_think = extract_think(chat_generation_content)
+        think_text, post_think = extract_think(response_message.content)
 
+        # Parse output for JSON
         try:
-            parsed_chat_result = json.loads(post_think)
+            parsed_json_result = json.loads(post_think)
         except json.JSONDecodeError:
             try:
-                parsed_chat_result = parse_json_garbage(post_think)
+                print("parse_json_garbage for content:")
+                print(post_think)
+                parsed_json_result = parse_json_garbage(post_think)
             except Exception:
-                return AIMessage(content=chat_generation_content)
+                # Return entire response if JSON is missing or wasn't parsed
+                return AIMessage(content=response_message.content)
 
-        print("parsed_chat_result")
-        print(parsed_chat_result)
+        print("parsed_json_result")
+        print(parsed_json_result)
 
+        # Get tool name from output
        called_tool_name = (
-            parsed_chat_result["tool"]
-            if "tool" in parsed_chat_result
-            else parsed_chat_result["name"] if "name" in parsed_chat_result else None
+            parsed_json_result["tool"]
+            if "tool" in parsed_json_result
+            else parsed_json_result["name"] if "name" in parsed_json_result else None
        )
+
+        # Check if tool name is in functions list
        called_tool = next(
            (fn for fn in functions if fn["function"]["name"] == called_tool_name), None
        )
-        if (
-            called_tool is None
-            or called_tool["function"]["name"]
-            == DEFAULT_RESPONSE_FUNCTION["function"]["name"]
-        ):
-            if (
-                "tool_input" in parsed_chat_result
-                and "response" in parsed_chat_result["tool_input"]
-            ):
-                response = parsed_chat_result["tool_input"]["response"]
-            elif (
-                "parameters" in parsed_chat_result
-                and "response" in parsed_chat_result["parameters"]
-            ):
-                response = parsed_chat_result["parameters"]["response"]
-            elif "response" in parsed_chat_result:
-                response = parsed_chat_result["response"]
-            else:
-                # raise ValueError(
-                #     f"Failed to parse a response from {self.model} output: "  # type: ignore[attr-defined]
-                #     # Keep this commented for privacy in deployed app 20250727 jmd
-                #     # f"{chat_generation_content}"
-                # )
-                # Change to warning and return the generated content 20250727 jmd
-                warnings.warn(f"Failed to parse a response from {self.model} output")
-                response = chat_generation_content
-            return AIMessage(content=response)
+        if called_tool is None:
+            # Issue a warning and return the generated content 20250727 jmd
+            warnings.warn(
+                f"Tool {called_tool} called from {self.model} output not in functions list"
+            )
+            return AIMessage(content=response_message.content)
 
+        # Get tool arguments from output
        called_tool_arguments = (
-            parsed_chat_result["tool_input"]
-            if "tool_input" in parsed_chat_result
+            parsed_json_result["tool_input"]
+            if "tool_input" in parsed_json_result
            else (
-                parsed_chat_result["parameters"]
-                if "parameters" in parsed_chat_result
+                parsed_json_result["parameters"]
+                if "parameters" in parsed_json_result
                else {}
            )
        )
 
+        # Put together response message
        response_message_with_functions = AIMessage(
            content=f"<think>\n{think_text}\n</think>",
            tool_calls=[
                ToolCall(
                    name=called_tool_name,
-                    args=called_tool_arguments
+                    args=called_tool_arguments,
                    id=f"call_{str(uuid.uuid4()).replace('-', '')}",
                )
            ],
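After this cleanup, `_process_response` expects optional `<think>` text followed by a single JSON tool call, and it falls back to returning the raw content whenever parsing fails. A self-contained sketch of that contract; the condensed `extract_think` mirrors the version above, and the sample model output is invented:

```python
import json
import re

def extract_think(content):
    # Condensed mirror of mods/tool_calling_llm.py: split <think>...</think>
    # from the text that follows it
    think_text, post_think = "", content
    match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
    if match:
        think_text = match.group(1).strip()
        post_think = content[match.end():].lstrip()
    elif content.strip().startswith("<think>"):
        # Unclosed tag: treat everything after <think> as thinking content
        think_text = content.split("<think>", 1)[1].strip()
        post_think = ""
    return think_text, post_think

# Invented example of a model response in the prompted JSON format
raw = (
    "<think>User wants 2024 threads about lm().</think>"
    '{"tool": "retrieve_emails", "tool_input": {"search_query": "lm", "start_year": 2024}}'
)
think_text, post_think = extract_think(raw)
parsed = json.loads(post_think)
# Same fallback chains as _process_response: "tool"/"name" for the tool name,
# "tool_input"/"parameters" for the arguments
name = parsed["tool"] if "tool" in parsed else parsed.get("name")
args = parsed.get("tool_input", parsed.get("parameters", {}))
print(name, args)  # retrieve_emails {'search_query': 'lm', 'start_year': 2024}
```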
prompts.py
CHANGED
@@ -15,17 +15,19 @@ def query_prompt(compute_mode):
 
     query_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful
-        "
+        "You are a helpful chatbot designed to get information about R programming from the R-help mailing list archives."
+        "Write a search query to retrieve emails relevant to the user's question."
+        "Do not answer the user's question and do not ask the user for more information."
         # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval."
         f"The emails available for retrieval are from {start} to {end}."
-        "Write a search query based on the user's question, but do not answer the question just yet."
         "For questions about differences or comparison between X and Y, retrieve emails about X and Y."
         "For general summaries, use retrieve_emails(search_query='R')."
         "For specific questions, use retrieve_emails(search_query=<specific topic>)."
         "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
         "For questions about months, use 3-letter abbreviations (Jan..Dec) for the 'month' argument."
-        "Even if retrieved emails are
+        "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question."  # Qwen
+        # "You must perform the search yourself. Do not tell the user how to retrieve emails."  # Qwen
+        "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question."  # Qwen
         # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
     )
     # A sanity check that we don't have unassigned variables
@@ -40,16 +42,16 @@ def generate_prompt(with_tools=True, think=True):
     """Return system prompt for generate step"""
     generate_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful
-        "Summarize the retrieved emails
+        "You are a helpful chatbot designed to answer questions about R programming based on the R-help mailing list archives."
+        "Summarize the retrieved emails to answer the user's question or query."
         "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them."
         "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
         "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
-        "Example: For a question about
+        "Example: For a question about using lm(), take examples of lm() from the retrieved emails to answer the user's question."
         # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
         "Summarize the content of the emails rather than copying the headers."  # Qwen
-        "
-        "Only answer general questions about R if the answer is
+        "You must include inline citations (email senders and dates) in each part of your response."
+        "Only answer general questions about R if the answer is in the retrieved emails."
         "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
     )
     if with_tools:
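The comment after the prompt concatenation refers to "a sanity check that we don't have unassigned variables". A hedged sketch of what such a check could look like (the helper name and regex are illustrative, not taken from the repo): any `{placeholder}` that survives concatenation points at a string that was missing its f-prefix.

```python
import re

def check_no_unassigned_variables(prompt: str) -> None:
    # A leftover {placeholder} suggests a string that missed its f-prefix
    leftover = re.findall(r"\{[^{}]+\}", prompt)
    if leftover:
        raise ValueError(f"Unassigned variables in prompt: {leftover}")

check_no_unassigned_variables(
    "Today Date: 2025-07-27. The emails available for retrieval are from X to Y."
)
```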