jedick committed
Commit 17ad0bb · 1 Parent(s): 84ccc57

Clean up ToolCallingLLM

Files changed (5):
  1. README.md +1 -1
  2. app.py +26 -26
  3. main.py +1 -1
  4. mods/tool_calling_llm.py +58 -133
  5. prompts.py +11 -9
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: RAG Chat with R-help Emails
+title: "R-help chat: RAG for emails"
 emoji: 🇷🐀💬
 colorFrom: indigo
 colorTo: red
app.py CHANGED
@@ -40,6 +40,27 @@ def cleanup_graph(request: gr.Request):
     print(f"Deleted remote graph for session {request.session_hash}")
 
 
+def append_content(chunk_messages, history, thinking_about):
+    """Append thinking and non-thinking content to chatbot history"""
+    if chunk_messages.content:
+        think_text, post_think = extract_think(chunk_messages.content)
+        # Show thinking content in "metadata" message
+        if think_text:
+            history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=think_text,
+                    metadata={"title": f"🧠 Thinking about {thinking_about}"},
+                )
+            )
+            if not post_think and not chunk_messages.tool_calls:
+                gr.Warning("Response may be incomplete", title="Thinking-only response")
+        # Display non-thinking content
+        if post_think:
+            history.append(gr.ChatMessage(role="assistant", content=post_think))
+    return history
+
+
 def run_workflow(input, history, compute_mode, thread_id, session_hash):
     """The main function to run the chat workflow"""
 
@@ -97,17 +118,8 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         if node == "query":
             # Get the message (AIMessage class in LangChain)
             chunk_messages = chunk["messages"]
-            # Display non-tool-call content
-            if chunk_messages.content:
-                content = chunk_messages.content
-                metadata = None
-                # Show thinking content in "metadata" message
-                if content.startswith("<think>"):
-                    content, _ = extract_think(content)
-                    metadata = {"title": f"🧠 Thinking about query"}
-                history.append(
-                    gr.ChatMessage(role="assistant", content=content, metadata=metadata)
-                )
+            # Append thinking and non-thinking messages (if present)
+            history = append_content(chunk_messages, history, thinking_about="query")
             # Look for tool calls
             if chunk_messages.tool_calls:
                 # Loop over tool calls
@@ -171,27 +183,15 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
             yield history, retrieved_emails, []
 
         if node == "generate":
+            # Append messages (thinking and non-thinking) to history
             chunk_messages = chunk["messages"]
-            # Chat response without citations
-            if chunk_messages.content:
-                content = chunk_messages.content
-                # Show thinking content in "metadata" message
-                think_text, content = extract_think(content)
-                if think_text:
-                    history.append(
-                        gr.ChatMessage(
-                            role="assistant",
-                            content=think_text,
-                            metadata={"title": f"🧠 Thinking about answer"},
-                        )
-                    )
-                history.append(gr.ChatMessage(role="assistant", content=content))
+            history = append_content(chunk_messages, history, thinking_about="answer")
             # None is used for no change to the retrieved emails textbox
             yield history, None, []
 
         if node == "answer_with_citations":
-            chunk_messages = chunk["messages"][0]
             # Parse the message for the answer and citations
+            chunk_messages = chunk["messages"][0]
             try:
                 answer, citations = ast.literal_eval(chunk_messages.content)
             except:
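
Note: the new append_content helper centralizes the think/answer split that was previously duplicated in the "query" and "generate" branches. A minimal sketch of its behavior follows (not part of the commit; it assumes Gradio is installed, is run from the repo root so mods/ is importable, and uses a made-up stand-in for a LangChain AIMessage):

    # Hypothetical walkthrough of the append_content logic
    import gradio as gr
    from mods.tool_calling_llm import extract_think

    class FakeMessage:
        """Stand-in for a LangChain AIMessage (invented for illustration)."""
        content = "<think>\nCheck the retrieved emails first.\n</think>\nUse y ~ x in lm()."
        tool_calls = []

    history = []
    think_text, post_think = extract_think(FakeMessage.content)
    if think_text:
        # Thinking content goes into a collapsible "metadata" bubble
        history.append(
            gr.ChatMessage(
                role="assistant",
                content=think_text,
                metadata={"title": "🧠 Thinking about answer"},
            )
        )
    if post_think:
        # Visible answer text follows as a normal assistant message
        history.append(gr.ChatMessage(role="assistant", content=post_think))
    print([m.content for m in history])
    # -> two messages: the thinking text, then the visible answer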
main.py CHANGED
@@ -41,7 +41,7 @@ model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
     # model_id = "google/gemma-3-12b-it"
-    model_id = "Qwen/Qwen3-14B"
+    model_id = "Qwen/Qwen3-8B"
 
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
mods/tool_calling_llm.py CHANGED
@@ -3,7 +3,6 @@ import json
 import uuid
 import warnings
 from abc import ABC
-from shutil import Error
 from typing import (
     Any,
     AsyncIterator,
@@ -14,8 +13,6 @@ from typing import (
     Sequence,
     Tuple,
     Type,
-    TypedDict,
-    TypeVar,
     Union,
     cast,
 )
@@ -51,29 +48,6 @@ You must always select one of the above tools and respond with only a JSON objec
     }}
 """  # noqa: E501
 
-DEFAULT_RESPONSE_FUNCTION = {
-    "type": "function",
-    "function": {
-        "name": "__conversational_response",
-        "description": (
-            "Respond conversationally if no other tools should be called for a given query."
-        ),
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "response": {
-                    "type": "string",
-                    "description": "Conversational response to the user.",
-                },
-            },
-            "required": ["response"],
-        },
-    },
-}
-
-_BM = TypeVar("_BM", bound=BaseModel)
-_DictOrPydantic = Union[Dict, _BM]
-
 
 def _is_pydantic_class(obj: Any) -> bool:
     """
@@ -91,12 +65,6 @@ def _is_pydantic_object(obj: Any) -> bool:
     return isinstance(obj, BaseModel)
 
 
-class _AllReturnType(TypedDict):
-    raw: BaseMessage
-    parsed: Optional[_DictOrPydantic]
-    parsing_error: Optional[BaseException]
-
-
 def RawJSONDecoder(index):
     class _RawJSONDecoder(json.JSONDecoder):
         end = None
@@ -126,26 +94,6 @@ def parse_json_garbage(s: str) -> Any:
     raise ValueError("Not a valid JSON string")
 
 
-def parse_response(message: BaseMessage) -> str:
-    """Extract `function_call` from `AIMessage`."""
-    if isinstance(message, AIMessage):
-        kwargs = message.additional_kwargs
-        tool_calls = message.tool_calls
-        if len(tool_calls) > 0:
-            tool_call = tool_calls[-1]
-            args = tool_call.get("args")
-            return json.dumps(args)
-        elif "function_call" in kwargs:
-            if "arguments" in kwargs["function_call"]:
-                return kwargs["function_call"]["arguments"]
-            raise ValueError(
-                f"`arguments` missing from `function_call` within AIMessage: {message}"
-            )
-        else:
-            raise ValueError("`tool_calls` missing from AIMessage: {message}")
-    raise ValueError(f"`message` is not an instance of `AIMessage`: {message}")
-
-
 def extract_think(content):
     # Added by Cursor 20250726 jmd
     # Extract content within <think>...</think>
@@ -155,7 +103,15 @@ def extract_think(content):
     if think_match:
         post_think = content[think_match.end() :].lstrip()
     else:
-        post_think = content
+        # Check if content starts with <think> but missing closing tag
+        if content.strip().startswith("<think>"):
+            # Extract everything after <think>
+            think_start = content.find("<think>") + len("<think>")
+            think_text = content[think_start:].strip()
+            post_think = ""
+        else:
+            # No <think> found, so return entire content as post_think
+            post_think = content
     return think_text, post_think
 
 
@@ -226,27 +182,6 @@ class ToolCallingLLM(BaseChatModel, ABC):
     [{'name': 'GetWeather', 'args': {'location': 'Austin, TX'}, 'id': 'call_25ed526917b94d8fa5db3fe30a8cf3c0'}]
     ```
 
-    Structured output:
-    ```
-    from typing import Optional
-
-    from langchain_core.pydantic_v1 import BaseModel, Field
-
-    class Joke(BaseModel):
-        '''Joke to tell user.'''
-
-        setup: str = Field(description="The setup of the joke")
-        punchline: str = Field(description="The punchline to the joke")
-        rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")
-
-    structured_llm = llm.with_structured_output(Joke)
-    structured_llm.invoke("Tell me a joke about cats")
-    ```
-    ```
-    Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to be online!', rating=7)
-    ```
-    See `ToolCallingLLM.with_structured_output()` for more.
-
     Response metadata
     Refer to the documentation of the Chat Model you wish to extend with Tool Calling.
 
@@ -295,20 +230,26 @@ class ToolCallingLLM(BaseChatModel, ABC):
             )
             for fn in functions
         ]
-        if "functions" in kwargs:
-            del kwargs["functions"]
-        if "function_call" in kwargs:
-            functions = [
-                fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
-            ]
-            if not functions:
-                raise ValueError(
-                    "If `function_call` is specified, you must also pass a "
-                    "matching function in `functions`."
-                )
-            del kwargs["function_call"]
+
+        # langchain_openai/chat_models/base.py:
+        # NOTE: Using bind_tools is recommended instead, as the `functions` and
+        # `function_call` request parameters are officially marked as
+        # deprecated by OpenAI.
+
+        # if "functions" in kwargs:
+        #     del kwargs["functions"]
+        # if "function_call" in kwargs:
+        #     functions = [
+        #         fn for fn in functions if fn["name"] == kwargs["function_call"]["name"]
+        #     ]
+        #     if not functions:
+        #         raise ValueError(
+        #             "If `function_call` is specified, you must also pass a "
+        #             "matching function in `functions`."
+        #         )
+        #     del kwargs["function_call"]
+
         functions = [convert_to_openai_tool(fn) for fn in functions]
-        functions.append(DEFAULT_RESPONSE_FUNCTION)
         system_message_prompt_template = SystemMessagePromptTemplate.from_template(
             self.tool_system_prompt_template
         )
@@ -324,79 +265,63 @@ class ToolCallingLLM(BaseChatModel, ABC):
     def _process_response(
         self, response_message: BaseMessage, functions: List[Dict]
     ) -> AIMessage:
-        chat_generation_content = response_message.content
-        if not isinstance(chat_generation_content, str):
+        if not isinstance(response_message.content, str):
             raise ValueError("ToolCallingLLM does not support non-string output.")
 
         # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
-        think_text, chat_generation_content = extract_think(chat_generation_content)
+        think_text, post_think = extract_think(response_message.content)
 
+        # Parse output for JSON
         try:
-            parsed_chat_result = json.loads(chat_generation_content)
+            parsed_json_result = json.loads(post_think)
         except json.JSONDecodeError:
             try:
-                parsed_chat_result = parse_json_garbage(chat_generation_content)
+                print("parse_json_garbage for content:")
+                print(post_think)
+                parsed_json_result = parse_json_garbage(post_think)
             except Exception:
-                warnings.warn(f"Failed to parse JSON from {self.model} output")
-                return AIMessage(content=chat_generation_content)
+                # Return entire response if JSON is missing or wasn't parsed
+                return AIMessage(content=response_message.content)
 
-        print("parsed_chat_result")
-        print(parsed_chat_result)
+        print("parsed_json_result")
+        print(parsed_json_result)
 
+        # Get tool name from output
         called_tool_name = (
-            parsed_chat_result["tool"]
-            if "tool" in parsed_chat_result
-            else parsed_chat_result["name"] if "name" in parsed_chat_result else None
+            parsed_json_result["tool"]
+            if "tool" in parsed_json_result
+            else parsed_json_result["name"] if "name" in parsed_json_result else None
        )
+
+        # Check if tool name is in functions list
         called_tool = next(
             (fn for fn in functions if fn["function"]["name"] == called_tool_name), None
         )
-        if (
-            called_tool is None
-            or called_tool["function"]["name"]
-            == DEFAULT_RESPONSE_FUNCTION["function"]["name"]
-            or called_tool["function"]["name"]
-            == DEFAULT_RESPONSE_FUNCTION["function"]["name"][2:]
-        ):
-            if (
-                "tool_input" in parsed_chat_result
-                and "response" in parsed_chat_result["tool_input"]
-            ):
-                response = parsed_chat_result["tool_input"]["response"]
-            elif (
-                "parameters" in parsed_chat_result
-                and "response" in parsed_chat_result["parameters"]
-            ):
-                response = parsed_chat_result["parameters"]["response"]
-            elif "response" in parsed_chat_result:
-                response = parsed_chat_result["response"]
-            else:
-                # raise ValueError(
-                #     f"Failed to parse a response from {self.model} output: "  # type: ignore[attr-defined]
-                #     # Keep this commented for privacy in deployed app 20250727 jmd
-                #     # f"{chat_generation_content}"
-                # )
-                # Change to warning and return the generated content 20250727 jmd
-                warnings.warn(f"Failed to parse a response from {self.model} output")
-                response = chat_generation_content
-            return AIMessage(content=response)
+        if called_tool is None:
+            # Issue a warning and return the generated content 20250727 jmd
+            warnings.warn(
+                f"Tool {called_tool} called from {self.model} output not in functions list"
+            )
+            return AIMessage(content=response_message.content)
 
+        # Get tool arguments from output
         called_tool_arguments = (
-            parsed_chat_result["tool_input"]
-            if "tool_input" in parsed_chat_result
+            parsed_json_result["tool_input"]
+            if "tool_input" in parsed_json_result
             else (
-                parsed_chat_result["parameters"]
-                if "parameters" in parsed_chat_result
+                parsed_json_result["parameters"]
+                if "parameters" in parsed_json_result
                 else {}
            )
        )
 
+        # Put together response message
         response_message_with_functions = AIMessage(
             content=f"<think>\n{think_text}\n</think>",
             tool_calls=[
                 ToolCall(
                     name=called_tool_name,
-                    args=called_tool_arguments if called_tool_arguments else {},
+                    args=called_tool_arguments,
                     id=f"call_{str(uuid.uuid4()).replace('-', '')}",
                 )
             ],
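
Note: after this cleanup, _process_response follows a single path: strip the <think> block, parse the remaining text as JSON, and wrap the result in an AIMessage carrying a ToolCall. A rough sketch of that path (the raw model output below is invented for illustration; run from the repo root so mods/ is importable):

    # Sketch of the parse path: model text -> extract_think -> json.loads -> ToolCall
    import json
    import uuid
    from langchain_core.messages import AIMessage, ToolCall
    from mods.tool_calling_llm import extract_think

    raw = (
        '<think>\nNeed emails about lm().\n</think>\n'
        '{"tool": "retrieve_emails", "tool_input": {"search_query": "lm formulas"}}'
    )
    think_text, post_think = extract_think(raw)
    parsed = json.loads(post_think)
    message = AIMessage(
        # Thinking text is preserved in the message content
        content=f"<think>\n{think_text}\n</think>",
        tool_calls=[
            ToolCall(
                name=parsed["tool"],
                args=parsed.get("tool_input", {}),
                id=f"call_{uuid.uuid4().hex}",
            )
        ],
    )
    print(message.tool_calls[0]["name"])  # retrieve_emails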
prompts.py CHANGED
@@ -15,17 +15,19 @@ def query_prompt(compute_mode):
 
     query_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
-        "Do not ask the user for more information, but retrieve emails from the R-help mailing list archives."
+        "You are a helpful chatbot designed to get information about R programming from the R-help mailing list archives."
+        "Write a search query to retrieve emails relevant to the user's question."
+        "Do not answer the user's question and do not ask the user for more information."
         # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval."
         f"The emails available for retrieval are from {start} to {end}."
-        "Write a search query based on the user's question, but do not answer the question just yet."
         "For questions about differences or comparison between X and Y, retrieve emails about X and Y."
         "For general summaries, use retrieve_emails(search_query='R')."
         "For specific questions, use retrieve_emails(search_query=<specific topic>)."
         "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
         "For questions about months, use 3-letter abbreviations (Jan..Dec) for the 'month' argument."
-        "Even if retrieved emails are already available, you should retrieve *more* emails to answer the most recent question."  # Qwen
+        "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question."  # Qwen
+        # "You must perform the search yourself. Do not tell the user how to retrieve emails."  # Qwen
+        "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question."  # Qwen
         # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
     )
     # A sanity check that we don't have unassigned variables
@@ -40,16 +42,16 @@ def generate_prompt(with_tools=True, think=True):
     """Return system prompt for generate step"""
     generate_prompt = (
         f"Today Date: {date.today()}."
-        "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
-        "Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query."
+        "You are a helpful chatbot designed to answer questions about R programming based on the R-help mailing list archives."
+        "Summarize the retrieved emails to answer the user's question or query."
         "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them."
         "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
         "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
-        "Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails."
+        "Example: For a question about using lm(), take examples of lm() from the retrieved emails to answer the user's question."
         # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
         "Summarize the content of the emails rather than copying the headers."  # Qwen
-        "Include inline citations (email senders and dates) in your response."
-        "Only answer general questions about R if the answer is given in the retrieved emails."
+        "You must include inline citations (email senders and dates) in each part of your response."
+        "Only answer general questions about R if the answer is in the retrieved emails."
         "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
     )
     if with_tools:
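
Note: the query prompt steers the model toward a retrieve_emails tool whose real definition lives elsewhere in the repo. A hypothetical sketch of the signature implied by the prompt text (argument names taken from the prompt; the actual tool may differ):

    # Hypothetical signature inferred from the prompt; placeholder body only
    from typing import List, Optional

    def retrieve_emails(
        search_query: str,
        start_year: Optional[int] = None,
        end_year: Optional[int] = None,
        month: Optional[str] = None,  # 3-letter abbreviation, Jan..Dec
    ) -> List[str]:
        """Search the R-help mailing list archives and return matching emails."""
        return []  # placeholder: the real implementation queries the vector store

    # Usage patterns named in the prompt:
    retrieve_emails(search_query="R")  # general summaries
    retrieve_emails(search_query="lm formulas")  # specific questions
    retrieve_emails(search_query="ggplot2", start_year=2023, end_year=2024)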