File size: 4,675 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
from typing import Union

from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain.schema import AgentAction, AgentFinish, OutputParserException

FORMAT_INSTRUCTIONS0 = """Use the following format and be sure to use new lines after each task.

Question: the input question you must answer

Thought: you should always think about what to do

Action: Exactly only one word out of: {tool_names}

Action Input: the input to the action

Observation: the result of the action

... (this Thought/Action/Action Input/Observation can repeat N times)

Thought: I now know the final answer

Final Answer: the final answer to the original input question"""

FORMAT_INSTRUCTIONS = """List of tools, use exactly one word when choosing Action: {tool_names}

Only user asks a question, not you.  For example user might ask: What is the latest news?

Here is an example sequence you can follow:
Thought: I should search online for the latest news.
Action: Search
Action Input: What is the latest news?
Observation: X is going away.  Z is again happening.
Thought: That is interesting, I should search for more information about X and Z and also search about Q.
Action: Search
Action Input: How is X impacting things.  Why is Z happening again, and what are the consequences?
Observation: X is causing Y.  Z may be caused by P and will lead to H.
Thought: I now know the final answer
Final Answer: The latest news is:
* X is going away, and this is caused by Y.
* Z is happening again, and the cause is P and will lead to H.
Overall, X and Z are important problems.
"""

FORMAT_INSTRUCTIONS_PYTHON = """List of tools, use exactly one word when choosing Action: {tool_names}

Only user asks a question, not you.  For example user might ask: How many rows are in the dataset?

Here is an example sequence you can follow.  You can repeat Thoughts, but as soon as possible you should try to answer the original user question.  Once you an answer the user question, just say: Thought: I now know the final answer
Thought: I should use python_repl_ast tool.
Action: python_repl_ast
Action Input: df.shape
Observation: (25, 10)
Thought: I now know the final answer
Final Answer: There are 25 rows in the dataset.
"""


FINAL_ANSWER_ACTION = "Final Answer:"
MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = (
    "Invalid Format: Missing 'Action:' after 'Thought:"
)
MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE = (
    "Invalid Format: Missing 'Action Input:' after 'Action:'"
)
FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE = (
    "Parsing LLM output produced both a final answer and a parse-able action:"
)


class H2OMRKLOutputParser(MRKLOutputParser):
    """MRKL Output parser for the chat agent."""

    def get_format_instructions(self) -> str:
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        includes_answer = FINAL_ANSWER_ACTION in text
        regex = (
            r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        )
        action_match = re.search(regex, text, re.DOTALL)
        if includes_answer:
            return AgentFinish(
                {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
            )
        elif action_match:
            action = action_match.group(1).strip()
            action_input = action_match.group(2)
            tool_input = action_input.strip(" ")
            # ensure if its a well formed SQL query we don't remove any trailing " chars
            if tool_input.startswith("SELECT ") is False:
                tool_input = tool_input.strip('"')

            return AgentAction(action, tool_input, text)

        if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
            raise OutputParserException(
                f"Could not parse LLM output: `{text}`",
                observation=MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
                llm_output=text,
                send_to_llm=True,
            )
        elif not re.search(
            r"[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)", text, re.DOTALL
        ):
            raise OutputParserException(
                f"Could not parse LLM output: `{text}`",
                observation=MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
                llm_output=text,
                send_to_llm=True,
            )
        else:
            raise OutputParserException(f"Could not parse LLM output: `{text}`")

    @property
    def _type(self) -> str:
        return "mrkl"


class H2OPythonMRKLOutputParser(H2OMRKLOutputParser):
    def get_format_instructions(self) -> str:
        return FORMAT_INSTRUCTIONS_PYTHON