import unittest
from unittest.mock import Mock
import pprint
from langchain_core.language_models import BaseLanguageModel
from langchain_openai import ChatOpenAI
from meta_prompt import *
from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
from langgraph.graph import END
import os

class TestMetaPromptGraph(unittest.TestCase):
    def setUp(self):
        # logging.basicConfig(level=logging.DEBUG)
        pass


    def test_prompt_node(self):
        """
        Test the _prompt_node method of MetaPromptGraph.

        This test case sets up a mock language model that returns a response content
        and verifies that the updated state has the output attribute updated with
        the mocked response content.
        """
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
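        # The two-argument lambda mirrors Runnable.invoke(input, config=None),
        # so the mock keeps working if the node passes a config through.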
        llm.invoke = lambda x, y=None: "Mocked response content"

        llms = {
            NODE_PROMPT_INITIAL_DEVELOPER: llm
        }

        graph = MetaPromptGraph(llms=llms)
        state = AgentState(
            user_message="Test message", expected_output="Expected output"
        )
        updated_state = graph._prompt_node(
            NODE_PROMPT_INITIAL_DEVELOPER, "output", state
        )

        assert (
            updated_state['output'] == "Mocked response content"
        ), "The output attribute should be updated with the mocked response content"


    def test_output_history_analyzer(self):
        """
        Test the _output_history_analyzer method of MetaPromptGraph.

        This test case sets up a mock language model that returns an analysis
        response and verifies that the updated state has the best output, best
        system message, and best output age updated correctly.
        """
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
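        # The mocked analysis names output #2 (the current output) as the closer one,
        # so the analyzer is expected to promote it to best_output below.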
        llm.invoke = lambda x, y: "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
        prompts = {}
        meta_prompt_graph = MetaPromptGraph(llms=llm, prompts=prompts)
        state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",
            output="To reverse a list in Python, you can use the `[::-1]` slicing.",
            system_message="To reverse a list, use slicing or the reverse method.",
            best_output="To reverse a list in Python, use the `reverse()` method.",
            best_system_message="To reverse a list, use the `reverse()` method.",
            acceptance_criteria="The output should correctly describe how to reverse a list in Python.",
        )

        updated_state = meta_prompt_graph._output_history_analyzer(state)

        assert (
            updated_state['best_output'] == state['output']
        ), "Best output should be updated to the current output."
        assert (
            updated_state['best_system_message'] == state['system_message']
        ), "Best system message should be updated to the current system message."
        assert (
            updated_state['best_output_age'] == 0
        ), "Best output age should be reset to 0."


    def test_prompt_analyzer_accept(self):
        """
        Test the _prompt_analyzer method of MetaPromptGraph when the prompt analyzer
        accepts the output.

        This test case sets up a mock language model that returns an acceptance
        response and verifies that the updated state has the accepted attribute
        set to True.
        """
        # llms = {
        #     NODE_PROMPT_ANALYZER: lambda prompt: "{\"Accept\": \"Yes\"}"
        # }
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
        llm.invoke = lambda x, y: "{\"Accept\": \"Yes\"}"
        meta_prompt_graph = MetaPromptGraph(llms=llm)
        state = AgentState(
            output="Test output", expected_output="Expected output",
            acceptance_criteria="Acceptance criteria: ...",
            system_message="System message: ...",
            max_output_age=2
        )
        updated_state = meta_prompt_graph._prompt_analyzer(state)
        assert updated_state['accepted'] is True


    def test_get_node_names(self):
        """
        Test the get_node_names method of MetaPromptGraph.

        This test case verifies that the get_node_names method returns the
        correct list of node names.
        """
        graph = MetaPromptGraph()
        node_names = graph.get_node_names()
        self.assertEqual(node_names, META_PROMPT_NODES)


    def test_workflow_execution(self):
        """
        Test the workflow execution of the MetaPromptGraph.

        This test case sets up a MetaPromptGraph with a single language model and
        executes it with a given input state. It then verifies that the output
        state contains the expected keys and values.
        """
        model_name = os.getenv("TEST_MODEL_NAME_EXECUTOR")
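        # Live test: requires OpenAI-compatible credentials (typically OPENAI_API_KEY)
        # in the environment, alongside TEST_MODEL_NAME_EXECUTOR.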
        raw_llm = ChatOpenAI(model_name=model_name)

        llms = {
            NODE_PROMPT_INITIAL_DEVELOPER: raw_llm,
            NODE_ACCEPTANCE_CRITERIA_DEVELOPER: raw_llm,
            NODE_PROMPT_DEVELOPER: raw_llm,
            NODE_PROMPT_EXECUTOR: raw_llm,
            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm,
            NODE_PROMPT_ANALYZER: raw_llm,
            NODE_PROMPT_SUGGESTER: raw_llm,
        }

        meta_prompt_graph = MetaPromptGraph(llms=llms)
        input_state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="Use the `[::-1]` slicing technique or the "
                            "`list.reverse()` method.",
            acceptance_criteria="Similar in meaning, text length and style.",
            max_output_age=2
        )
        output_state = meta_prompt_graph(input_state, recursion_limit=25)

        pprint.pp(output_state)
        assert (
            "best_system_message" in output_state
        ), "The output state should contain the key 'best_system_message'"
        assert (
            output_state["best_system_message"] is not None
        ), "The best system message should not be None"
        if (
            "best_system_message" in output_state
            and output_state["best_system_message"] is not None
        ):
            print(output_state["best_system_message"])

        user_message = "How can I create a list of numbers in Python?"
        messages = [("system", output_state["best_system_message"]), ("human", user_message)]
        result = raw_llm.invoke(messages)

        assert hasattr(result, "content"), "The result should have the attribute 'content'"
        print(result.content)


    def test_workflow_execution_with_llms(self):
        """
        Test the workflow execution of the MetaPromptGraph with multiple LLMs.

        This test case sets up a MetaPromptGraph with multiple language models and
        executes it with a given input state. It then verifies that the output
        state contains the expected keys and values.
        """
        optimizer_llm = ChatOpenAI(
            model_name=os.getenv("TEST_MODEL_NAME_OPTIMIZER"), temperature=0.5
        )
        executor_llm = ChatOpenAI(
            model_name=os.getenv("TEST_MODEL_NAME_EXECUTOR"), temperature=0.01
        )
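        # A higher-temperature optimizer writes and critiques prompts, while a
        # near-deterministic executor runs each candidate prompt.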

        llms = {
            NODE_PROMPT_INITIAL_DEVELOPER: optimizer_llm,
            NODE_ACCEPTANCE_CRITERIA_DEVELOPER: optimizer_llm,
            NODE_PROMPT_DEVELOPER: optimizer_llm,
            NODE_PROMPT_EXECUTOR: executor_llm,
            NODE_OUTPUT_HISTORY_ANALYZER: optimizer_llm,
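            # bind() requests OpenAI JSON mode so the analyzer's verdict parses cleanly.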
            NODE_PROMPT_ANALYZER: optimizer_llm.bind(response_format={"type": "json_object"}),
            NODE_PROMPT_SUGGESTER: optimizer_llm,
        }

        meta_prompt_graph = MetaPromptGraph(llms=llms)
        input_state = AgentState(
            max_output_age=2,
            user_message="How do I reverse a list in Python?",
            expected_output="Use the `[::-1]` slicing technique or the "
                            "`list.reverse()` method.",
            # acceptance_criteria="Similar in meaning, text length and style."
        )
        output_state = meta_prompt_graph(input_state, recursion_limit=25)

        pprint.pp(output_state)
        assert (
            "best_system_message" in output_state
        ), "The output state should contain the key 'best_system_message'"
        assert (
            output_state["best_system_message"] is not None
        ), "The best system message should not be None"
        if (
            "best_system_message" in output_state
            and output_state["best_system_message"] is not None
        ):
            print(output_state["best_system_message"])

        user_message = "How can I create a list of numbers in Python?"
        messages = [("system", output_state["best_system_message"]), ("human", user_message)]
        result = executor_llm.invoke(messages)

        assert hasattr(result, "content"), "The result should have the attribute 'content'"
        print(result.content)
        

    def test_simple_workflow_execution(self):
        """
        Test the simple workflow execution of the MetaPromptGraph.

        This test case sets up a MetaPromptGraph with a mock LLM and executes it
        with a given input state. It then verifies that the output state contains
        the expected keys and values.
        """
        # Create a mock LLM that returns a fixed sequence of canned responses.
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
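        # Responses are consumed in node-execution order; the analyzer's immediate
        # acceptance ends the workflow after a single pass.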
        responses = [
            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
        ]
        # Each time llm.invoke is called, it returns the next item from responses.
        llm.invoke = lambda x, y=None: responses.pop(0)

        meta_prompt_graph = MetaPromptGraph(llms=llm)
        input_state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="The output should use the `reverse()` method.",
            acceptance_criteria="The output should be correct and efficient.",
            max_output_age=2
        )

        output_state = meta_prompt_graph(input_state)

        self.assertIsNotNone(output_state['best_system_message'])
        self.assertIsNotNone(output_state['best_output'])

        pprint.pp(output_state["best_output"])
        

    def test_iterated_workflow_execution(self):
        """
        Test the iterated workflow execution of the MetaPromptGraph.

        This test case sets up a MetaPromptGraph with a mock LLM and executes it
        with a given input state. It then verifies that the output state contains
        the expected keys and values. The test case simulates an iterated workflow
        where the LLM provides multiple responses based on the input messages.
        """
        # Create a mock LLM that returns a fixed sequence of canned responses.
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
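        # The first candidate output is rejected, the suggester and developer revise
        # the prompt, and the second output is accepted on the next iteration.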
        responses = [
            "Explain how to reverse a list in Python.",  # NODE_PROMPT_INITIAL_DEVELOPER
            "Here's one way: `my_list[::-1]`",  # NODE_PROMPT_EXECUTOR
            "{\"Accept\": \"No\"}",  # NODE_PROMPT_ANALYZER
            "Try using the `reverse()` method instead.",  # NODE_PROMPT_SUGGESTER
            "Explain how to reverse a list in Python. Output in a Markdown List.",  # NODE_PROMPT_DEVELOPER
            "Here's one way: `my_list.reverse()`",  # NODE_PROMPT_EXECUTOR
            "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}",  # NODE_OUTPUT_HISTORY_ANALYZER
            "{\"Accept\": \"Yes\"}",  # NODE_PROMPT_ANALYZER
        ]
        llm.invoke = lambda x, y=None: responses.pop(0)

        meta_prompt_graph = MetaPromptGraph(llms=llm)
        input_state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="The output should use the `reverse()` method.",
            acceptance_criteria="The output should be correct and efficient.",
            max_output_age=2
        )

        output_state = meta_prompt_graph(input_state)

        self.assertIsNotNone(output_state['best_system_message'])
        self.assertIsNotNone(output_state['best_output'])

        pprint.pp(output_state["best_output"])

    def test_create_acceptance_criteria_workflow(self):
        """
        Test the _create_workflow_for_node method of MetaPromptGraph.

        This test case verifies that the workflow created by _create_workflow_for_node
        for NODE_ACCEPTANCE_CRITERIA_DEVELOPER contains the correct node and edge,
        compiles, and produces acceptance criteria when invoked.
        """

        llms = {
            NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
        }
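        # Live test: requires TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER and
        # OpenAI-compatible credentials in the environment.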
        meta_prompt_graph = MetaPromptGraph(llms=llms)
        workflow = meta_prompt_graph._create_workflow_for_node(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)

        # Check if the workflow contains the correct node
        self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)

        # Check if the workflow contains the correct edge
        self.assertIn((NODE_ACCEPTANCE_CRITERIA_DEVELOPER, END), workflow.edges)

        # compile the workflow
        graph = workflow.compile()
        print(graph)

        # invoke the workflow
        state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="The output should use the `reverse()` method.",
            # system_message="Create acceptance criteria for the task of reversing a list in Python."
        )
        output_state = graph.invoke(state)

        # check if the output state contains the acceptance criteria
        self.assertIsNotNone(output_state['acceptance_criteria'])

        # check if the acceptance criteria includes string '`reverse()`'
        self.assertIn('`reverse()`', output_state['acceptance_criteria'])

        pprint.pp(output_state["acceptance_criteria"])


    def test_run_acceptance_criteria_graph(self):
        """Test running the acceptance criteria node graph of MetaPromptGraph.

        This test case verifies that run_node_graph, called with
        NODE_ACCEPTANCE_CRITERIA_DEVELOPER, returns a state with acceptance criteria.
        """
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
        llm.invoke = lambda x, y: "{\"Acceptance criteria\": \"Acceptance criteria: ...\"}"
        meta_prompt_graph = MetaPromptGraph(llms=llm)
        state = AgentState(
            user_message="How do I reverse a list in Python?",
            expected_output="The output should use the `reverse()` method.",
        )
        output_state = meta_prompt_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)

        # Check if the output state contains the acceptance criteria
        self.assertIsNotNone(output_state["acceptance_criteria"])

        # Check if the acceptance criteria includes the expected content
        self.assertIn("Acceptance criteria: ...", output_state["acceptance_criteria"])


    def test_run_prompt_initial_developer_graph(self):
        """Test running the initial developer node graph of MetaPromptGraph.

        This test case verifies that run_node_graph, called with
        NODE_PROMPT_INITIAL_DEVELOPER, returns a state with an initial developer prompt.
        """
        llm = Mock(spec=BaseLanguageModel)
        llm.config_specs = []
        llm.invoke = lambda x, y: "{\"Initial developer prompt\": \"Initial developer prompt: ...\"}"
        meta_prompt_graph = MetaPromptGraph(llms=llm)
        state = AgentState(user_message="How do I reverse a list in Python?")
        output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)

        # Check if the output state contains the initial developer prompt
        self.assertIsNotNone(output_state['system_message'])

        # Check if the initial developer prompt includes the expected content
        self.assertIn("Initial developer prompt: ...", output_state['system_message'])


if __name__ == '__main__':
    unittest.main()