Updated unit test. Updated UI.
Files changed:
- app/gradio_meta_prompt.py +23 -10
- config.yml +43 -31
- meta_prompt/consts.py +44 -30
- meta_prompt/meta_prompt.py +24 -7
- tests/meta_prompt_graph_test.py +42 -46
app/gradio_meta_prompt.py
CHANGED
@@ -301,14 +301,16 @@ with gr.Blocks(title='Meta Prompt') as demo:
             with gr.Row():
                 evaluate_initial_system_message_button = gr.Button(
                     value="Evaluate",
-                    variant="secondary"
+                    variant="secondary",
+                    interactive=False
                 )
                 generate_initial_system_message_button = gr.Button(
                     value="Generate",
-                    variant="secondary"
+                    variant="secondary",
+                    interactive=False
                 )
                 pull_task_description_output_button = gr.Button(
-                    value="→ Pull
+                    value="→ Pull Description", variant="secondary")
                 pull_system_message_output_button = gr.Button(
                     value="Pull Output ←", variant="secondary")

@@ -318,10 +320,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
                     show_copy_button=True
                 )
                 with gr.Row():
-                    evaluate_acceptance_criteria_input_button = gr.Button(
+                    evaluate_acceptance_criteria_input_button = gr.Button(
+                        value="Evaluate",
+                        variant="secondary",
+                        interactive=False
+                    )
                     generate_acceptance_criteria_button = gr.Button(
                         value="Generate",
-                        variant="secondary"
+                        variant="secondary",
+                        interactive=False
                     )
                     pull_acceptance_criteria_output_button = gr.Button(
                         value="Pull Output ←", variant="secondary")

@@ -454,18 +461,18 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 label="System Message", show_copy_button=True)
             with gr.Row():
                 evaluate_system_message_button = gr.Button(
-                    value="Evaluate", variant="secondary")
+                    value="Evaluate", variant="secondary", interactive=False)
             output_output = gr.Textbox(
                 label="Output", show_copy_button=True)
             with gr.Group():
                 acceptance_criteria_output = gr.Textbox(
                     label="Acceptance Criteria", show_copy_button=True)
                 evaluate_acceptance_criteria_output_button = gr.Button(
-                    value="Evaluate", variant="secondary")
+                    value="Evaluate", variant="secondary", interactive=False)
             analysis_output = gr.Textbox(
                 label="Analysis", show_copy_button=True)
             flag_button = gr.Button(
-                value="Flag", variant="secondary", visible=config.allow_flagging)
+                value="Flag", variant="secondary", visible=config.allow_flagging, interactive=False)
             with gr.Accordion("Details", open=False, visible=config.verbose):
                 logs_chatbot = gr.Chatbot(
                     label='Messages', show_copy_button=True, layout='bubble',

@@ -713,9 +720,15 @@ with gr.Blocks(title='Meta Prompt') as demo:
     )

     prompt_inputs_ready_state.change(
-        fn=lambda x: gr.update(interactive=x),
+        fn=lambda x: [gr.update(interactive=x)] * 8,
         inputs=[prompt_inputs_ready_state],
-        outputs=[
+        outputs=[
+            prompt_submit_button,
+            evaluate_initial_system_message_button, generate_initial_system_message_button,
+            evaluate_system_message_button, evaluate_acceptance_criteria_input_button,
+            generate_acceptance_criteria_button, evaluate_acceptance_criteria_output_button,
+            flag_button
+        ],
     )

     simple_llm_tab.select(
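The UI change above follows one pattern: the action buttons now start with `interactive=False`, and a single `prompt_inputs_ready_state.change` handler re-enables or disables all eight of them by returning one `gr.update(interactive=x)` per output component. A minimal, self-contained sketch of that pattern, assuming a recent Gradio release where `gr.State` emits `.change` events; the component names are illustrative, not the app's own:

```python
import gradio as gr

with gr.Blocks() as demo:
    # Hypothetical readiness flag; the real app derives it from its prompt inputs.
    ready_state = gr.State(value=False)

    user_input = gr.Textbox(label="User Message")
    # Buttons start disabled, mirroring interactive=False in the diff above.
    evaluate_button = gr.Button("Evaluate", variant="secondary", interactive=False)
    generate_button = gr.Button("Generate", variant="secondary", interactive=False)

    # Flip the readiness flag once the textbox has content.
    user_input.change(
        fn=lambda text: bool(text.strip()),
        inputs=[user_input],
        outputs=[ready_state],
    )

    # One handler toggles every button: return one gr.update per output component.
    ready_state.change(
        fn=lambda ready: [gr.update(interactive=ready)] * 2,
        inputs=[ready_state],
        outputs=[evaluate_button, generate_button],
    )

if __name__ == "__main__":
    demo.launch()
```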
config.yml
CHANGED
@@ -336,49 +336,61 @@ prompt_templates:
   prompt_analyzer:
     - role: system
       message: |
-        **TASK:** Compare the Expected Output with the Actual Output according to the Acceptance Criteria. Provide a JSON output with your analysis.
-
-        **Requirements:**
-        - Compare Expected and Actual Outputs strictly following the Acceptance Criteria.
-        - Set `Accept` to "Yes" only if all criteria are met; otherwise, set it to "No."
-        - List acceptable and unacceptable differences based on the criteria.
-
-        **Output Format:** JSON with:
-        - `Accept: (Yes/No)`
-        - `Acceptable Differences: []`
-        - `Unacceptable Differences: []`
-
-        **Example Output:**
-        ```json
         {{
+          "task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+          "requirements": [
+            "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+            "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+            "List acceptable and unacceptable differences based on the criteria"
+          ],
+          "output_format": {{
+            "type": "object",
+            "properties": {{
+              "Accept": {{
+                "type": "string",
+                "enum": ["Yes", "No"]
+              }},
+              "Acceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }},
+              "Unacceptable Differences": {{
+                "type": "array",
+                "items": {{
+                  "type": "string"
+                }}
+              }}
+            }},
+            "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+          }},
+          "output_example": {{
           "Accept": "No",
           "Acceptable Differences": [
-
+            "Spelling variations: 'colour' vs 'color'"
           ],
           "Unacceptable Differences": [
-
-
+            "Missing section: 'Conclusion'",
+            "Incorrect date format: '2023/10/12' vs '12-10-2023'"
           ]
+          }}
         }}
-        ```
-
-        # Acceptance Criteria
-
-        {acceptance_criteria}

     - role: human
       message: |
-
-
-        ```
+        <|Start_Expected_Output|>
         {expected_output}
-
-
-
-
-
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
+        {expected_output}
+        <|End_Expected_Output|>
+        <|Start_Actual_Output|>
         {output}
-
+        <|End_Actual_Output|>
+        <|Start_Acceptance_Criteria|>
+        {acceptance_criteria}
+        <|End_Acceptance_Criteria|>

   prompt_suggester:
     - role: system
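Note the doubled braces in the new message: ChatPromptTemplate treats single braces as template variables, so literal JSON braces have to be written as `{{` and `}}`, while `{acceptance_criteria}`, `{expected_output}` and `{output}` stay real placeholders. A small sketch of how a YAML block like this could be loaded and formatted; the loading code here is an assumption for illustration, not the app's own loader:

```python
import yaml
from langchain_core.prompts import ChatPromptTemplate

# Trimmed-down stand-in for the prompt_analyzer entry in config.yml.
raw = """
prompt_analyzer:
  - role: system
    message: |
      {{
        "task_description": "Compare outputs against the Acceptance Criteria.",
        "acceptance_criteria": "{acceptance_criteria}"
      }}
  - role: human
    message: |
      <|Start_Expected_Output|>
      {expected_output}
      <|End_Expected_Output|>
"""

messages = yaml.safe_load(raw)["prompt_analyzer"]
template = ChatPromptTemplate.from_messages(
    [(m["role"], m["message"]) for m in messages]
)

# {{ }} come out as literal braces; {acceptance_criteria} etc. are filled in.
print(template.format_messages(
    acceptance_criteria="Must mention reverse().",
    expected_output="Use list.reverse().",
))
```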
meta_prompt/consts.py
CHANGED
@@ -222,46 +222,60 @@ Create a [name], Here's the descriptions [description]. Start with "GPT Descript
 """)
     ]),
     NODE_PROMPT_ANALYZER: ChatPromptTemplate.from_messages([
-        ("system", """
-
-
-
-
-
-
-
-
-
-
-
-
-
-{{
+        ("system", """{{
+    "task_description": "Compare the Expected Output with the Actual Output according to the Acceptance Criteria and provide a JSON output with the analysis.",
+    "requirements": [
+        "Strictly follow the Acceptance Criteria to compare Expected and Actual Outputs",
+        "Set 'Accept' to 'Yes' only if all criteria are met, otherwise set it to 'No'",
+        "List acceptable and unacceptable differences based on the criteria"
+    ],
+    "output_format": {{
+        "type": "object",
+        "properties": {{
+            "Accept": {{
+                "type": "string",
+                "enum": ["Yes", "No"]
+            }},
+            "Acceptable Differences": {{
+                "type": "array",
+                "items": {{
+                    "type": "string"
+                }}
+            }},
+            "Unacceptable Differences": {{
+                "type": "array",
+                "items": {{
+                    "type": "string"
+                }}
+            }}
+        }},
+        "required": ["Accept", "Acceptable Differences", "Unacceptable Differences"]
+    }},
+    "output_example": {{
     "Accept": "No",
     "Acceptable Differences": [
-
+        "Spelling variations: 'colour' vs 'color'"
     ],
     "Unacceptable Differences": [
-
-
+        "Missing section: 'Conclusion'",
+        "Incorrect date format: '2023/10/12' vs '12-10-2023'"
     ]
+    }}
 }}
 ```
-
-# Acceptance Criteria
-
-{acceptance_criteria}
 """),
-        ("human", """
-
-```
+        ("human", """<|Start_Expected_Output|>
 {expected_output}
-
-
-
-
-
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
+{expected_output}
+<|End_Expected_Output|>
+<|Start_Actual_Output|>
 {output}
+<|End_Actual_Output|>
+<|Start_Acceptance_Criteria|>
+{acceptance_criteria}
+<|End_Acceptance_Criteria|>
 ```
 """)
     ]),
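Because every other brace in the new system message is escaped, only the tagged placeholders should survive as template inputs. A quick sanity check along these lines is possible against the public ChatPromptTemplate API; this is a sketch with an abbreviated template, not the project's actual constant:

```python
from langchain_core.prompts import ChatPromptTemplate

# Abbreviated version of the NODE_PROMPT_ANALYZER template from the hunk above.
analyzer_template = ChatPromptTemplate.from_messages([
    ("system", """{{
    "task_description": "Compare the Expected Output with the Actual Output.",
    "output_example": {{"Accept": "No"}}
}}"""),
    ("human", """<|Start_Expected_Output|>
{expected_output}
<|End_Expected_Output|>
<|Start_Actual_Output|>
{output}
<|End_Actual_Output|>
<|Start_Acceptance_Criteria|>
{acceptance_criteria}
<|End_Acceptance_Criteria|>"""),
])

# Only the unescaped placeholders are treated as inputs.
print(sorted(analyzer_template.input_variables))
# ['acceptance_criteria', 'expected_output', 'output']
```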
meta_prompt/meta_prompt.py
CHANGED
@@ -7,7 +7,9 @@ from langgraph.checkpoint.memory import MemorySaver
 from langgraph.errors import GraphRecursionError
 from langgraph.graph import StateGraph, START, END
 from langchain_core.runnables.base import RunnableLike
-from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
+from langchain_core.runnables import RunnableLambda
+from openai import BadRequestError
 from pydantic import BaseModel
 from typing import Annotated, Dict, Optional, Union, TypedDict
 from .consts import *

@@ -400,17 +402,17 @@ class MetaPromptGraph:
             }
         )

-
+        chain = self.llms[node] | StrOutputParser()
+        response = chain.invoke(formatted_messages)
         logger.debug(
             {
                 'node': node,
                 'action': 'response',
-                '
-                'message': response.content
+                'message': response
             }
         )

-        return {target_attribute: response
+        return {target_attribute: response}


     def _output_history_analyzer(self, state: AgentState) -> AgentState:

@@ -451,7 +453,14 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_OUTPUT_HISTORY_ANALYZER] | self.llms[NODE_OUTPUT_HISTORY_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,), # Retry only on ValueError
+            wait_exponential_jitter=True, # Add jitter to the exponential backoff
+            stop_after_attempt=2 # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "analysis": "",
+            "closerOutputID": 0
+        })])
         analysis_dict = chain.invoke(state)

         logger.debug({

@@ -511,7 +520,15 @@ class MetaPromptGraph:

         chain = (
             self.prompt_templates[NODE_PROMPT_ANALYZER] | self.llms[NODE_PROMPT_ANALYZER] | JsonOutputParser()
-        )
+        ).with_retry(
+            retry_if_exception_type=(BadRequestError,), # Retry only on ValueError
+            wait_exponential_jitter=True, # Add jitter to the exponential backoff
+            stop_after_attempt=2 # Try twice
+        ).with_fallbacks([RunnableLambda(lambda x: {
+            "Accept": "No",
+            "Acceptable Differences": [],
+            "Unacceptable Differences": []
+        })])
         result = chain.invoke(state)

         logger.debug({
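The chains above combine two LangChain resilience wrappers: `.with_retry(...)` re-runs the chain when a matching exception is raised, and `.with_fallbacks([...])` substitutes a default result once the retries are exhausted. A stripped-down sketch of the same pattern, using a deliberately failing `RunnableLambda` in place of the real prompt-plus-model chain; the failure and the fallback values are illustrative only:

```python
from langchain_core.runnables import RunnableLambda

attempts = {"count": 0}

def flaky_analyzer(_state: dict) -> dict:
    # Stand-in for prompt_template | llm | JsonOutputParser() failing transiently.
    attempts["count"] += 1
    raise ValueError("malformed JSON from the model")

chain = (
    RunnableLambda(flaky_analyzer)
    .with_retry(
        retry_if_exception_type=(ValueError,),  # retry only this exception type
        wait_exponential_jitter=True,           # exponential backoff with jitter
        stop_after_attempt=2,                   # at most two attempts
    )
    .with_fallbacks([
        # Same shape as the fallback used for the prompt analyzer node above.
        RunnableLambda(lambda _state: {
            "Accept": "No",
            "Acceptable Differences": [],
            "Unacceptable Differences": [],
        })
    ])
)

print(chain.invoke({}))   # falls back: {'Accept': 'No', ...}
print(attempts["count"])  # 2 — the flaky step was retried once before falling back
```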
tests/meta_prompt_graph_test.py
CHANGED
@@ -23,12 +23,12 @@ class TestMetaPromptGraph(unittest.TestCase):
         and verifies that the updated state has the output attribute updated with
         the mocked response content.
         """
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y=None: "Mocked response content"
+
         llms = {
-            NODE_PROMPT_INITIAL_DEVELOPER:
-                invoke=MagicMock(
-                    return_value=MagicMock(content="Mocked response content")
-                )
-            )
+            NODE_PROMPT_INITIAL_DEVELOPER: llm
         }

         graph = MetaPromptGraph(llms=llms)

@@ -52,15 +52,11 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the best output, best
         system message, and best output age updated correctly.
         """
-
-
-
-            content="{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
-            )
-        )
-        }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}"
         prompts = {}
-        meta_prompt_graph = MetaPromptGraph(llms=
+        meta_prompt_graph = MetaPromptGraph(llms=llm, prompts=prompts)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",

@@ -93,12 +89,13 @@ class TestMetaPromptGraph(unittest.TestCase):
         response and verifies that the updated state has the accepted attribute
         set to True.
         """
-        llms = {
-
-
-
-
-
+        # llms = {
+        #     NODE_PROMPT_ANALYZER: lambda prompt: "{\"Accept\": \"Yes\"}"
+        # }
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Accept\": \"Yes\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             output="Test output", expected_output="Expected output",
             acceptance_criteria="Acceptance criteria: ...",

@@ -137,8 +134,8 @@ class TestMetaPromptGraph(unittest.TestCase):
             NODE_ACCEPTANCE_CRITERIA_DEVELOPER: raw_llm,
             NODE_PROMPT_DEVELOPER: raw_llm,
             NODE_PROMPT_EXECUTOR: raw_llm,
-            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm
-            NODE_PROMPT_ANALYZER: raw_llm
+            NODE_OUTPUT_HISTORY_ANALYZER: raw_llm,
+            NODE_PROMPT_ANALYZER: raw_llm,
             NODE_PROMPT_SUGGESTER: raw_llm,
         }

@@ -239,12 +236,14 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
+            "Explain how to reverse a list in Python.", # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`", # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"Yes\"}", # NODE_PPROMPT_ANALYZER
        ]
-        llm.invoke
+        # everytime llm.invoke was called, it returns a item in responses
+        llm.invoke = lambda x, y=None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -273,17 +272,18 @@ class TestMetaPromptGraph(unittest.TestCase):
         """
         # Create a mock LLM that returns predefined responses based on the input messages
         llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
         responses = [
-
-
-
-
-
-
-
-
+            "Explain how to reverse a list in Python.", # NODE_PROMPT_INITIAL_DEVELOPER
+            "Here's one way: `my_list[::-1]`", # NODE_PROMPT_EXECUTOR
+            "{\"Accept\": \"No\"}", # NODE_PPROMPT_ANALYZER
+            "Try using the `reverse()` method instead.", # NODE_PROMPT_SUGGESTER
+            "Explain how to reverse a list in Python. Output in a Markdown List.", # NODE_PROMPT_DEVELOPER
+            "Here's one way: `my_list.reverse()`", # NODE_PROMPT_EXECUTOR
+            "{\"closerOutputID\": 2, \"analysis\": \"The output should use the `reverse()` method.\"}", # NODE_OUTPUT_HISTORY_ANALYZER
+            "{\"Accept\": \"Yes\"}", # NODE_PPROMPT_ANALYZER
         ]
-        llm.invoke = lambda
+        llm.invoke = lambda x, y = None: responses.pop(0)

         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(

@@ -347,12 +347,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_acceptance_criteria_graph method
         returns a state with acceptance criteria.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Acceptance criteria\": \"Acceptance criteria: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(
             user_message="How do I reverse a list in Python?",
             expected_output="The output should use the `reverse()` method.",

@@ -372,12 +370,10 @@ class TestMetaPromptGraph(unittest.TestCase):
         This test case verifies that the run_prompt_initial_developer_graph method
         returns a state with an initial developer prompt.
         """
-
-
-
-
-        }
-        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        llm = Mock(spec=BaseLanguageModel)
+        llm.config_specs = []
+        llm.invoke = lambda x, y: "{\"Initial developer prompt\": \"Initial developer prompt: ...\"}"
+        meta_prompt_graph = MetaPromptGraph(llms=llm)
         state = AgentState(user_message="How do I reverse a list in Python?")
         output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)

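The rewritten tests drop `MagicMock(return_value=...)` in favour of plain callables: `llm.invoke = lambda x, y=None: ...` returns the scripted string directly, and `responses.pop(0)` serves the replies in call order; `llm.config_specs = []` is presumably needed because the mock is now piped into a runnable chain that expects that attribute to be an iterable. The standard-library way to get the same call-order behaviour is a list `side_effect`; a small sketch, with illustrative scripted replies:

```python
from unittest.mock import MagicMock

# A list side_effect makes each successive call return the next item,
# equivalent to the `responses.pop(0)` lambda used in the updated tests.
llm = MagicMock()
llm.invoke = MagicMock(side_effect=[
    "Explain how to reverse a list in Python.",   # initial developer
    "Here's one way: `my_list[::-1]`",            # executor
    '{"Accept": "Yes"}',                          # analyzer
])

print(llm.invoke("prompt 1"))  # first scripted reply
print(llm.invoke("prompt 2"))  # second scripted reply
print(llm.invoke("prompt 3"))  # third scripted reply
```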