Refactored code.

- app/gradio_meta_prompt.py  +111 -56
- meta_prompt/__init__.py    +10 -1
- meta_prompt/consts.py      +220 -0
- meta_prompt/meta_prompt.py +78 -301
app/gradio_meta_prompt.py
CHANGED
@@ -3,7 +3,6 @@ import io
 import json
 import logging
 from pathlib import Path
-import pprint
 from typing import Any, Dict, Union
 import gradio as gr
 from gradio import CSVLogger, Button, utils
@@ -13,8 +12,10 @@ from confz import BaseConfig, CLArgSource, EnvSource, FileSource
 from app.config import MetaPromptConfig
 from langchain_core.language_models import BaseLanguageModel
 from langchain_openai import ChatOpenAI
-from meta_prompt import
+from meta_prompt import *
 from pythonjsonlogger import jsonlogger
+import pprint
+

 class SimplifiedCSVLogger(CSVLogger):
     """
@@ -65,15 +66,19 @@ class SimplifiedCSVLogger(CSVLogger):
         line_count = len(list(csv.reader(csvfile))) - 1
         return line_count

+
 class LLMModelFactory:
-
-
+    _instance = None
+
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super(LLMModelFactory, cls).__new__(cls)
+        return cls._instance

     def create(self, model_type: str, **kwargs):
         model_class = globals()[model_type]
         return model_class(**kwargs)
-
-llm_model_factory = LLMModelFactory()
+

 def chat_log_2_chatbot_list(chat_log: str):
     chatbot_list = []
@@ -95,8 +100,10 @@ def chat_log_2_chatbot_list(chat_log: str):
             print(line)
     return chatbot_list

-
-
+
+def process_message(user_message, expected_output, acceptance_criteria,
+                    initial_system_message, recursion_limit: int,
+                    max_output_age: int,
                     llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]):
     # Create the input state
     input_state = AgentState(
@@ -106,7 +113,7 @@ def process_message(user_message, expected_output, acceptance_criteria, initial_
         system_message=initial_system_message,
         max_output_age=max_output_age
     )
-
+
     # Get the output state from MetaPromptGraph
     log_stream = io.StringIO()
     log_handler = None
@@ -114,10 +121,12 @@ def process_message(user_message, expected_output, acceptance_criteria, initial_
     if config.verbose:
         log_handler = logging.StreamHandler(log_stream)
         logger = logging.getLogger(MetaPromptGraph.__name__)
-        log_handler.setFormatter(jsonlogger.JsonFormatter(
+        log_handler.setFormatter(jsonlogger.JsonFormatter(
+            '%(asctime)s %(name)s %(levelname)s %(message)s'))
         logger.addHandler(log_handler)

-    meta_prompt_graph = MetaPromptGraph(
+    meta_prompt_graph = MetaPromptGraph(
+        llms=llms, verbose=config.verbose, logger=logger)
     output_state = meta_prompt_graph(input_state, recursion_limit=recursion_limit)

     if config.verbose:
@@ -125,7 +134,7 @@ def process_message(user_message, expected_output, acceptance_criteria, initial_
         log_output = log_stream.getvalue()
     else:
         log_output = None
-
+
     # Validate the output state
     system_message = ''
     output = ''
@@ -146,7 +155,8 @@ def process_message(user_message, expected_output, acceptance_criteria, initial_
     else:
         analysis = "Error: The output state does not contain a valid 'analysis'"

-    return system_message, output, analysis,
+    return (system_message, output, analysis,
+            chat_log_2_chatbot_list(log_output))


 def process_message_with_single_llm(user_message, expected_output, acceptance_criteria, initial_system_message,
@@ -155,31 +165,33 @@ def process_message_with_single_llm(user_message, expected_output, acceptance_cr
     # Get the output state from MetaPromptGraph
     type = config.llms[model_name].type
     args = config.llms[model_name].model_dump(exclude={'type'})
-    llm =
+    llm = LLMModelFactory().create(type, **args)

     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llm)

+
 def process_message_with_2_llms(user_message, expected_output, acceptance_criteria, initial_system_message,
-
-
+                                recursion_limit: int, max_output_age: int,
+                                optimizer_model_name: str, executor_model_name: str,):
     # Get the output state from MetaPromptGraph
-    optimizer_model =
+    optimizer_model = LLMModelFactory().create(config.llms[optimizer_model_name].type,
                                                **config.llms[optimizer_model_name].model_dump(exclude={'type'}))
-    executor_model =
+    executor_model = LLMModelFactory().create(config.llms[executor_model_name].type,
                                               **config.llms[executor_model_name].model_dump(exclude={'type'}))
     llms = {
-
-
-
-
-
-
+        NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
+        NODE_PROMPT_DEVELOPER: optimizer_model,
+        NODE_PROMPT_EXECUTOR: executor_model,
+        NODE_OUTPUT_HISTORY_ANALYZER: optimizer_model,
+        NODE_PROMPT_ANALYZER: optimizer_model,
+        NODE_PROMPT_SUGGESTER: optimizer_model
     }

     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llms)

+
 class FileConfig(BaseConfig):
     config_file: str = 'config.yml'  # default path

@@ -216,10 +228,12 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 label="Acceptance Criteria", show_copy_button=True)
             initial_system_message_input = gr.Textbox(
                 label="Initial System Message", show_copy_button=True, value="")
-            recursion_limit_input = gr.Number(
-
-
-
+            recursion_limit_input = gr.Number(
+                label="Recursion Limit", value=config.recursion_limit,
+                precision=0, minimum=1, maximum=config.recursion_limit_max, step=1)
+            max_output_age = gr.Number(
+                label="Max Output Age", value=config.max_output_age,
+                precision=0, minimum=1, maximum=config.max_output_age_max, step=1)
             with gr.Row():
                 with gr.Tab('Simple'):
                     model_name_input = gr.Dropdown(
@@ -229,7 +243,8 @@ with gr.Blocks(title='Meta Prompt') as demo:
                     )
                     # Connect the inputs and outputs to the function
                     with gr.Row():
-                        submit_button = gr.Button(
+                        submit_button = gr.Button(
+                            value="Submit", variant="primary")
                         clear_button = gr.ClearButton(
                             [user_message_input, expected_output_input,
                              acceptance_criteria_input, initial_system_message_input],
@@ -247,45 +262,85 @@ with gr.Blocks(title='Meta Prompt') as demo:
                    )
                    # Connect the inputs and outputs to the function
                    with gr.Row():
-                        multiple_submit_button = gr.Button(
-
-
-
-
-
+                        multiple_submit_button = gr.Button(
+                            value="Submit", variant="primary")
+                        multiple_clear_button = gr.ClearButton(
+                            components=[user_message_input, expected_output_input,
+                                        acceptance_criteria_input, initial_system_message_input],
+                            value='Clear All')
        with gr.Column():
-            system_message_output = gr.Textbox(
-                label="System Message", show_copy_button=True)
+            system_message_output = gr.Textbox(label="System Message", show_copy_button=True)
            output_output = gr.Textbox(label="Output", show_copy_button=True)
-            analysis_output = gr.Textbox(
-                label="Analysis", show_copy_button=True)
+            analysis_output = gr.Textbox(label="Analysis", show_copy_button=True)
            flag_button = gr.Button(value="Flag", variant="secondary", visible=config.allow_flagging)
            with gr.Accordion("Details", open=False, visible=config.verbose):
-                logs_chatbot = gr.Chatbot(
-
+                logs_chatbot = gr.Chatbot(
+                    label='Messages', show_copy_button=True, layout='bubble',
+                    bubble_full_width=False, render_markdown=False
+                )
                clear_logs_button = gr.ClearButton([logs_chatbot], value='Clear Logs')

    clear_button.add([system_message_output, output_output,
                      analysis_output, logs_chatbot])
    multiple_clear_button.add([system_message_output, output_output,
-
-
-    submit_button.click(
-
-
-
-
-
-
-
-
-
+                               analysis_output, logs_chatbot])
+
+    submit_button.click(
+        process_message_with_single_llm,
+        inputs=[
+            user_message_input,
+            expected_output_input,
+            acceptance_criteria_input,
+            initial_system_message_input,
+            recursion_limit_input,
+            max_output_age,
+            model_name_input
+        ],
+        outputs=[
+            system_message_output,
+            output_output,
+            analysis_output,
+            logs_chatbot
+        ]
+    )
+
+    multiple_submit_button.click(
+        process_message_with_2_llms,
+        inputs=[
+            user_message_input,
+            expected_output_input,
+            acceptance_criteria_input,
+            initial_system_message_input,
+            recursion_limit_input,
+            max_output_age,
+            optimizer_model_name_input,
+            executor_model_name_input
+        ],
+        outputs=[
+            system_message_output,
+            output_output,
+            analysis_output,
+            logs_chatbot
+        ]
+    )

    # Load examples
    examples = config.examples_path
-    gr.Examples(examples, inputs=[
-
-
+    gr.Examples(examples, inputs=[
+        user_message_input,
+        expected_output_input,
+        acceptance_criteria_input,
+        initial_system_message_input,
+        recursion_limit_input,
+        model_name_input
+    ])
+
+    flagging_inputs = [
+        user_message_input,
+        expected_output_input,
+        acceptance_criteria_input,
+        initial_system_message_input
+    ]

    # Configure flagging
    if config.allow_flagging:
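
The factory above resolves a class by name from the module globals, so any model class imported into app/gradio_meta_prompt.py (such as ChatOpenAI) can be instantiated from a config entry's `type` string plus its remaining keyword arguments. A minimal sketch of that behaviour follows; the temperature value and the placeholder API key are illustrative assumptions, not values taken from the commit.

    # Sketch (not part of the commit): singleton factory resolving a class by name.
    from langchain_openai import ChatOpenAI  # must be in this module's globals()


    class LLMModelFactory:
        _instance = None

        def __new__(cls):
            if not cls._instance:
                cls._instance = super(LLMModelFactory, cls).__new__(cls)
            return cls._instance

        def create(self, model_type: str, **kwargs):
            model_class = globals()[model_type]  # look the class up by its name
            return model_class(**kwargs)


    factory_a = LLMModelFactory()
    factory_b = LLMModelFactory()
    assert factory_a is factory_b  # singleton: both names refer to one instance

    # Hypothetical config-style entry: type "ChatOpenAI", the rest are kwargs.
    # The api_key is a placeholder so construction works without environment setup.
    llm = factory_a.create("ChatOpenAI", temperature=0.1, api_key="sk-placeholder")
    print(type(llm).__name__)  # -> ChatOpenAI
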
meta_prompt/__init__.py
CHANGED
@@ -1,4 +1,13 @@
 __version__ = '0.1.0'

 from .meta_prompt import AgentState, MetaPromptGraph
-
+from .consts import (
+    META_PROMPT_NODES,
+    NODE_PROMPT_INITIAL_DEVELOPER,
+    NODE_PROMPT_DEVELOPER,
+    NODE_PROMPT_EXECUTOR,
+    NODE_OUTPUT_HISTORY_ANALYZER,
+    NODE_PROMPT_ANALYZER,
+    NODE_PROMPT_SUGGESTER,
+    DEFAULT_PROMPT_TEMPLATES,
+)
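
With the expanded package root, both the graph classes and the node-name constants resolve directly from `meta_prompt`, which is what the `from meta_prompt import *` line in app/gradio_meta_prompt.py relies on. A small sketch, assuming the package is importable from the current environment:

    # Sketch: names re-exported by meta_prompt/__init__.py after this commit.
    from meta_prompt import (
        AgentState,
        MetaPromptGraph,
        META_PROMPT_NODES,
        NODE_PROMPT_EXECUTOR,
        DEFAULT_PROMPT_TEMPLATES,
    )

    print(NODE_PROMPT_EXECUTOR)              # "prompt_executor"
    print(META_PROMPT_NODES)                  # the six node names
    print(sorted(DEFAULT_PROMPT_TEMPLATES))   # one ChatPromptTemplate per node
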
meta_prompt/consts.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.prompts import ChatPromptTemplate
|
2 |
+
|
3 |
+
NODE_PROMPT_INITIAL_DEVELOPER = "prompt_initial_developer"
|
4 |
+
NODE_PROMPT_DEVELOPER = "prompt_developer"
|
5 |
+
NODE_PROMPT_EXECUTOR = "prompt_executor"
|
6 |
+
NODE_OUTPUT_HISTORY_ANALYZER = "output_history_analyzer"
|
7 |
+
NODE_PROMPT_ANALYZER = "prompt_analyzer"
|
8 |
+
NODE_PROMPT_SUGGESTER = "prompt_suggester"
|
9 |
+
|
10 |
+
META_PROMPT_NODES = [
|
11 |
+
NODE_PROMPT_INITIAL_DEVELOPER,
|
12 |
+
NODE_PROMPT_DEVELOPER,
|
13 |
+
NODE_PROMPT_EXECUTOR,
|
14 |
+
NODE_OUTPUT_HISTORY_ANALYZER,
|
15 |
+
NODE_PROMPT_ANALYZER,
|
16 |
+
NODE_PROMPT_SUGGESTER
|
17 |
+
]
|
18 |
+
|
19 |
+
DEFAULT_PROMPT_TEMPLATES = {
|
20 |
+
NODE_PROMPT_INITIAL_DEVELOPER: ChatPromptTemplate.from_messages([
|
21 |
+
("system", """# Expert Prompt Engineer
|
22 |
+
|
23 |
+
You are an expert prompt engineer tasked with creating system messages for AI assistants.
|
24 |
+
|
25 |
+
## Instructions
|
26 |
+
|
27 |
+
1. Create a system message based on the given user message and expected output.
|
28 |
+
2. Ensure the system message can handle similar user messages.
|
29 |
+
3. Output only the system message, without any additional content.
|
30 |
+
4. Expected Output text should not appear in System Message as an example. But it's OK to use some similar text as an example instead.
|
31 |
+
5. Format the system message well, with no more than 80 characters per line (except for raw text).
|
32 |
+
|
33 |
+
## Output
|
34 |
+
|
35 |
+
Provide only the system message, adhering to the above guidelines.
|
36 |
+
"""),
|
37 |
+
("human", """# User Message
|
38 |
+
|
39 |
+
{user_message}
|
40 |
+
|
41 |
+
# Expected Output
|
42 |
+
|
43 |
+
{expected_output}
|
44 |
+
""")
|
45 |
+
]),
|
46 |
+
NODE_PROMPT_DEVELOPER: ChatPromptTemplate.from_messages([
|
47 |
+
("system", """# Expert Prompt Engineer
|
48 |
+
|
49 |
+
You are an expert prompt engineer tasked with updating system messages for AI assistants. You Update System Message according to Suggestions, to improve Output and match Expected Output more closely.
|
50 |
+
|
51 |
+
## Instructions
|
52 |
+
|
53 |
+
1. Update the system message based on the given Suggestion, User Message, and Expected Output.
|
54 |
+
2. Ensure the updated system message can handle similar user messages.
|
55 |
+
3. Modify only the content mentioned in the Suggestion. Do not change the parts that are not related to the Suggestion.
|
56 |
+
4. Output only the updated system message, without any additional content.
|
57 |
+
5. Avoiding the behavior should be explicitly requested (e.g. `Don't ...`) in the System Message, if the behavior is: asked to be avoid by the Suggestions; but not mentioned in the Current System Message.
|
58 |
+
6. Expected Output text should not appear in System Message as an example. But it's OK to use some similar text as an example instead.
|
59 |
+
* Remove the Expected Output text or text highly similar to Expected Output from System Message, if it's present.
|
60 |
+
7. Format the system message well, with no more than 80 characters per line (except for raw text).
|
61 |
+
|
62 |
+
## Output
|
63 |
+
|
64 |
+
Provide only the updated System Message, adhering to the above guidelines.
|
65 |
+
"""),
|
66 |
+
("human", """# Current system message
|
67 |
+
|
68 |
+
{system_message}
|
69 |
+
|
70 |
+
# User Message
|
71 |
+
|
72 |
+
{user_message}
|
73 |
+
|
74 |
+
# Expected Output
|
75 |
+
|
76 |
+
{expected_output}
|
77 |
+
|
78 |
+
# Suggestions
|
79 |
+
|
80 |
+
{suggestions}
|
81 |
+
""")
|
82 |
+
]),
|
83 |
+
NODE_PROMPT_EXECUTOR: ChatPromptTemplate.from_messages([
|
84 |
+
("system", "{system_message}"),
|
85 |
+
("human", "{user_message}")
|
86 |
+
]),
|
87 |
+
NODE_OUTPUT_HISTORY_ANALYZER: ChatPromptTemplate.from_messages([
|
88 |
+
("system", """You are a text comparing program. You read the Acceptance Criteria, compare the compare the exptected output with two different outputs, and decide which one is more consistent with the expected output. When comparing the outputs, ignore the differences which are acceptable or ignorable according to the Acceptance Criteria.
|
89 |
+
|
90 |
+
You output the following analysis according to the Acceptance Criteria:
|
91 |
+
|
92 |
+
* Your analysis in a Markdown list.
|
93 |
+
* The ID of the output that is more consistent with the Expected Output as Preferred Output ID, with the following format:
|
94 |
+
|
95 |
+
```
|
96 |
+
# Analysis
|
97 |
+
|
98 |
+
...
|
99 |
+
|
100 |
+
# Preferred Output ID: [ID]
|
101 |
+
```
|
102 |
+
|
103 |
+
If both outputs are equally similar to the expected output, output the following:
|
104 |
+
|
105 |
+
```
|
106 |
+
# Analysis
|
107 |
+
|
108 |
+
...
|
109 |
+
|
110 |
+
# Draw
|
111 |
+
```
|
112 |
+
"""),
|
113 |
+
("human", """
|
114 |
+
# Output ID: A
|
115 |
+
|
116 |
+
```
|
117 |
+
{best_output}
|
118 |
+
```
|
119 |
+
|
120 |
+
# Output ID: B
|
121 |
+
|
122 |
+
```
|
123 |
+
{output}
|
124 |
+
```
|
125 |
+
|
126 |
+
# Acceptance Criteria
|
127 |
+
|
128 |
+
{acceptance_criteria}
|
129 |
+
|
130 |
+
# Expected Output
|
131 |
+
|
132 |
+
```
|
133 |
+
{expected_output}
|
134 |
+
```
|
135 |
+
""")
|
136 |
+
]),
|
137 |
+
NODE_PROMPT_ANALYZER: ChatPromptTemplate.from_messages([
|
138 |
+
("system", """You are a text comparing program. You compare the following output texts, analysis the System Message and provide a detailed analysis according to `Acceptance Criteria`. Then you decide whether `Actual Output` is acceptable.
|
139 |
+
|
140 |
+
Provide your analysis in the following format:
|
141 |
+
|
142 |
+
```
|
143 |
+
- Acceptable Differences: [List acceptable differences succinctly]
|
144 |
+
- Unacceptable Differences: [List unacceptable differences succinctly]
|
145 |
+
- Accept: [Yes/No]
|
146 |
+
```
|
147 |
+
|
148 |
+
* Compare Expected Output and Actual Output with the guidance of Accept Criteria.
|
149 |
+
* Only set 'Accept' to 'Yes', if Accept Criteria are all met. Otherwise, set 'Accept' to 'No'.
|
150 |
+
* List only the acceptable differences according to Accept Criteria in 'acceptable Differences' section.
|
151 |
+
* List only the unacceptable differences according to Accept Criteria in 'Unacceptable Differences' section.
|
152 |
+
|
153 |
+
# Acceptance Criteria
|
154 |
+
|
155 |
+
```
|
156 |
+
{acceptance_criteria}
|
157 |
+
```
|
158 |
+
"""),
|
159 |
+
("human", """
|
160 |
+
# System Message
|
161 |
+
|
162 |
+
```
|
163 |
+
{system_message}
|
164 |
+
```
|
165 |
+
|
166 |
+
# Expected Output
|
167 |
+
|
168 |
+
```
|
169 |
+
{expected_output}
|
170 |
+
```
|
171 |
+
|
172 |
+
# Actual Output
|
173 |
+
|
174 |
+
```
|
175 |
+
{output}
|
176 |
+
```
|
177 |
+
""")
|
178 |
+
]),
|
179 |
+
NODE_PROMPT_SUGGESTER: ChatPromptTemplate.from_messages([
|
180 |
+
("system", """Read the following inputs and outputs of an LLM prompt, and also analysis about them. Then suggest how to improve System Message.
|
181 |
+
|
182 |
+
* The goal is to improve the System Message to match the Expected Output better.
|
183 |
+
* Ignore all Acceptable Differences and focus on Unacceptable Differences.
|
184 |
+
* Suggest formal changes first, then semantic changes.
|
185 |
+
* Provide your suggestions in a Markdown list, nothing else. Output only the suggestions related with Unacceptable Differences.
|
186 |
+
* Start every suggestion with `The System Message should ...`.
|
187 |
+
* Figue out the contexts of the System Message that conflict with the suggestions, and suggest modification or deletion.
|
188 |
+
* Avoiding the behavior should be explicitly requested (e.g. `The System Message should explicitly state that the output shoud not ...`) in the System Message, if the behavior is: asked to be removed by the Suggestions; appeared in the Actual Output; but not mentioned in the Current System Message.
|
189 |
+
* Expected Output text should not appear in System Message as an example. But it's OK to use some similar but distinct text as an example instead.
|
190 |
+
* Ask to remove the Expected Output text or text highly similar to Expected Output from System Message, if it's present.
|
191 |
+
* Provide format examples or detected format name, if System Message does not.
|
192 |
+
* Specify the detected format name (e.g. XML, JSON, etc.) of Expected Output, if System Message does not mention it.
|
193 |
+
"""),
|
194 |
+
("human", """
|
195 |
+
<|Start_System_Message|>
|
196 |
+
{system_message}
|
197 |
+
<|End_System_Message|>
|
198 |
+
|
199 |
+
<|Start_User_Message|>
|
200 |
+
{user_message}
|
201 |
+
<|End_User_Message|>
|
202 |
+
|
203 |
+
<|Start_Expected_Output|>
|
204 |
+
{expected_output}
|
205 |
+
<|End_Expected_Output|>
|
206 |
+
|
207 |
+
<|Start_Actual_Output|>
|
208 |
+
{output}
|
209 |
+
<|End_Actual_Output|>
|
210 |
+
|
211 |
+
<|Start_Acceptance Criteria|>
|
212 |
+
{acceptance_criteria}
|
213 |
+
<|End_Acceptance Criteria|>
|
214 |
+
|
215 |
+
<|Start_Analysis|>
|
216 |
+
{analysis}
|
217 |
+
<|End_Analysis|>
|
218 |
+
""")
|
219 |
+
])
|
220 |
+
}
|
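
Each entry in DEFAULT_PROMPT_TEMPLATES is an ordinary ChatPromptTemplate, so it can be rendered outside the graph as well. A minimal sketch using the executor template, whose only placeholders are {system_message} and {user_message}; the two example strings below are made up for illustration:

    # Sketch: render the executor template into chat messages.
    from meta_prompt.consts import DEFAULT_PROMPT_TEMPLATES, NODE_PROMPT_EXECUTOR

    messages = DEFAULT_PROMPT_TEMPLATES[NODE_PROMPT_EXECUTOR].format_messages(
        system_message="You are a terse assistant.",   # placeholder value
        user_message="Say hello.",                      # placeholder value
    )
    for message in messages:
        print(message.type, ":", message.content)
    # -> system : You are a terse assistant.
    # -> human  : Say hello.
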
meta_prompt/meta_prompt.py
CHANGED
The node-name constants and the DEFAULT_PROMPT_TEMPLATES dictionary move out of MetaPromptGraph; their bodies are the six ChatPromptTemplate definitions now in meta_prompt/consts.py (shown above).

@@ -9,6 +9,7 @@ from langgraph.graph import StateGraph, END
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.errors import GraphRecursionError
 from pydantic import BaseModel
+from .consts import *

 class AgentState(BaseModel):
     max_output_age: int = 0
@@ -25,261 +26,16 @@ class AgentState(BaseModel):
     best_output_age: int = 0

 class MetaPromptGraph:
-    NODE_PROMPT_INITIAL_DEVELOPER = "prompt_initial_developer"
-    NODE_PROMPT_DEVELOPER = "prompt_developer"
-    NODE_PROMPT_EXECUTOR = "prompt_executor"
-    NODE_OUTPUT_HISTORY_ANALYZER = "output_history_analyzer"
-    NODE_PROMPT_ANALYZER = "prompt_analyzer"
-    NODE_PROMPT_SUGGESTER = "prompt_suggester"
-
-    DEFAULT_PROMPT_TEMPLATES = {
-        ...
-    }
-
     @classmethod
     def get_node_names(cls):
-        return [
-            cls.NODE_PROMPT_INITIAL_DEVELOPER,
-            cls.NODE_PROMPT_DEVELOPER,
-            cls.NODE_PROMPT_EXECUTOR,
-            cls.NODE_OUTPUT_HISTORY_ANALYZER,
-            cls.NODE_PROMPT_ANALYZER,
-            cls.NODE_PROMPT_SUGGESTER
-        ]
+        return META_PROMPT_NODES

     def __init__(self,
-                 llms: Union[BaseLanguageModel,
+                 llms: Union[BaseLanguageModel,
+                             Dict[str, BaseLanguageModel]] = {},
                  prompts: Dict[str, ChatPromptTemplate] = {},
                  logger: Optional[logging.Logger] = None,
-                 verbose
+                 verbose=False):
         self.logger = logger or logging.getLogger(__name__)
         if self.logger is not None:
             if verbose:
@@ -288,80 +44,86 @@
                self.logger.setLevel(logging.INFO)

        if isinstance(llms, BaseLanguageModel):
+            self.llms: Dict[str, BaseLanguageModel] = {
+                node: llms for node in self.get_node_names()}
        else:
            self.llms: Dict[str, BaseLanguageModel] = llms
+        self.prompt_templates: Dict[str,
+                                    ChatPromptTemplate] = DEFAULT_PROMPT_TEMPLATES.copy()
        self.prompt_templates.update(prompts)

    def _create_workflow(self, including_initial_developer: bool = True) -> StateGraph:
        workflow = StateGraph(AgentState)
+
+        workflow.add_node(NODE_PROMPT_DEVELOPER,
                          lambda x: self._prompt_node(
+                              NODE_PROMPT_DEVELOPER,
                              "system_message",
                              x))
+        workflow.add_node(NODE_PROMPT_EXECUTOR,
                          lambda x: self._prompt_node(
+                              NODE_PROMPT_EXECUTOR,
                              "output",
                              x))
+        workflow.add_node(NODE_OUTPUT_HISTORY_ANALYZER,
                          lambda x: self._output_history_analyzer(x))
+        workflow.add_node(NODE_PROMPT_ANALYZER,
                          lambda x: self._prompt_analyzer(x))
+        workflow.add_node(NODE_PROMPT_SUGGESTER,
                          lambda x: self._prompt_node(
+                              NODE_PROMPT_SUGGESTER,
                              "suggestions",
                              x))

+        workflow.add_edge(NODE_PROMPT_DEVELOPER, NODE_PROMPT_EXECUTOR)
+        workflow.add_edge(NODE_PROMPT_EXECUTOR, NODE_OUTPUT_HISTORY_ANALYZER)
+        workflow.add_edge(NODE_PROMPT_SUGGESTER, NODE_PROMPT_DEVELOPER)

        workflow.add_conditional_edges(
+            NODE_OUTPUT_HISTORY_ANALYZER,
            lambda x: self._should_exit_on_max_age(x),
            {
+                "continue": NODE_PROMPT_ANALYZER,
+                "rerun": NODE_PROMPT_SUGGESTER,
                END: END
            }
        )

        workflow.add_conditional_edges(
+            NODE_PROMPT_ANALYZER,
            lambda x: self._should_exit_on_acceptable_output(x),
            {
+                "continue": NODE_PROMPT_SUGGESTER,
                END: END
            }
        )

        if including_initial_developer:
+            workflow.add_node(NODE_PROMPT_INITIAL_DEVELOPER,
+                              lambda x: self._prompt_node(
+                                  NODE_PROMPT_INITIAL_DEVELOPER,
+                                  "system_message",
+                                  x))
+            workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER,
+                              NODE_PROMPT_EXECUTOR)
+            workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
        else:
+            workflow.set_entry_point(NODE_PROMPT_EXECUTOR)

        return workflow

    def __call__(self, state: AgentState, recursion_limit: int = 25) -> AgentState:
+        workflow = self._create_workflow(including_initial_developer=(
+            state.system_message is None or state.system_message == ""))

        memory = MemorySaver()
        graph = workflow.compile(checkpointer=memory)
+        config = {"configurable": {"thread_id": "1"},
+                  "recursion_limit": recursion_limit}

        try:
+            self.logger.debug("Invoking graph with state: %s",
+                              pprint.pformat(state))

            output_state = graph.invoke(state, config)

@@ -369,7 +131,8 @@
            return output_state
        except GraphRecursionError as e:
+            self.logger.info(
+                "Recursion limit reached. Returning the best state found so far.")
            checkpoint_states = graph.get_state(config)

            # if the length of states is bigger than 0, print the best system message and output
@@ -377,41 +140,50 @@
                output_state = checkpoint_states[0]
                return output_state
            else:
+                self.logger.info(
+                    "No checkpoint states found. Returning the input state.")
+
                return state

    def _prompt_node(self, node, target_attribute: str, state: AgentState) -> AgentState:
        logger = self.logger.getChild(node)
+        prompt = self.prompt_templates[node].format_messages(
+            **state.model_dump())

        for message in prompt:
+            logger.debug({'node': node, 'action': 'invoke',
+                          'type': message.type, 'message': message.content})
+        response = self.llms[node].invoke(
+            self.prompt_templates[node].format_messages(**state.model_dump()))
+        logger.debug({'node': node, 'action': 'response',
+                      'type': response.type, 'message': response.content})
+
        setattr(state, target_attribute, response.content)
        return state

    def _output_history_analyzer(self, state: AgentState) -> AgentState:
+        logger = self.logger.getChild(NODE_OUTPUT_HISTORY_ANALYZER)

        if state.best_output is None:
            state.best_output = state.output
            state.best_system_message = state.system_message
            state.best_output_age = 0

+            logger.debug(
+                "Best output initialized to the current output:\n%s", state.output)

            return state

+        prompt = self.prompt_templates[NODE_OUTPUT_HISTORY_ANALYZER].format_messages(
+            **state.model_dump())

        for message in prompt:
+            logger.debug({'node': NODE_OUTPUT_HISTORY_ANALYZER, 'action': 'invoke',
+                          'type': message.type, 'message': message.content})

+        response = self.llms[NODE_OUTPUT_HISTORY_ANALYZER].invoke(prompt)
+        logger.debug({'node': NODE_OUTPUT_HISTORY_ANALYZER, 'action': 'response',
+                      'type': response.type, 'message': response.content})

        analysis = response.content

@@ -420,23 +192,28 @@
            state.best_system_message = state.system_message
            state.best_output_age = 0

+            logger.debug(
+                "Best output updated to the current output:\n%s", state.output)
        else:
            state.best_output_age += 1

+            logger.debug("Best output age incremented to %s",
+                         state.best_output_age)

        return state

    def _prompt_analyzer(self, state: AgentState) -> AgentState:
+        logger = self.logger.getChild(NODE_PROMPT_ANALYZER)
+        prompt = self.prompt_templates[NODE_PROMPT_ANALYZER].format_messages(
+            **state.model_dump())

        for message in prompt:
+            logger.debug({'node': NODE_PROMPT_ANALYZER, 'action': 'invoke',
+                          'type': message.type, 'message': message.content})

+        response = self.llms[NODE_PROMPT_ANALYZER].invoke(prompt)
+        logger.debug({'node': NODE_PROMPT_ANALYZER, 'action': 'response',
+                      'type': response.type, 'message': response.content})

        state.analysis = response.content
        state.accepted = "Accept: Yes" in response.content

@@ -446,7 +223,7 @@
        return state

    def _should_exit_on_max_age(self, state: AgentState) -> str:
-        if state.max_output_age <=0:
+        if state.max_output_age <= 0:
            # always continue if max age is 0
            return "continue"
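
Putting the pieces together, the refactored MetaPromptGraph is driven the same way process_message() in app/gradio_meta_prompt.py drives it: build an AgentState, construct the graph with one or more LLMs, and call it. The sketch below is illustrative, not quoted from the commit: the model name, temperature, API key, and field values are placeholder assumptions, and the user_message/expected_output/acceptance_criteria field names are inferred from the template placeholders and the app code.

    # Sketch: single-LLM use of the refactored graph.
    from langchain_openai import ChatOpenAI
    from meta_prompt import AgentState, MetaPromptGraph

    # Placeholder credentials; a real key is needed for the invocation to succeed.
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key="sk-placeholder")

    # Field names assumed from the {user_message}/{expected_output}/
    # {acceptance_criteria} placeholders in DEFAULT_PROMPT_TEMPLATES.
    input_state = AgentState(
        user_message="How do I reverse a list in Python?",
        expected_output="Use reversed() or slicing: my_list[::-1].",
        acceptance_criteria="Any correct, concise answer is acceptable.",
        system_message="",   # empty -> the initial-developer node is included
        max_output_age=2,
    )

    # A single BaseLanguageModel is fanned out to every node by __init__.
    graph = MetaPromptGraph(llms=llm, verbose=False)
    output_state = graph(input_state, recursion_limit=25)
    print(output_state)
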