yaleh committed on
Commit
590b8c3
·
1 Parent(s): a291864

Updated unit test.

Browse files
app/gradio_meta_prompt.py CHANGED
@@ -318,6 +318,7 @@ with gr.Blocks(title='Meta Prompt') as demo:
318
  show_copy_button=True
319
  )
320
  with gr.Row():
 
321
  generate_acceptance_criteria_button = gr.Button(
322
  value="Generate",
323
  variant="secondary"
@@ -456,8 +457,11 @@ with gr.Blocks(title='Meta Prompt') as demo:
456
  value="Evaluate", variant="secondary")
457
  output_output = gr.Textbox(
458
  label="Output", show_copy_button=True)
459
- acceptance_criteria_output = gr.Textbox(
460
- label="Acceptance Criteria", show_copy_button=True)
 
 
 
461
  analysis_output = gr.Textbox(
462
  label="Analysis", show_copy_button=True)
463
  flag_button = gr.Button(
@@ -802,12 +806,40 @@ with gr.Blocks(title='Meta Prompt') as demo:
802
 
803
  generate_acceptance_criteria_button.click(
804
  generate_acceptance_criteria,
805
- inputs=[config_state, selected_example_input, selected_example_output,
 
806
  model_name_states["acceptance_criteria"],
807
  model_temperature_states["acceptance_criteria"],
808
  prompt_template_group],
809
  outputs=[acceptance_criteria_input, logs_chatbot]
810
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  generate_initial_system_message_button.click(
812
  generate_initial_system_message,
813
  inputs=[config_state, selected_example_input, selected_example_output,
 
318
  show_copy_button=True
319
  )
320
  with gr.Row():
321
+ evaluate_acceptance_criteria_input_button = gr.Button("Evaluate")
322
  generate_acceptance_criteria_button = gr.Button(
323
  value="Generate",
324
  variant="secondary"
 
457
  value="Evaluate", variant="secondary")
458
  output_output = gr.Textbox(
459
  label="Output", show_copy_button=True)
460
+ with gr.Group():
461
+ acceptance_criteria_output = gr.Textbox(
462
+ label="Acceptance Criteria", show_copy_button=True)
463
+ evaluate_acceptance_criteria_output_button = gr.Button(
464
+ value="Evaluate", variant="secondary")
465
  analysis_output = gr.Textbox(
466
  label="Analysis", show_copy_button=True)
467
  flag_button = gr.Button(
 
806
 
807
  generate_acceptance_criteria_button.click(
808
  generate_acceptance_criteria,
809
+ inputs=[config_state, initial_system_message_input,
810
+ selected_example_input, selected_example_output,
811
  model_name_states["acceptance_criteria"],
812
  model_temperature_states["acceptance_criteria"],
813
  prompt_template_group],
814
  outputs=[acceptance_criteria_input, logs_chatbot]
815
  )
816
+ evaluate_acceptance_criteria_input_button.click(
817
+ fn=evaluate_output,
818
+ inputs=[
819
+ config_state,
820
+ selected_example_output,
821
+ output_output,
822
+ acceptance_criteria_input,
823
+ model_name_states["analyzer"],
824
+ model_temperature_states["analyzer"],
825
+ prompt_template_group
826
+ ],
827
+ outputs=[analysis_output]
828
+ )
829
+ evaluate_acceptance_criteria_output_button.click(
830
+ fn=evaluate_output,
831
+ inputs=[
832
+ config_state,
833
+ selected_example_output,
834
+ output_output,
835
+ acceptance_criteria_output,
836
+ model_name_states["analyzer"],
837
+ model_temperature_states["analyzer"],
838
+ prompt_template_group
839
+ ],
840
+ outputs=[analysis_output]
841
+ )
842
+
843
  generate_initial_system_message_button.click(
844
  generate_initial_system_message,
845
  inputs=[config_state, selected_example_input, selected_example_output,
app/gradio_meta_prompt_utils.py CHANGED
@@ -281,18 +281,20 @@ def evaluate_system_message(config, system_message, user_message, executor_model
281
  raise gr.Error(f"Error: {e}")
282
 
283
 
284
- def generate_acceptance_criteria(config, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
285
  """
286
- Generate acceptance criteria based on the user message and expected output.
287
 
288
  This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
289
  to generate acceptance criteria.
290
 
291
  Args:
 
292
  user_message (str): The user's input message.
293
  expected_output (str): The anticipated response or outcome from the language
294
  model based on the user's message.
295
  acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
 
296
  prompt_template_group (Optional[str], optional): The group of prompt templates
297
  to use. Defaults to None.
298
 
@@ -321,10 +323,11 @@ def generate_acceptance_criteria(config, user_message, expected_output, acceptan
321
  }, prompts=prompt_templates,
322
  verbose=config.verbose, logger=logger)
323
  state = AgentState(
 
324
  user_message=user_message,
325
  expected_output=expected_output
326
  )
327
- output_state = acceptance_criteria_graph.run_acceptance_criteria_graph(state)
328
 
329
  if log_handler:
330
  log_handler.close()
@@ -386,7 +389,7 @@ def generate_initial_system_message(
386
  expected_output=expected_output
387
  )
388
 
389
- output_state = initial_system_message_graph.run_prompt_initial_developer_graph(state)
390
 
391
  if log_handler:
392
  log_handler.close()
@@ -713,7 +716,7 @@ def append_example_to_input_dataframe(
713
  ):
714
  try:
715
  if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
716
- input_dataframe.iloc[-1] = [new_example_input, new_example_output]
717
  else:
718
  input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
719
  return input_dataframe, None, None, None, None
@@ -773,3 +776,42 @@ def apply_suggestions(config, description, suggestions, examples, model_name, te
773
  return result["description"], gr.update(choices=result["suggestions"], value=[])
774
  except Exception as e:
775
  raise gr.Error(f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  raise gr.Error(f"Error: {e}")
282
 
283
 
284
+ def generate_acceptance_criteria(config, system_message, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
285
  """
286
+ Generate acceptance criteria based on the system message, user message, and expected output.
287
 
288
  This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
289
  to generate acceptance criteria.
290
 
291
  Args:
292
+ system_message (str): The system message to use when generating acceptance criteria.
293
  user_message (str): The user's input message.
294
  expected_output (str): The anticipated response or outcome from the language
295
  model based on the user's message.
296
  acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
297
+ acceptance_criteria_temperature (float): The temperature to use for the acceptance criteria model.
298
  prompt_template_group (Optional[str], optional): The group of prompt templates
299
  to use. Defaults to None.
300
 
 
323
  }, prompts=prompt_templates,
324
  verbose=config.verbose, logger=logger)
325
  state = AgentState(
326
+ system_message=system_message,
327
  user_message=user_message,
328
  expected_output=expected_output
329
  )
330
+ output_state = acceptance_criteria_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
331
 
332
  if log_handler:
333
  log_handler.close()
 
389
  expected_output=expected_output
390
  )
391
 
392
+ output_state = initial_system_message_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
393
 
394
  if log_handler:
395
  log_handler.close()
 
716
  ):
717
  try:
718
  if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
719
+ input_dataframe = pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])
720
  else:
721
  input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
722
  return input_dataframe, None, None, None, None
 
776
  return result["description"], gr.update(choices=result["suggestions"], value=[])
777
  except Exception as e:
778
  raise gr.Error(f"An error occurred: {str(e)}")
779
+
780
+ def evaluate_output(
781
+ config,
782
+ expected_output: str,
783
+ output: str,
784
+ acceptance_criteria: str,
785
+ prompt_analyzer_model_name: str,
786
+ prompt_analyzer_temperature: float,
787
+ prompt_template_group: Optional[str] = None
788
+ ) -> str:
789
+ # Package the required variables into an AgentState dictionary
790
+ state = AgentState(
791
+ acceptance_criteria=acceptance_criteria,
792
+ expected_output=expected_output,
793
+ output=output
794
+ )
795
+
796
+ # Initialize the acceptance criteria model
797
+ llm = initialize_llm(config, prompt_analyzer_model_name, {'temperature': prompt_analyzer_temperature}).bind(response_format={"type": "json_object"})
798
+
799
+ # Get the prompt templates
800
+ if prompt_template_group is None:
801
+ prompt_template_group = 'default'
802
+ prompt_templates = prompt_templates_confz2langchain(
803
+ config.prompt_templates[prompt_template_group]
804
+ )
805
+
806
+ # Create the MetaPromptGraph instance
807
+ acceptance_criteria_graph = MetaPromptGraph(
808
+ llms={NODE_PROMPT_ANALYZER: llm},
809
+ prompts=prompt_templates,
810
+ verbose=config.verbose
811
+ )
812
+
813
+ # Run the node graph for evaluation
814
+ output_state = acceptance_criteria_graph.run_node_graph(NODE_PROMPT_ANALYZER, state)
815
+
816
+ # Return the evaluation result
817
+ return output_state.get('analysis', "Error: The output state does not contain a valid 'analysis'")
meta_prompt/meta_prompt.py CHANGED
@@ -117,46 +117,50 @@ class MetaPromptGraph:
117
 
118
  self.aggressive_exploration = aggressive_exploration
119
 
120
- def _create_acceptance_criteria_workflow(self) -> StateGraph:
121
- """
122
- Create a workflow state graph for acceptance criteria.
 
 
 
123
 
124
  Returns:
125
  StateGraph: A state graph representing the workflow.
126
  """
127
  workflow = StateGraph(AgentState)
128
  workflow.add_node(
129
- NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
130
  lambda x: self._prompt_node(
131
- NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
132
- "acceptance_criteria",
133
  x
134
  )
135
  )
136
- workflow.add_edge(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, END)
137
- workflow.set_entry_point(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
138
  return workflow
139
 
140
 
141
- def _create_prompt_initial_developer_workflow(self) -> StateGraph:
142
- """
143
- Create a workflow state graph for the initial developer prompt.
 
 
144
 
145
  Returns:
146
- StateGraph: A state graph representing the workflow.
147
  """
148
- workflow = StateGraph(AgentState)
149
- workflow.add_node(
150
- NODE_PROMPT_INITIAL_DEVELOPER,
151
- lambda x: self._prompt_node(
152
- NODE_PROMPT_INITIAL_DEVELOPER,
153
- "system_message",
154
- x
155
- )
156
- )
157
- workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER, END)
158
- workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
159
- return workflow
160
 
161
 
162
  def _create_workflow(self) -> StateGraph:
@@ -251,45 +255,26 @@ class MetaPromptGraph:
251
 
252
  return workflow
253
 
254
-
255
- def run_acceptance_criteria_graph(self, state: AgentState) -> AgentState:
256
- """Run the acceptance criteria graph with the given state.
257
-
258
- Args:
259
- state (AgentState): The current state of the agent.
260
-
261
- Returns:
262
- AgentState: The output state of the agent after invoking the graph.
263
- """
264
- self.logger.debug("Creating acceptance criteria workflow")
265
- workflow = self._create_acceptance_criteria_workflow()
266
- memory = MemorySaver()
267
- graph = workflow.compile(checkpointer=memory)
268
- config = {"configurable": {"thread_id": "1"}}
269
- self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
270
- output_state = graph.invoke(state, config)
271
- self.logger.debug("Output state: %s", pprint.pformat(output_state))
272
- return output_state
273
-
274
 
275
- def run_prompt_initial_developer_graph(self, state: AgentState) -> AgentState:
276
- """Run the prompt initial developer graph with the given state.
277
 
278
  Args:
 
279
  state (AgentState): The current state of the agent.
280
 
281
  Returns:
282
  AgentState: The output state of the agent after invoking the graph.
283
  """
284
- self.logger.debug("Creating prompt initial developer workflow")
285
- workflow = self._create_prompt_initial_developer_workflow()
286
  memory = MemorySaver()
287
  graph = workflow.compile(checkpointer=memory)
288
  config = {"configurable": {"thread_id": "1"}}
289
- self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
290
  output_state = graph.invoke(state, config)
291
- self.logger.debug("Output state: %s", pprint.pformat(output_state))
292
- return output_state
293
 
294
 
295
  def run_meta_prompt_graph(
 
117
 
118
  self.aggressive_exploration = aggressive_exploration
119
 
120
+
121
+ def _create_workflow_for_node(self, node: str) -> StateGraph:
122
+ """Create a workflow state graph for the specified node.
123
+
124
+ Args:
125
+ node (str): The node name to create the workflow for.
126
 
127
  Returns:
128
  StateGraph: A state graph representing the workflow.
129
  """
130
  workflow = StateGraph(AgentState)
131
  workflow.add_node(
132
+ node,
133
  lambda x: self._prompt_node(
134
+ node,
135
+ self._get_target_attribute_for_node(node),
136
  x
137
  )
138
  )
139
+ workflow.add_edge(node, END)
140
+ workflow.set_entry_point(node)
141
  return workflow
142
 
143
 
144
+ def _get_target_attribute_for_node(self, node: str) -> str:
145
+ """Get the target attribute for the specified node.
146
+
147
+ Args:
148
+ node (str): The node name.
149
 
150
  Returns:
151
+ str: The target attribute for the node.
152
  """
153
+ # Define a mapping of nodes to their target attributes
154
+ node_to_attribute = {
155
+ NODE_ACCEPTANCE_CRITERIA_DEVELOPER: "acceptance_criteria",
156
+ NODE_PROMPT_INITIAL_DEVELOPER: "system_message",
157
+ NODE_PROMPT_DEVELOPER: "system_message",
158
+ NODE_PROMPT_EXECUTOR: "output",
159
+ NODE_OUTPUT_HISTORY_ANALYZER: "analysis",
160
+ NODE_PROMPT_ANALYZER: "analysis",
161
+ NODE_PROMPT_SUGGESTER: "suggestions"
162
+ }
163
+ return node_to_attribute.get(node, "")
 
164
 
165
 
166
  def _create_workflow(self) -> StateGraph:
 
255
 
256
  return workflow
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
+ def run_node_graph(self, node: str, state: AgentState) -> AgentState:
260
+ """Run the graph for the specified node with the given state.
261
 
262
  Args:
263
+ node (str): The node name to run.
264
  state (AgentState): The current state of the agent.
265
 
266
  Returns:
267
  AgentState: The output state of the agent after invoking the graph.
268
  """
269
+ self.logger.debug(f"Creating workflow for node: {node}")
270
+ workflow = self._create_workflow_for_node(node)
271
  memory = MemorySaver()
272
  graph = workflow.compile(checkpointer=memory)
273
  config = {"configurable": {"thread_id": "1"}}
274
+ self.logger.debug(f"Invoking graph for node {node} with state: %s", pprint.pformat(state))
275
  output_state = graph.invoke(state, config)
276
+ self.logger.debug(f"Output state for node {node}: %s", pprint.pformat(output_state))
277
+ return output_state
278
 
279
 
280
  def run_meta_prompt_graph(
tests/meta_prompt_graph_test.py CHANGED
@@ -8,9 +8,6 @@ from meta_prompt import *
8
  from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
9
  from langgraph.graph import END
10
  import os
11
- # from dotenv import load_dotenv
12
-
13
- # load_dotenv()
14
 
15
  class TestMetaPromptGraph(unittest.TestCase):
16
  def setUp(self):
@@ -315,7 +312,7 @@ class TestMetaPromptGraph(unittest.TestCase):
315
  NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
316
  }
317
  meta_prompt_graph = MetaPromptGraph(llms=llms)
318
- workflow = meta_prompt_graph._create_acceptance_criteria_workflow()
319
 
320
  # Check if the workflow contains the correct node
321
  self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
@@ -360,7 +357,7 @@ class TestMetaPromptGraph(unittest.TestCase):
360
  user_message="How do I reverse a list in Python?",
361
  expected_output="The output should use the `reverse()` method.",
362
  )
363
- output_state = meta_prompt_graph.run_acceptance_criteria_graph(state)
364
 
365
  # Check if the output state contains the acceptance criteria
366
  self.assertIsNotNone(output_state["acceptance_criteria"])
@@ -382,7 +379,7 @@ class TestMetaPromptGraph(unittest.TestCase):
382
  }
383
  meta_prompt_graph = MetaPromptGraph(llms=llms)
384
  state = AgentState(user_message="How do I reverse a list in Python?")
385
- output_state = meta_prompt_graph.run_prompt_initial_developer_graph(state)
386
 
387
  # Check if the output state contains the initial developer prompt
388
  self.assertIsNotNone(output_state['system_message'])
 
8
  from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
9
  from langgraph.graph import END
10
  import os
 
 
 
11
 
12
  class TestMetaPromptGraph(unittest.TestCase):
13
  def setUp(self):
 
312
  NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
313
  }
314
  meta_prompt_graph = MetaPromptGraph(llms=llms)
315
+ workflow = meta_prompt_graph._create_workflow_for_node(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
316
 
317
  # Check if the workflow contains the correct node
318
  self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
 
357
  user_message="How do I reverse a list in Python?",
358
  expected_output="The output should use the `reverse()` method.",
359
  )
360
+ output_state = meta_prompt_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
361
 
362
  # Check if the output state contains the acceptance criteria
363
  self.assertIsNotNone(output_state["acceptance_criteria"])
 
379
  }
380
  meta_prompt_graph = MetaPromptGraph(llms=llms)
381
  state = AgentState(user_message="How do I reverse a list in Python?")
382
+ output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
383
 
384
  # Check if the output state contains the initial developer prompt
385
  self.assertIsNotNone(output_state['system_message'])