yaleh committed on
Commit
590b8c3
·
1 Parent(s): a291864

Updated unit test.

Browse files
app/gradio_meta_prompt.py CHANGED
@@ -318,6 +318,7 @@ with gr.Blocks(title='Meta Prompt') as demo:
318
  show_copy_button=True
319
  )
320
  with gr.Row():
 
321
  generate_acceptance_criteria_button = gr.Button(
322
  value="Generate",
323
  variant="secondary"
@@ -456,8 +457,11 @@ with gr.Blocks(title='Meta Prompt') as demo:
456
  value="Evaluate", variant="secondary")
457
  output_output = gr.Textbox(
458
  label="Output", show_copy_button=True)
459
- acceptance_criteria_output = gr.Textbox(
460
- label="Acceptance Criteria", show_copy_button=True)
 
 
 
461
  analysis_output = gr.Textbox(
462
  label="Analysis", show_copy_button=True)
463
  flag_button = gr.Button(
@@ -802,12 +806,40 @@ with gr.Blocks(title='Meta Prompt') as demo:
802
 
803
  generate_acceptance_criteria_button.click(
804
  generate_acceptance_criteria,
805
- inputs=[config_state, selected_example_input, selected_example_output,
 
806
  model_name_states["acceptance_criteria"],
807
  model_temperature_states["acceptance_criteria"],
808
  prompt_template_group],
809
  outputs=[acceptance_criteria_input, logs_chatbot]
810
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  generate_initial_system_message_button.click(
812
  generate_initial_system_message,
813
  inputs=[config_state, selected_example_input, selected_example_output,
 
318
  show_copy_button=True
319
  )
320
  with gr.Row():
321
+ evaluate_acceptance_criteria_input_button = gr.Button("Evaluate")
322
  generate_acceptance_criteria_button = gr.Button(
323
  value="Generate",
324
  variant="secondary"
 
457
  value="Evaluate", variant="secondary")
458
  output_output = gr.Textbox(
459
  label="Output", show_copy_button=True)
460
+ with gr.Group():
461
+ acceptance_criteria_output = gr.Textbox(
462
+ label="Acceptance Criteria", show_copy_button=True)
463
+ evaluate_acceptance_criteria_output_button = gr.Button(
464
+ value="Evaluate", variant="secondary")
465
  analysis_output = gr.Textbox(
466
  label="Analysis", show_copy_button=True)
467
  flag_button = gr.Button(
 
806
 
807
  generate_acceptance_criteria_button.click(
808
  generate_acceptance_criteria,
809
+ inputs=[config_state, initial_system_message_input,
810
+ selected_example_input, selected_example_output,
811
  model_name_states["acceptance_criteria"],
812
  model_temperature_states["acceptance_criteria"],
813
  prompt_template_group],
814
  outputs=[acceptance_criteria_input, logs_chatbot]
815
  )
816
+ evaluate_acceptance_criteria_input_button.click(
817
+ fn=evaluate_output,
818
+ inputs=[
819
+ config_state,
820
+ selected_example_output,
821
+ output_output,
822
+ acceptance_criteria_input,
823
+ model_name_states["analyzer"],
824
+ model_temperature_states["analyzer"],
825
+ prompt_template_group
826
+ ],
827
+ outputs=[analysis_output]
828
+ )
829
+ evaluate_acceptance_criteria_output_button.click(
830
+ fn=evaluate_output,
831
+ inputs=[
832
+ config_state,
833
+ selected_example_output,
834
+ output_output,
835
+ acceptance_criteria_output,
836
+ model_name_states["analyzer"],
837
+ model_temperature_states["analyzer"],
838
+ prompt_template_group
839
+ ],
840
+ outputs=[analysis_output]
841
+ )
842
+
843
  generate_initial_system_message_button.click(
844
  generate_initial_system_message,
845
  inputs=[config_state, selected_example_input, selected_example_output,
app/gradio_meta_prompt_utils.py CHANGED
@@ -281,18 +281,20 @@ def evaluate_system_message(config, system_message, user_message, executor_model
281
  raise gr.Error(f"Error: {e}")
282
 
283
 
284
- def generate_acceptance_criteria(config, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
285
  """
286
- Generate acceptance criteria based on the user message and expected output.
287
 
288
  This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
289
  to generate acceptance criteria.
290
 
291
  Args:
 
292
  user_message (str): The user's input message.
293
  expected_output (str): The anticipated response or outcome from the language
294
  model based on the user's message.
295
  acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
 
296
  prompt_template_group (Optional[str], optional): The group of prompt templates
297
  to use. Defaults to None.
298
 
@@ -321,10 +323,11 @@ def generate_acceptance_criteria(config, user_message, expected_output, acceptan
321
  }, prompts=prompt_templates,
322
  verbose=config.verbose, logger=logger)
323
  state = AgentState(
 
324
  user_message=user_message,
325
  expected_output=expected_output
326
  )
327
- output_state = acceptance_criteria_graph.run_acceptance_criteria_graph(state)
328
 
329
  if log_handler:
330
  log_handler.close()
@@ -386,7 +389,7 @@ def generate_initial_system_message(
386
  expected_output=expected_output
387
  )
388
 
389
- output_state = initial_system_message_graph.run_prompt_initial_developer_graph(state)
390
 
391
  if log_handler:
392
  log_handler.close()
@@ -713,7 +716,7 @@ def append_example_to_input_dataframe(
713
  ):
714
  try:
715
  if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
716
- input_dataframe.iloc[-1] = [new_example_input, new_example_output]
717
  else:
718
  input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
719
  return input_dataframe, None, None, None, None
@@ -773,3 +776,42 @@ def apply_suggestions(config, description, suggestions, examples, model_name, te
773
  return result["description"], gr.update(choices=result["suggestions"], value=[])
774
  except Exception as e:
775
  raise gr.Error(f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  raise gr.Error(f"Error: {e}")
282
 
283
 
284
+ def generate_acceptance_criteria(config, system_message, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
285
  """
286
+ Generate acceptance criteria based on the system message, user message, and expected output.
287
 
288
  This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
289
  to generate acceptance criteria.
290
 
291
  Args:
292
+ system_message (str): The system message to use when generating acceptance criteria.
293
  user_message (str): The user's input message.
294
  expected_output (str): The anticipated response or outcome from the language
295
  model based on the user's message.
296
  acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
297
+ acceptance_criteria_temperature (float): The temperature to use for the acceptance criteria model.
298
  prompt_template_group (Optional[str], optional): The group of prompt templates
299
  to use. Defaults to None.
300
 
 
323
  }, prompts=prompt_templates,
324
  verbose=config.verbose, logger=logger)
325
  state = AgentState(
326
+ system_message=system_message,
327
  user_message=user_message,
328
  expected_output=expected_output
329
  )
330
+ output_state = acceptance_criteria_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
331
 
332
  if log_handler:
333
  log_handler.close()
 
389
  expected_output=expected_output
390
  )
391
 
392
+ output_state = initial_system_message_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
393
 
394
  if log_handler:
395
  log_handler.close()
 
716
  ):
717
  try:
718
  if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
719
+ input_dataframe = pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])
720
  else:
721
  input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
722
  return input_dataframe, None, None, None, None
 
776
  return result["description"], gr.update(choices=result["suggestions"], value=[])
777
  except Exception as e:
778
  raise gr.Error(f"An error occurred: {str(e)}")
779
+
780
+ def evaluate_output(
781
+ config,
782
+ expected_output: str,
783
+ output: str,
784
+ acceptance_criteria: str,
785
+ prompt_analyzer_model_name: str,
786
+ prompt_analyzer_temperature: float,
787
+ prompt_template_group: Optional[str] = None
788
+ ) -> str:
789
+ # Package the required variables into an AgentState dictionary
790
+ state = AgentState(
791
+ acceptance_criteria=acceptance_criteria,
792
+ expected_output=expected_output,
793
+ output=output
794
+ )
795
+
796
+ # Initialize the acceptance criteria model
797
+ llm = initialize_llm(config, prompt_analyzer_model_name, {'temperature': prompt_analyzer_temperature}).bind(response_format={"type": "json_object"})
798
+
799
+ # Get the prompt templates
800
+ if prompt_template_group is None:
801
+ prompt_template_group = 'default'
802
+ prompt_templates = prompt_templates_confz2langchain(
803
+ config.prompt_templates[prompt_template_group]
804
+ )
805
+
806
+ # Create the MetaPromptGraph instance
807
+ acceptance_criteria_graph = MetaPromptGraph(
808
+ llms={NODE_PROMPT_ANALYZER: llm},
809
+ prompts=prompt_templates,
810
+ verbose=config.verbose
811
+ )
812
+
813
+ # Run the node graph for evaluation
814
+ output_state = acceptance_criteria_graph.run_node_graph(NODE_PROMPT_ANALYZER, state)
815
+
816
+ # Return the evaluation result
817
+ return output_state.get('analysis', "Error: The output state does not contain a valid 'analysis'")
meta_prompt/meta_prompt.py CHANGED
@@ -117,46 +117,50 @@ class MetaPromptGraph:
117
 
118
  self.aggressive_exploration = aggressive_exploration
119
 
120
- def _create_acceptance_criteria_workflow(self) -> StateGraph:
121
- """
122
- Create a workflow state graph for acceptance criteria.
 
 
 
123
 
124
  Returns:
125
  StateGraph: A state graph representing the workflow.
126
  """
127
  workflow = StateGraph(AgentState)
128
  workflow.add_node(
129
- NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
130
  lambda x: self._prompt_node(
131
- NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
132
- "acceptance_criteria",
133
  x
134
  )
135
  )
136
- workflow.add_edge(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, END)
137
- workflow.set_entry_point(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
138
  return workflow
139
 
140
 
141
- def _create_prompt_initial_developer_workflow(self) -> StateGraph:
142
- """
143
- Create a workflow state graph for the initial developer prompt.
 
 
144
 
145
  Returns:
146
- StateGraph: A state graph representing the workflow.
147
  """
148
- workflow = StateGraph(AgentState)
149
- workflow.add_node(
150
- NODE_PROMPT_INITIAL_DEVELOPER,
151
- lambda x: self._prompt_node(
152
- NODE_PROMPT_INITIAL_DEVELOPER,
153
- "system_message",
154
- x
155
- )
156
- )
157
- workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER, END)
158
- workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
159
- return workflow
160
 
161
 
162
  def _create_workflow(self) -> StateGraph:
@@ -251,45 +255,26 @@ class MetaPromptGraph:
251
 
252
  return workflow
253
 
254
-
255
- def run_acceptance_criteria_graph(self, state: AgentState) -> AgentState:
256
- """Run the acceptance criteria graph with the given state.
257
-
258
- Args:
259
- state (AgentState): The current state of the agent.
260
-
261
- Returns:
262
- AgentState: The output state of the agent after invoking the graph.
263
- """
264
- self.logger.debug("Creating acceptance criteria workflow")
265
- workflow = self._create_acceptance_criteria_workflow()
266
- memory = MemorySaver()
267
- graph = workflow.compile(checkpointer=memory)
268
- config = {"configurable": {"thread_id": "1"}}
269
- self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
270
- output_state = graph.invoke(state, config)
271
- self.logger.debug("Output state: %s", pprint.pformat(output_state))
272
- return output_state
273
-
274
 
275
- def run_prompt_initial_developer_graph(self, state: AgentState) -> AgentState:
276
- """Run the prompt initial developer graph with the given state.
277
 
278
  Args:
 
279
  state (AgentState): The current state of the agent.
280
 
281
  Returns:
282
  AgentState: The output state of the agent after invoking the graph.
283
  """
284
- self.logger.debug("Creating prompt initial developer workflow")
285
- workflow = self._create_prompt_initial_developer_workflow()
286
  memory = MemorySaver()
287
  graph = workflow.compile(checkpointer=memory)
288
  config = {"configurable": {"thread_id": "1"}}
289
- self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
290
  output_state = graph.invoke(state, config)
291
- self.logger.debug("Output state: %s", pprint.pformat(output_state))
292
- return output_state
293
 
294
 
295
  def run_meta_prompt_graph(
 
117
 
118
  self.aggressive_exploration = aggressive_exploration
119
 
120
+
121
+ def _create_workflow_for_node(self, node: str) -> StateGraph:
122
+ """Create a workflow state graph for the specified node.
123
+
124
+ Args:
125
+ node (str): The node name to create the workflow for.
126
 
127
  Returns:
128
  StateGraph: A state graph representing the workflow.
129
  """
130
  workflow = StateGraph(AgentState)
131
  workflow.add_node(
132
+ node,
133
  lambda x: self._prompt_node(
134
+ node,
135
+ self._get_target_attribute_for_node(node),
136
  x
137
  )
138
  )
139
+ workflow.add_edge(node, END)
140
+ workflow.set_entry_point(node)
141
  return workflow
142
 
143
 
144
+ def _get_target_attribute_for_node(self, node: str) -> str:
145
+ """Get the target attribute for the specified node.
146
+
147
+ Args:
148
+ node (str): The node name.
149
 
150
  Returns:
151
+ str: The target attribute for the node.
152
  """
153
+ # Define a mapping of nodes to their target attributes
154
+ node_to_attribute = {
155
+ NODE_ACCEPTANCE_CRITERIA_DEVELOPER: "acceptance_criteria",
156
+ NODE_PROMPT_INITIAL_DEVELOPER: "system_message",
157
+ NODE_PROMPT_DEVELOPER: "system_message",
158
+ NODE_PROMPT_EXECUTOR: "output",
159
+ NODE_OUTPUT_HISTORY_ANALYZER: "analysis",
160
+ NODE_PROMPT_ANALYZER: "analysis",
161
+ NODE_PROMPT_SUGGESTER: "suggestions"
162
+ }
163
+ return node_to_attribute.get(node, "")
 
164
 
165
 
166
  def _create_workflow(self) -> StateGraph:
 
255
 
256
  return workflow
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
+ def run_node_graph(self, node: str, state: AgentState) -> AgentState:
260
+ """Run the graph for the specified node with the given state.
261
 
262
  Args:
263
+ node (str): The node name to run.
264
  state (AgentState): The current state of the agent.
265
 
266
  Returns:
267
  AgentState: The output state of the agent after invoking the graph.
268
  """
269
+ self.logger.debug(f"Creating workflow for node: {node}")
270
+ workflow = self._create_workflow_for_node(node)
271
  memory = MemorySaver()
272
  graph = workflow.compile(checkpointer=memory)
273
  config = {"configurable": {"thread_id": "1"}}
274
+ self.logger.debug(f"Invoking graph for node {node} with state: %s", pprint.pformat(state))
275
  output_state = graph.invoke(state, config)
276
+ self.logger.debug(f"Output state for node {node}: %s", pprint.pformat(output_state))
277
+ return output_state
278
 
279
 
280
  def run_meta_prompt_graph(
tests/meta_prompt_graph_test.py CHANGED
@@ -8,9 +8,6 @@ from meta_prompt import *
8
  from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
9
  from langgraph.graph import END
10
  import os
11
- # from dotenv import load_dotenv
12
-
13
- # load_dotenv()
14
 
15
  class TestMetaPromptGraph(unittest.TestCase):
16
  def setUp(self):
@@ -315,7 +312,7 @@ class TestMetaPromptGraph(unittest.TestCase):
315
  NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
316
  }
317
  meta_prompt_graph = MetaPromptGraph(llms=llms)
318
- workflow = meta_prompt_graph._create_acceptance_criteria_workflow()
319
 
320
  # Check if the workflow contains the correct node
321
  self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
@@ -360,7 +357,7 @@ class TestMetaPromptGraph(unittest.TestCase):
360
  user_message="How do I reverse a list in Python?",
361
  expected_output="The output should use the `reverse()` method.",
362
  )
363
- output_state = meta_prompt_graph.run_acceptance_criteria_graph(state)
364
 
365
  # Check if the output state contains the acceptance criteria
366
  self.assertIsNotNone(output_state["acceptance_criteria"])
@@ -382,7 +379,7 @@ class TestMetaPromptGraph(unittest.TestCase):
382
  }
383
  meta_prompt_graph = MetaPromptGraph(llms=llms)
384
  state = AgentState(user_message="How do I reverse a list in Python?")
385
- output_state = meta_prompt_graph.run_prompt_initial_developer_graph(state)
386
 
387
  # Check if the output state contains the initial developer prompt
388
  self.assertIsNotNone(output_state['system_message'])
 
8
  from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
9
  from langgraph.graph import END
10
  import os
 
 
 
11
 
12
  class TestMetaPromptGraph(unittest.TestCase):
13
  def setUp(self):
 
312
  NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
313
  }
314
  meta_prompt_graph = MetaPromptGraph(llms=llms)
315
+ workflow = meta_prompt_graph._create_workflow_for_node(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
316
 
317
  # Check if the workflow contains the correct node
318
  self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
 
357
  user_message="How do I reverse a list in Python?",
358
  expected_output="The output should use the `reverse()` method.",
359
  )
360
+ output_state = meta_prompt_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
361
 
362
  # Check if the output state contains the acceptance criteria
363
  self.assertIsNotNone(output_state["acceptance_criteria"])
 
379
  }
380
  meta_prompt_graph = MetaPromptGraph(llms=llms)
381
  state = AgentState(user_message="How do I reverse a list in Python?")
382
+ output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
383
 
384
  # Check if the output state contains the initial developer prompt
385
  self.assertIsNotNone(output_state['system_message'])