Spaces:
Running
Running
Updated unit test.
Browse files
- app/gradio_meta_prompt.py +35 -3
- app/gradio_meta_prompt_utils.py +47 -5
- meta_prompt/meta_prompt.py +36 -51
- tests/meta_prompt_graph_test.py +3 -6
app/gradio_meta_prompt.py
CHANGED
@@ -318,6 +318,7 @@ with gr.Blocks(title='Meta Prompt') as demo:
|
|
318 |
show_copy_button=True
|
319 |
)
|
320 |
with gr.Row():
|
|
|
321 |
generate_acceptance_criteria_button = gr.Button(
|
322 |
value="Generate",
|
323 |
variant="secondary"
|
@@ -456,8 +457,11 @@ with gr.Blocks(title='Meta Prompt') as demo:
|
|
456 |
value="Evaluate", variant="secondary")
|
457 |
output_output = gr.Textbox(
|
458 |
label="Output", show_copy_button=True)
|
459 |
-
|
460 |
-
|
|
|
|
|
|
|
461 |
analysis_output = gr.Textbox(
|
462 |
label="Analysis", show_copy_button=True)
|
463 |
flag_button = gr.Button(
|
@@ -802,12 +806,40 @@ with gr.Blocks(title='Meta Prompt') as demo:
|
|
802 |
|
803 |
generate_acceptance_criteria_button.click(
|
804 |
generate_acceptance_criteria,
|
805 |
-
inputs=[config_state,
|
|
|
806 |
model_name_states["acceptance_criteria"],
|
807 |
model_temperature_states["acceptance_criteria"],
|
808 |
prompt_template_group],
|
809 |
outputs=[acceptance_criteria_input, logs_chatbot]
|
810 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
811 |
generate_initial_system_message_button.click(
|
812 |
generate_initial_system_message,
|
813 |
inputs=[config_state, selected_example_input, selected_example_output,
|
|
|
318 |
show_copy_button=True
|
319 |
)
|
320 |
with gr.Row():
|
321 |
+
evaluate_acceptance_criteria_input_button = gr.Button("Evaluate")
|
322 |
generate_acceptance_criteria_button = gr.Button(
|
323 |
value="Generate",
|
324 |
variant="secondary"
|
|
|
457 |
value="Evaluate", variant="secondary")
|
458 |
output_output = gr.Textbox(
|
459 |
label="Output", show_copy_button=True)
|
460 |
+
with gr.Group():
|
461 |
+
acceptance_criteria_output = gr.Textbox(
|
462 |
+
label="Acceptance Criteria", show_copy_button=True)
|
463 |
+
evaluate_acceptance_criteria_output_button = gr.Button(
|
464 |
+
value="Evaluate", variant="secondary")
|
465 |
analysis_output = gr.Textbox(
|
466 |
label="Analysis", show_copy_button=True)
|
467 |
flag_button = gr.Button(
|
|
|
806 |
|
807 |
generate_acceptance_criteria_button.click(
|
808 |
generate_acceptance_criteria,
|
809 |
+
inputs=[config_state, initial_system_message_input,
|
810 |
+
selected_example_input, selected_example_output,
|
811 |
model_name_states["acceptance_criteria"],
|
812 |
model_temperature_states["acceptance_criteria"],
|
813 |
prompt_template_group],
|
814 |
outputs=[acceptance_criteria_input, logs_chatbot]
|
815 |
)
|
816 |
+
evaluate_acceptance_criteria_input_button.click(
|
817 |
+
fn=evaluate_output,
|
818 |
+
inputs=[
|
819 |
+
config_state,
|
820 |
+
selected_example_output,
|
821 |
+
output_output,
|
822 |
+
acceptance_criteria_input,
|
823 |
+
model_name_states["analyzer"],
|
824 |
+
model_temperature_states["analyzer"],
|
825 |
+
prompt_template_group
|
826 |
+
],
|
827 |
+
outputs=[analysis_output]
|
828 |
+
)
|
829 |
+
evaluate_acceptance_criteria_output_button.click(
|
830 |
+
fn=evaluate_output,
|
831 |
+
inputs=[
|
832 |
+
config_state,
|
833 |
+
selected_example_output,
|
834 |
+
output_output,
|
835 |
+
acceptance_criteria_output,
|
836 |
+
model_name_states["analyzer"],
|
837 |
+
model_temperature_states["analyzer"],
|
838 |
+
prompt_template_group
|
839 |
+
],
|
840 |
+
outputs=[analysis_output]
|
841 |
+
)
|
842 |
+
|
843 |
generate_initial_system_message_button.click(
|
844 |
generate_initial_system_message,
|
845 |
inputs=[config_state, selected_example_input, selected_example_output,
|
app/gradio_meta_prompt_utils.py
CHANGED
@@ -281,18 +281,20 @@ def evaluate_system_message(config, system_message, user_message, executor_model
|
|
281 |
raise gr.Error(f"Error: {e}")
|
282 |
|
283 |
|
284 |
-
def generate_acceptance_criteria(config, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
|
285 |
"""
|
286 |
-
Generate acceptance criteria based on the user message and expected output.
|
287 |
|
288 |
This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
|
289 |
to generate acceptance criteria.
|
290 |
|
291 |
Args:
|
|
|
292 |
user_message (str): The user's input message.
|
293 |
expected_output (str): The anticipated response or outcome from the language
|
294 |
model based on the user's message.
|
295 |
acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
|
|
|
296 |
prompt_template_group (Optional[str], optional): The group of prompt templates
|
297 |
to use. Defaults to None.
|
298 |
|
@@ -321,10 +323,11 @@ def generate_acceptance_criteria(config, user_message, expected_output, acceptan
|
|
321 |
}, prompts=prompt_templates,
|
322 |
verbose=config.verbose, logger=logger)
|
323 |
state = AgentState(
|
|
|
324 |
user_message=user_message,
|
325 |
expected_output=expected_output
|
326 |
)
|
327 |
-
output_state = acceptance_criteria_graph.run_acceptance_criteria_graph(state)
|
328 |
|
329 |
if log_handler:
|
330 |
log_handler.close()
|
@@ -386,7 +389,7 @@ def generate_initial_system_message(
|
|
386 |
expected_output=expected_output
|
387 |
)
|
388 |
|
389 |
-
output_state = initial_system_message_graph.
|
390 |
|
391 |
if log_handler:
|
392 |
log_handler.close()
|
@@ -713,7 +716,7 @@ def append_example_to_input_dataframe(
|
|
713 |
):
|
714 |
try:
|
715 |
if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
|
716 |
-
input_dataframe
|
717 |
else:
|
718 |
input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
|
719 |
return input_dataframe, None, None, None, None
|
@@ -773,3 +776,42 @@ def apply_suggestions(config, description, suggestions, examples, model_name, te
|
|
773 |
return result["description"], gr.update(choices=result["suggestions"], value=[])
|
774 |
except Exception as e:
|
775 |
raise gr.Error(f"An error occurred: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
raise gr.Error(f"Error: {e}")
|
282 |
|
283 |
|
284 |
+
def generate_acceptance_criteria(config, system_message, user_message, expected_output, acceptance_criteria_model_name, acceptance_criteria_temperature, prompt_template_group):
|
285 |
"""
|
286 |
+
Generate acceptance criteria based on the system message, user message, and expected output.
|
287 |
|
288 |
This function uses the MetaPromptGraph's run_node_graph method
|
289 |
to generate acceptance criteria.
|
290 |
|
291 |
Args:
|
292 |
+
system_message (str): The system message to use when generating acceptance criteria.
|
293 |
user_message (str): The user's input message.
|
294 |
expected_output (str): The anticipated response or outcome from the language
|
295 |
model based on the user's message.
|
296 |
acceptance_criteria_model_name (str): The name of the acceptance criteria model to use.
|
297 |
+
acceptance_criteria_temperature (float): The temperature to use for the acceptance criteria model.
|
298 |
prompt_template_group (Optional[str], optional): The group of prompt templates
|
299 |
to use. Defaults to None.
|
300 |
|
|
|
323 |
}, prompts=prompt_templates,
|
324 |
verbose=config.verbose, logger=logger)
|
325 |
state = AgentState(
|
326 |
+
system_message=system_message,
|
327 |
user_message=user_message,
|
328 |
expected_output=expected_output
|
329 |
)
|
330 |
+
output_state = acceptance_criteria_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
|
331 |
|
332 |
if log_handler:
|
333 |
log_handler.close()
|
|
|
389 |
expected_output=expected_output
|
390 |
)
|
391 |
|
392 |
+
output_state = initial_system_message_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
|
393 |
|
394 |
if log_handler:
|
395 |
log_handler.close()
|
|
|
716 |
):
|
717 |
try:
|
718 |
if input_dataframe.empty or (input_dataframe.iloc[-1] == ['', '']).all():
|
719 |
+
input_dataframe = pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])
|
720 |
else:
|
721 |
input_dataframe = pd.concat([input_dataframe, pd.DataFrame([[new_example_input, new_example_output]], columns=["Input", "Output"])], ignore_index=True)
|
722 |
return input_dataframe, None, None, None, None
|
|
|
776 |
return result["description"], gr.update(choices=result["suggestions"], value=[])
|
777 |
except Exception as e:
|
778 |
raise gr.Error(f"An error occurred: {str(e)}")
|
779 |
+
|
780 |
+
def evaluate_output(
|
781 |
+
config,
|
782 |
+
expected_output: str,
|
783 |
+
output: str,
|
784 |
+
acceptance_criteria: str,
|
785 |
+
prompt_analyzer_model_name: str,
|
786 |
+
prompt_analyzer_temperature: float,
|
787 |
+
prompt_template_group: Optional[str] = None
|
788 |
+
) -> str:
|
789 |
+
# Package the required variables into an AgentState dictionary
|
790 |
+
state = AgentState(
|
791 |
+
acceptance_criteria=acceptance_criteria,
|
792 |
+
expected_output=expected_output,
|
793 |
+
output=output
|
794 |
+
)
|
795 |
+
|
796 |
+
# Initialize the prompt analyzer model
|
797 |
+
llm = initialize_llm(config, prompt_analyzer_model_name, {'temperature': prompt_analyzer_temperature}).bind(response_format={"type": "json_object"})
|
798 |
+
|
799 |
+
# Get the prompt templates
|
800 |
+
if prompt_template_group is None:
|
801 |
+
prompt_template_group = 'default'
|
802 |
+
prompt_templates = prompt_templates_confz2langchain(
|
803 |
+
config.prompt_templates[prompt_template_group]
|
804 |
+
)
|
805 |
+
|
806 |
+
# Create the MetaPromptGraph instance
|
807 |
+
acceptance_criteria_graph = MetaPromptGraph(
|
808 |
+
llms={NODE_PROMPT_ANALYZER: llm},
|
809 |
+
prompts=prompt_templates,
|
810 |
+
verbose=config.verbose
|
811 |
+
)
|
812 |
+
|
813 |
+
# Run the node graph for evaluation
|
814 |
+
output_state = acceptance_criteria_graph.run_node_graph(NODE_PROMPT_ANALYZER, state)
|
815 |
+
|
816 |
+
# Return the evaluation result
|
817 |
+
return output_state.get('analysis', "Error: The output state does not contain a valid 'analysis'")
|
meta_prompt/meta_prompt.py
CHANGED
@@ -117,46 +117,50 @@ class MetaPromptGraph:
|
|
117 |
|
118 |
self.aggressive_exploration = aggressive_exploration
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
Create a workflow state graph for
|
|
|
|
|
|
|
123 |
|
124 |
Returns:
|
125 |
StateGraph: A state graph representing the workflow.
|
126 |
"""
|
127 |
workflow = StateGraph(AgentState)
|
128 |
workflow.add_node(
|
129 |
-
|
130 |
lambda x: self._prompt_node(
|
131 |
-
|
132 |
-
|
133 |
x
|
134 |
)
|
135 |
)
|
136 |
-
workflow.add_edge(
|
137 |
-
workflow.set_entry_point(
|
138 |
return workflow
|
139 |
|
140 |
|
141 |
-
def
|
142 |
-
"""
|
143 |
-
|
|
|
|
|
144 |
|
145 |
Returns:
|
146 |
-
|
147 |
"""
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
return workflow
|
160 |
|
161 |
|
162 |
def _create_workflow(self) -> StateGraph:
|
@@ -251,45 +255,26 @@ class MetaPromptGraph:
|
|
251 |
|
252 |
return workflow
|
253 |
|
254 |
-
|
255 |
-
def run_acceptance_criteria_graph(self, state: AgentState) -> AgentState:
|
256 |
-
"""Run the acceptance criteria graph with the given state.
|
257 |
-
|
258 |
-
Args:
|
259 |
-
state (AgentState): The current state of the agent.
|
260 |
-
|
261 |
-
Returns:
|
262 |
-
AgentState: The output state of the agent after invoking the graph.
|
263 |
-
"""
|
264 |
-
self.logger.debug("Creating acceptance criteria workflow")
|
265 |
-
workflow = self._create_acceptance_criteria_workflow()
|
266 |
-
memory = MemorySaver()
|
267 |
-
graph = workflow.compile(checkpointer=memory)
|
268 |
-
config = {"configurable": {"thread_id": "1"}}
|
269 |
-
self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
|
270 |
-
output_state = graph.invoke(state, config)
|
271 |
-
self.logger.debug("Output state: %s", pprint.pformat(output_state))
|
272 |
-
return output_state
|
273 |
-
|
274 |
|
275 |
-
def
|
276 |
-
"""Run the
|
277 |
|
278 |
Args:
|
|
|
279 |
state (AgentState): The current state of the agent.
|
280 |
|
281 |
Returns:
|
282 |
AgentState: The output state of the agent after invoking the graph.
|
283 |
"""
|
284 |
-
self.logger.debug("Creating
|
285 |
-
workflow = self.
|
286 |
memory = MemorySaver()
|
287 |
graph = workflow.compile(checkpointer=memory)
|
288 |
config = {"configurable": {"thread_id": "1"}}
|
289 |
-
self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
|
290 |
output_state = graph.invoke(state, config)
|
291 |
-
self.logger.debug("Output state: %s", pprint.pformat(output_state))
|
292 |
-
return output_state
|
293 |
|
294 |
|
295 |
def run_meta_prompt_graph(
|
|
|
117 |
|
118 |
self.aggressive_exploration = aggressive_exploration
|
119 |
|
120 |
+
|
121 |
+
def _create_workflow_for_node(self, node: str) -> StateGraph:
|
122 |
+
"""Create a workflow state graph for the specified node.
|
123 |
+
|
124 |
+
Args:
|
125 |
+
node (str): The node name to create the workflow for.
|
126 |
|
127 |
Returns:
|
128 |
StateGraph: A state graph representing the workflow.
|
129 |
"""
|
130 |
workflow = StateGraph(AgentState)
|
131 |
workflow.add_node(
|
132 |
+
node,
|
133 |
lambda x: self._prompt_node(
|
134 |
+
node,
|
135 |
+
self._get_target_attribute_for_node(node),
|
136 |
x
|
137 |
)
|
138 |
)
|
139 |
+
workflow.add_edge(node, END)
|
140 |
+
workflow.set_entry_point(node)
|
141 |
return workflow
|
142 |
|
143 |
|
144 |
+
def _get_target_attribute_for_node(self, node: str) -> str:
|
145 |
+
"""Get the target attribute for the specified node.
|
146 |
+
|
147 |
+
Args:
|
148 |
+
node (str): The node name.
|
149 |
|
150 |
Returns:
|
151 |
+
str: The target attribute for the node.
|
152 |
"""
|
153 |
+
# Define a mapping of nodes to their target attributes
|
154 |
+
node_to_attribute = {
|
155 |
+
NODE_ACCEPTANCE_CRITERIA_DEVELOPER: "acceptance_criteria",
|
156 |
+
NODE_PROMPT_INITIAL_DEVELOPER: "system_message",
|
157 |
+
NODE_PROMPT_DEVELOPER: "system_message",
|
158 |
+
NODE_PROMPT_EXECUTOR: "output",
|
159 |
+
NODE_OUTPUT_HISTORY_ANALYZER: "analysis",
|
160 |
+
NODE_PROMPT_ANALYZER: "analysis",
|
161 |
+
NODE_PROMPT_SUGGESTER: "suggestions"
|
162 |
+
}
|
163 |
+
return node_to_attribute.get(node, "")
|
|
|
164 |
|
165 |
|
166 |
def _create_workflow(self) -> StateGraph:
|
|
|
255 |
|
256 |
return workflow
|
257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
+
def run_node_graph(self, node: str, state: AgentState) -> AgentState:
|
260 |
+
"""Run the graph for the specified node with the given state.
|
261 |
|
262 |
Args:
|
263 |
+
node (str): The node name to run.
|
264 |
state (AgentState): The current state of the agent.
|
265 |
|
266 |
Returns:
|
267 |
AgentState: The output state of the agent after invoking the graph.
|
268 |
"""
|
269 |
+
self.logger.debug(f"Creating workflow for node: {node}")
|
270 |
+
workflow = self._create_workflow_for_node(node)
|
271 |
memory = MemorySaver()
|
272 |
graph = workflow.compile(checkpointer=memory)
|
273 |
config = {"configurable": {"thread_id": "1"}}
|
274 |
+
self.logger.debug(f"Invoking graph for node {node} with state: %s", pprint.pformat(state))
|
275 |
output_state = graph.invoke(state, config)
|
276 |
+
self.logger.debug(f"Output state for node {node}: %s", pprint.pformat(output_state))
|
277 |
+
return output_state
|
278 |
|
279 |
|
280 |
def run_meta_prompt_graph(
|
tests/meta_prompt_graph_test.py
CHANGED
@@ -8,9 +8,6 @@ from meta_prompt import *
|
|
8 |
from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
|
9 |
from langgraph.graph import END
|
10 |
import os
|
11 |
-
# from dotenv import load_dotenv
|
12 |
-
|
13 |
-
# load_dotenv()
|
14 |
|
15 |
class TestMetaPromptGraph(unittest.TestCase):
|
16 |
def setUp(self):
|
@@ -315,7 +312,7 @@ class TestMetaPromptGraph(unittest.TestCase):
|
|
315 |
NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
|
316 |
}
|
317 |
meta_prompt_graph = MetaPromptGraph(llms=llms)
|
318 |
-
workflow = meta_prompt_graph._create_acceptance_criteria_workflow()
|
319 |
|
320 |
# Check if the workflow contains the correct node
|
321 |
self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
|
@@ -360,7 +357,7 @@ class TestMetaPromptGraph(unittest.TestCase):
|
|
360 |
user_message="How do I reverse a list in Python?",
|
361 |
expected_output="The output should use the `reverse()` method.",
|
362 |
)
|
363 |
-
output_state = meta_prompt_graph.run_acceptance_criteria_graph(state)
|
364 |
|
365 |
# Check if the output state contains the acceptance criteria
|
366 |
self.assertIsNotNone(output_state["acceptance_criteria"])
|
@@ -382,7 +379,7 @@ class TestMetaPromptGraph(unittest.TestCase):
|
|
382 |
}
|
383 |
meta_prompt_graph = MetaPromptGraph(llms=llms)
|
384 |
state = AgentState(user_message="How do I reverse a list in Python?")
|
385 |
-
output_state = meta_prompt_graph.
|
386 |
|
387 |
# Check if the output state contains the initial developer prompt
|
388 |
self.assertIsNotNone(output_state['system_message'])
|
|
|
8 |
from meta_prompt.consts import NODE_ACCEPTANCE_CRITERIA_DEVELOPER
|
9 |
from langgraph.graph import END
|
10 |
import os
|
|
|
|
|
|
|
11 |
|
12 |
class TestMetaPromptGraph(unittest.TestCase):
|
13 |
def setUp(self):
|
|
|
312 |
NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatOpenAI(model_name=os.getenv("TEST_MODEL_NAME_ACCEPTANCE_CRITERIA_DEVELOPER"))
|
313 |
}
|
314 |
meta_prompt_graph = MetaPromptGraph(llms=llms)
|
315 |
+
workflow = meta_prompt_graph._create_workflow_for_node(NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
|
316 |
|
317 |
# Check if the workflow contains the correct node
|
318 |
self.assertIn(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, workflow.nodes)
|
|
|
357 |
user_message="How do I reverse a list in Python?",
|
358 |
expected_output="The output should use the `reverse()` method.",
|
359 |
)
|
360 |
+
output_state = meta_prompt_graph.run_node_graph(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, state)
|
361 |
|
362 |
# Check if the output state contains the acceptance criteria
|
363 |
self.assertIsNotNone(output_state["acceptance_criteria"])
|
|
|
379 |
}
|
380 |
meta_prompt_graph = MetaPromptGraph(llms=llms)
|
381 |
state = AgentState(user_message="How do I reverse a list in Python?")
|
382 |
+
output_state = meta_prompt_graph.run_node_graph(NODE_PROMPT_INITIAL_DEVELOPER, state)
|
383 |
|
384 |
# Check if the output state contains the initial developer prompt
|
385 |
self.assertIsNotNone(output_state['system_message'])
|