Acceptance Criteria generating works now.
- app/gradio_meta_prompt.py +217 -72
- meta_prompt/__init__.py +1 -0
- meta_prompt/consts.py +32 -23
- meta_prompt/meta_prompt.py +61 -19
- tests/meta_prompt_graph_test.py +22 -0
app/gradio_meta_prompt.py
CHANGED
@@ -181,38 +181,39 @@ def on_model_tab_select(event: gr.SelectData):
     active_model_tab = event.value
 
 
-def get_current_executor_model(simple_model_name: str,
+def get_current_model(simple_model_name: str,
+                      advanced_model_name: str,
+                      expert_model_name: str,
+                      expert_model_config: Optional[Dict[str, Any]] = None) -> BaseLanguageModel:
     """
     Retrieve and return a language model (LLM) based on the currently active model tab.
 
+    This function uses a mapping to associate model tab names with their corresponding
+    model names. It then looks up the configuration for the selected model in the
+    application's configuration, creates an instance of the appropriate type of language
+    model using that configuration, and returns it. If the active model tab is not found
+    in the mapping, the simple model will be used as a default.
 
     Args:
+        simple_model_name (str): The name of the simple language model. This should
+            correspond to a key in the 'llms' section of the application's configuration.
+        advanced_model_name (str): The name of the advanced language model. This should
+            correspond to a key in the 'llms' section of the application's configuration.
+        expert_model_name (str): The name of the expert language model. This should
+            correspond to a key in the 'llms' section of the application's configuration.
+        expert_model_config (Optional[Dict[str, Any]]): Optional configuration for the
+            expert model. This configuration will be used to update the model configuration
+            if the active model tab is "Expert". Defaults to None.
 
     Returns:
+        BaseLanguageModel: An instance of a language model that inherits from
+            BaseLanguageModel, based on the currently active model tab and the provided
+            model names.
 
     Raises:
+        ValueError: If the active model tab is not found in the mapping or if the model
+            name or configuration is invalid.
+        RuntimeError: If an unexpected error occurs while retrieving the model.
     """
     model_mapping = {
         "Simple": simple_model_name,

@@ -221,16 +222,16 @@ def get_current_executor_model(simple_model_name: str,
     }
 
     try:
+        model_name = model_mapping.get(active_model_tab, simple_model_name)
+        model = config.llms[model_name]
+        model_type = model.type
+        model_config = model.model_dump(exclude={'type'})
 
         # Update the configuration with the expert model configurations if provided
         if active_model_tab == "Expert" and expert_model_config:
+            model_config.update(expert_model_config)
 
+        return LLMModelFactory().create(model_type, **model_config)
 
     except KeyError as e:
         logging.error(f"Configuration key error: {e}")

@@ -238,7 +239,7 @@ def get_current_executor_model(simple_model_name: str,
 
     except Exception as e:
         logging.error(f"An unexpected error occurred: {e}")
+        raise RuntimeError(f"Failed to retrieve the model: {e}")
 
 
 def evaluate_system_message(system_message, user_message,

@@ -265,7 +266,7 @@ def evaluate_system_message(system_message, user_message,
         gr.Error: If there is a Gradio-specific error during the execution of this function.
         Exception: For any other unexpected errors that occur during the execution of this function.
     """
+    llm = get_current_model(simple_model,
                             advanced_executor_model,
                             expert_executor_model, {"temperature": expert_execuor_model_temperature})
     template = ChatPromptTemplate.from_messages([

@@ -282,6 +283,142 @@ def evaluate_system_message(system_message, user_message,
         raise gr.Error(f"Error: {e}")
 
 
+def generate_acceptance_criteria(user_message, expected_output,
+                                 simple_model, advanced_executor_model,
+                                 expert_prompt_acceptance_criteria_model,
+                                 expert_prompt_acceptance_criteria_temperature=0.1,
+                                 prompt_template_group: Optional[str] = None):
+    """
+    Generate acceptance criteria based on the user message and expected output.
+
+    This function uses the MetaPromptGraph's run_acceptance_criteria_graph method
+    to generate acceptance criteria.
+
+    Args:
+        user_message (str): The user's input message.
+        expected_output (str): The anticipated response or outcome from the language
+            model based on the user's message.
+        simple_model (str): The name of the simple language model.
+        advanced_executor_model (str): The name of the advanced language model.
+        expert_prompt_acceptance_criteria_model (str): The name of the expert language
+            model.
+        expert_prompt_acceptance_criteria_temperature (float, optional): The temperature
+            parameter for the expert model. Defaults to 0.1.
+        prompt_template_group (Optional[str], optional): The group of prompt templates
+            to use. Defaults to None.
+
+    Returns:
+        str: The generated acceptance criteria.
+    """
+
+    log_stream = io.StringIO()
+    logger = logging.getLogger(MetaPromptGraph.__name__) if config.verbose else None
+    log_handler = logging.StreamHandler(log_stream) if logger else None
+
+    if log_handler:
+        log_handler.setFormatter(
+            jsonlogger.JsonFormatter('%(asctime)s %(name)s %(levelname)s %(message)s')
+        )
+        logger.addHandler(log_handler)
+
+    llm = get_current_model(simple_model, advanced_executor_model,
+                            expert_prompt_acceptance_criteria_model,
+                            {"temperature": expert_prompt_acceptance_criteria_temperature})
+    if prompt_template_group is None:
+        prompt_template_group = 'default'
+    prompt_templates = prompt_templates_confz2langchain(
+        config.prompt_templates[prompt_template_group]
+    )
+    acceptance_criteria_graph = MetaPromptGraph(llms={
+        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: llm
+    }, prompts=prompt_templates,
+        verbose=config.verbose, logger=logger)
+    state = AgentState(
+        user_message=user_message,
+        expected_output=expected_output
+    )
+    output_state = acceptance_criteria_graph.run_acceptance_criteria_graph(state)
+
+    if log_handler:
+        log_handler.close()
+        log_output = log_stream.getvalue()
+    else:
+        log_output = None
+    return output_state.get('acceptance_criteria', ""), chat_log_2_chatbot_list(log_output)
+
+
+def generate_initial_system_message(
+    user_message: str,
+    expected_output: str,
+    simple_model: str,
+    advanced_executor_model: str,
+    expert_prompt_initial_developer_model: str,
+    expert_prompt_initial_developer_temperature: float = 0.1,
+    prompt_template_group: Optional[str] = None
+) -> tuple:
+    """
+    Generate an initial system message based on the user message and expected output.
+
+    Args:
+        user_message (str): The user's input message.
+        expected_output (str): The anticipated response or outcome from the language model.
+        simple_model (str): The name of the simple language model.
+        advanced_executor_model (str): The name of the advanced language model.
+        expert_prompt_initial_developer_model (str): The name of the expert language model.
+        expert_prompt_initial_developer_temperature (float, optional): The temperature parameter for the expert model. Defaults to 0.1.
+        prompt_template_group (Optional[str], optional): The group of prompt templates to use. Defaults to None.
+
+    Returns:
+        tuple: A tuple containing the initial system message and the chat log.
+    """
+
+    log_stream = io.StringIO()
+    logger = logging.getLogger(MetaPromptGraph.__name__) if config.verbose else None
+    log_handler = logging.StreamHandler(log_stream) if logger else None
+
+    if log_handler:
+        log_handler.setFormatter(
+            jsonlogger.JsonFormatter('%(asctime)s %(name)s %(levelname)s %(message)s')
+        )
+        logger.addHandler(log_handler)
+
+    llm = get_current_model(
+        simple_model,
+        advanced_executor_model,
+        expert_prompt_initial_developer_model,
+        {"temperature": expert_prompt_initial_developer_temperature}
+    )
+
+    if prompt_template_group is None:
+        prompt_template_group = 'default'
+    prompt_templates = prompt_templates_confz2langchain(
+        config.prompt_templates[prompt_template_group]
+    )
+
+    initial_system_message_graph = MetaPromptGraph(
+        llms={NODE_PROMPT_INITIAL_DEVELOPER: llm},
+        prompts=prompt_templates,
+        verbose=config.verbose,
+        logger=logger
+    )
+
+    state = AgentState(
+        user_message=user_message,
+        expected_output=expected_output
+    )
+
+    output_state = initial_system_message_graph.run_prompt_initial_developer_graph(state)
+
+    if log_handler:
+        log_handler.close()
+        log_output = log_stream.getvalue()
+    else:
+        log_output = None
+
+    system_message = output_state.get('system_message', "")
+    return system_message, chat_log_2_chatbot_list(log_output)
+
+
 def process_message(user_message: str, expected_output: str,
                     acceptance_criteria: str, initial_system_message: str,
                     recursion_limit: int, max_output_age: int,

@@ -464,6 +601,7 @@ def process_message_with_2_llms(user_message: str, expected_output: str,
     optimizer_model = initialize_llm(optimizer_model_name)
     executor_model = initialize_llm(executor_model_name)
     llms = {
+        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: optimizer_model,
         NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
         NODE_PROMPT_DEVELOPER: optimizer_model,
         NODE_PROMPT_EXECUTOR: executor_model,

@@ -479,6 +617,7 @@ def process_message_with_expert_llms(user_message: str, expected_output: str,
                                      acceptance_criteria: str, initial_system_message: str,
                                      recursion_limit: int, max_output_age: int,
                                      initial_developer_model_name: str, initial_developer_temperature: float,
+                                     acceptance_criteria_model_name: str, acceptance_criteria_temperature: float,
                                      developer_model_name: str, developer_temperature: float,
                                      executor_model_name: str, executor_temperature: float,
                                      output_history_analyzer_model_name: str, output_history_analyzer_temperature: float,

@@ -488,6 +627,7 @@ def process_message_with_expert_llms(user_message: str, expected_output: str,
 
     llms = {
         NODE_PROMPT_INITIAL_DEVELOPER: initialize_llm(initial_developer_model_name, {"temperature": initial_developer_temperature}),
+        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: initialize_llm(acceptance_criteria_model_name, {"temperature": acceptance_criteria_temperature}),
         NODE_PROMPT_DEVELOPER: initialize_llm(developer_model_name, {"temperature": developer_temperature}),
         NODE_PROMPT_EXECUTOR: initialize_llm(executor_model_name, {"temperature": executor_temperature}),
         NODE_OUTPUT_HISTORY_ANALYZER: initialize_llm(output_history_analyzer_model_name, {"temperature": output_history_analyzer_temperature}),

@@ -498,25 +638,6 @@ def process_message_with_expert_llms(user_message: str, expected_output: str,
         recursion_limit, max_output_age, llms, prompt_template_group=prompt_template_group)
 
 
-def generate_acceptance_criteria(user_message, expected_output, model_name):
-    """
-    Generate acceptance criteria based on the user message and expected output.
-    """
-    prompt = f"""Given the following user message and expected output, generate appropriate acceptance criteria:
-
-User Message: {user_message}
-Expected Output: {expected_output}
-
-Generate concise and specific acceptance criteria that can be used to evaluate the quality and relevance of the expected output in relation to the user message. The criteria should focus on key aspects such as relevance, accuracy, completeness, and clarity.
-
-Acceptance Criteria:
-"""
-
-    llm = initialize_llm(model_name)
-    response = llm.invoke(prompt)
-    return response.content if hasattr(response, 'content') else ""
-
-
 class FileConfig(BaseConfig):
     config_file: str = 'config.yml' # default path

@@ -554,29 +675,28 @@ with gr.Blocks(title='Meta Prompt') as demo:
                 show_copy_button=True
             )
            with gr.Group():
+                acceptance_criteria_input = gr.Textbox(
+                    label="Acceptance Criteria (Compared with Expected Output [EO])",
+                    show_copy_button=True
+                )
+                generate_acceptance_criteria_button = gr.Button(
+                    value="Generate",
+                    variant="secondary"
+                )
            with gr.Group():
+                initial_system_message_input = gr.Textbox(
+                    label="Initial System Message",
+                    show_copy_button=True,
+                    value=""
+                )
                with gr.Row():
                    evaluate_initial_system_message_button = gr.Button(
                        value="Evaluate",
+                        variant="secondary"
+                    )
+                    generate_initial_system_message_button = gr.Button(
+                        value="Generate",
+                        variant="secondary"
                    )
            recursion_limit_input = gr.Number(
                label="Recursion Limit",

@@ -600,7 +720,7 @@ with gr.Blocks(title='Meta Prompt') as demo:
                value=list(config.prompt_templates.keys())[0]
            )
        with gr.Row():
-            with gr.Tabs():
+            with gr.Tabs() as llm_tabs:
                with gr.Tab('Simple') as simple_llm_tab:
                    simple_model_name_input = gr.Dropdown(
                        label="Model Name",

@@ -646,6 +766,17 @@ with gr.Blocks(title='Meta Prompt') as demo:
                        precision=1, minimum=0, maximum=1, step=0.1,
                        interactive=True)
 
+                    with gr.Row():
+                        expert_prompt_acceptance_criteria_model_name_input = gr.Dropdown(
+                            label="Acceptance Criteria Model Name",
+                            choices=config.llms.keys(),
+                            value=list(config.llms.keys())[0],
+                        )
+                        expert_prompt_acceptance_criteria_temperature_input = gr.Number(
+                            label="Acceptance Criteria Temperature", value=0.1,
+                            precision=1, minimum=0, maximum=1, step=0.1,
+                            interactive=True)
+
                    with gr.Row():
                        expert_prompt_developer_model_name_input = gr.Dropdown(
                            label="Developer Model Name",

@@ -748,8 +879,21 @@ with gr.Blocks(title='Meta Prompt') as demo:
 
    generate_acceptance_criteria_button.click(
        generate_acceptance_criteria,
+        inputs=[user_message_input, expected_output_input,
+                simple_model_name_input,
+                advanced_optimizer_model_name_input,
+                expert_prompt_acceptance_criteria_model_name_input, expert_prompt_acceptance_criteria_temperature_input],
+        outputs=[acceptance_criteria_input, logs_chatbot]
+    )
+    generate_initial_system_message_button.click(
+        generate_initial_system_message,
+        inputs=[user_message_input, expected_output_input,
+                simple_model_name_input,
+                advanced_optimizer_model_name_input,
+                expert_prompt_initial_developer_model_name_input,
+                expert_prompt_initial_developer_temperature_input,
+                prompt_template_group],
+        outputs=[initial_system_message_input, logs_chatbot]
    )
 
    evaluate_initial_system_message_button.click(

@@ -830,6 +974,7 @@ with gr.Blocks(title='Meta Prompt') as demo:
            recursion_limit_input,
            max_output_age,
            expert_prompt_initial_developer_model_name_input, expert_prompt_initial_developer_temperature_input,
+            expert_prompt_acceptance_criteria_model_name_input, expert_prompt_acceptance_criteria_temperature_input,
            expert_prompt_developer_model_name_input, expert_prompt_developer_temperature_input,
            expert_prompt_executor_model_name_input, expert_prompt_executor_temperature_input,
            expert_output_history_analyzer_model_name_input, expert_output_history_analyzer_temperature_input,
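For orientation, the `.click()` wiring added above follows Gradio's standard event pattern: the handler receives the listed input components' current values as arguments, and its return values are written back into the output components. The following is a minimal, self-contained sketch of that pattern only; the toy handler and layout are illustrative stand-ins, not code from this commit.

    import gradio as gr

    def make_criteria(user_message: str, expected_output: str) -> str:
        # Toy stand-in for generate_acceptance_criteria: the real handler builds
        # an LLM-backed MetaPromptGraph and returns its generated criteria and logs.
        return (f"* The output should address: {user_message!r}\n"
                f"* The output should resemble: {expected_output!r}")

    with gr.Blocks() as demo:
        user_message_input = gr.Textbox(label="User Message")
        expected_output_input = gr.Textbox(label="Expected Output")
        acceptance_criteria_input = gr.Textbox(label="Acceptance Criteria")
        generate_acceptance_criteria_button = gr.Button("Generate")
        # Input values flow into the handler; the return value fills the output box.
        generate_acceptance_criteria_button.click(
            make_criteria,
            inputs=[user_message_input, expected_output_input],
            outputs=[acceptance_criteria_input],
        )

    if __name__ == "__main__":
        demo.launch()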
meta_prompt/__init__.py
CHANGED
@@ -3,6 +3,7 @@ __version__ = '0.1.0'
 from .meta_prompt import AgentState, MetaPromptGraph
 from .consts import (
     META_PROMPT_NODES,
+    NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
     NODE_PROMPT_INITIAL_DEVELOPER,
     NODE_PROMPT_DEVELOPER,
     NODE_PROMPT_EXECUTOR,
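With the constant re-exported, downstream code can presumably import it straight from the package alongside the names that were already exposed; a one-line illustration (hypothetical consumer code, not part of the commit):

    from meta_prompt import AgentState, MetaPromptGraph, NODE_ACCEPTANCE_CRITERIA_DEVELOPER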
meta_prompt/consts.py
CHANGED
@@ -1,6 +1,6 @@
 from langchain_core.prompts import ChatPromptTemplate
 
-NODE_TASK_BRIEF_DEVELOPER = "task_brief_developer"
+# NODE_TASK_BRIEF_DEVELOPER = "task_brief_developer"
 NODE_ACCEPTANCE_CRITERIA_DEVELOPER = "acceptance_criteria_developer"
 NODE_PROMPT_INITIAL_DEVELOPER = "prompt_initial_developer"
 NODE_PROMPT_DEVELOPER = "prompt_developer"

@@ -10,7 +10,7 @@ NODE_PROMPT_ANALYZER = "prompt_analyzer"
 NODE_PROMPT_SUGGESTER = "prompt_suggester"
 
 META_PROMPT_NODES = [
-    NODE_TASK_BRIEF_DEVELOPER,
+    # NODE_TASK_BRIEF_DEVELOPER,
     NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
     NODE_PROMPT_INITIAL_DEVELOPER,
     NODE_PROMPT_DEVELOPER,

@@ -21,34 +21,35 @@ META_PROMPT_NODES = [
 ]
 
 DEFAULT_PROMPT_TEMPLATES = {
+    # NODE_TASK_BRIEF_DEVELOPER: ChatPromptTemplate.from_messages([
+    #     ("system", """# Task Brief Developer
 
+    # You are a task brief developer. You will receive a specific example to create a task brief. You will respond directly with the brief for the task type.
 
+    # ## Instructions
 
+    # The user will provide you a specific example with User Message (input) and Expected Output (output) of a task type. You will respond with a brief for the task type in the following format:
 
+    # ```
+    # # Task Description
 
+    # [Task description]
+    # ```
 
+    # """),
+    #     ("human", """# User Message
 
+    # {user_message}
 
+    # # Expected Output
 
+    # {expected_output}
 
+    # # Task Brief
 
+    # """)
+    # ]),
+
     NODE_ACCEPTANCE_CRITERIA_DEVELOPER: ChatPromptTemplate.from_messages([
         ("system", """# Acceptance Criteria Developer

@@ -56,11 +57,15 @@ You are an acceptance criteria developer. You will receive a specific example of
 
 ## Instructions
 
-The user will provide you a specific example with User Message (input) and Expected Output (output) of a task type. You will respond with acceptance criteria for the task type includes the following:
+The user will provide you a specific example with User Message (input) and Expected Output (output) of a task type. You will respond with acceptance criteria for the task type, by comparing with Expected Output (which may be referenced as EO), includes the following:
 
 * What the output should include
 * What the output should not include
+* Language requirements
+* Formatting requirements
+* Structure requirements
+* Style requirements
+* Any specific requirements
 
 ## Output

@@ -68,10 +73,14 @@ Create acceptance criteria in the following format:
 
 ```
 # Acceptance Criteria
+
 * [Criteria 1]
 * [Criteria 2]
+* ...
+* Unacceptable differences (comapire with EO):
+* ...
+* Acceptable differences (comapire with EO):
+* ...
 ```
 
 """),
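Each value in DEFAULT_PROMPT_TEMPLATES is a langchain_core ChatPromptTemplate whose placeholders (here {user_message} and {expected_output}) are presumably filled from the agent state before the node's model is invoked. A small standalone sketch of that formatting step, using an abbreviated illustrative template rather than the full one above:

    from langchain_core.prompts import ChatPromptTemplate

    template = ChatPromptTemplate.from_messages([
        ("system", "# Acceptance Criteria Developer\n\nDerive acceptance criteria from the example."),
        ("human", "# User Message\n\n{user_message}\n\n# Expected Output\n\n{expected_output}"),
    ])

    # format_messages substitutes the placeholders; a node would pass the result to its LLM.
    messages = template.format_messages(
        user_message="How do I reverse a list in Python?",
        expected_output="Use list slicing: my_list[::-1]",
    )
    for message in messages:
        print(message.type, ":", message.content)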
meta_prompt/meta_prompt.py
CHANGED
@@ -127,7 +127,19 @@ class MetaPromptGraph:
         return workflow
 
 
+    def _create_prompt_initial_developer_workflow(self) -> StateGraph:
+        workflow = StateGraph(AgentState)
+        workflow.add_node(NODE_PROMPT_INITIAL_DEVELOPER,
+                          lambda x: self._prompt_node(
+                              NODE_PROMPT_INITIAL_DEVELOPER,
+                              "system_message",
+                              x))
+        workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER, END)
+        workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
+        return workflow
+
+
+    def _create_workflow(self) -> StateGraph:
         """Create a workflow state graph.
 
         Args:

@@ -184,30 +196,61 @@ class MetaPromptGraph:
             }
         )
 
-        # Set entry point based on including_initial_developer flag
-        if including_initial_developer:
+        # # Set entry point based on including_initial_developer flag
+        # if including_initial_developer:
+        #     workflow.add_node(NODE_PROMPT_INITIAL_DEVELOPER,
+        #                       lambda x: self._prompt_node(
+        #                           NODE_PROMPT_INITIAL_DEVELOPER,
+        #                           "system_message",
+        #                           x))
+        #     workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER,
+        #                       NODE_PROMPT_EXECUTOR)
+        #     workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
+        # else:
+        #     workflow.set_entry_point(NODE_PROMPT_EXECUTOR)
+
+        workflow.add_node(NODE_PROMPT_INITIAL_DEVELOPER,
+                          lambda x: self._optional_action(
+                              "system_message",
                               lambda x: self._prompt_node(
                                   NODE_PROMPT_INITIAL_DEVELOPER,
                                   "system_message",
+                                  x),
+                              x))
+        workflow.add_node(NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
+                          lambda x: self._optional_action(
+                              "acceptance_criteria",
+                              lambda x: self._prompt_node(
+                                  NODE_ACCEPTANCE_CRITERIA_DEVELOPER,
+                                  "acceptance_criteria",
+                                  x),
+                              x))
+
+        workflow.add_edge(NODE_PROMPT_INITIAL_DEVELOPER, NODE_ACCEPTANCE_CRITERIA_DEVELOPER)
+        workflow.add_edge(NODE_ACCEPTANCE_CRITERIA_DEVELOPER, NODE_PROMPT_EXECUTOR)
+        workflow.set_entry_point(NODE_PROMPT_INITIAL_DEVELOPER)
 
         return workflow
+
 
+    def run_acceptance_criteria_graph(self, state: AgentState) -> AgentState:
         self.logger.debug("Creating acceptance criteria workflow")
         workflow = self._create_acceptance_criteria_workflow()
-        self.logger.debug("Compiling workflow with memory saver")
         memory = MemorySaver()
         graph = workflow.compile(checkpointer=memory)
+        config = {"configurable": {"thread_id": "1"}}
+        self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
+        output_state = graph.invoke(state, config)
+        self.logger.debug("Output state: %s", pprint.pformat(output_state))
+        return output_state
+
+
+    def run_prompt_initial_developer_graph(self, state: AgentState) -> AgentState:
+        self.logger.debug("Creating prompt initial developer workflow")
+        workflow = self._create_prompt_initial_developer_workflow()
+        memory = MemorySaver()
+        graph = workflow.compile(checkpointer=memory)
+        config = {"configurable": {"thread_id": "1"}}
         self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
         output_state = graph.invoke(state, config)
         self.logger.debug("Output state: %s", pprint.pformat(output_state))

@@ -232,8 +275,7 @@ class MetaPromptGraph:
         Returns:
             AgentState: The output state of the agent after invoking the workflow.
         """
-        workflow = self._create_workflow(
-            state.system_message is None or state.system_message == ""))
+        workflow = self._create_workflow()
 
         memory = MemorySaver()
         graph = workflow.compile(checkpointer=memory)

@@ -271,7 +313,7 @@ class MetaPromptGraph:
 
     def _optional_action(
         self, target_attribute: str,
+        action: RunnableLike,
         state: AgentState
     ) -> AgentState:
         """

@@ -280,7 +322,7 @@ class MetaPromptGraph:
         Args:
             node (str): Node identifier.
             target_attribute (str): State attribute to be updated.
+            action (RunnableLike): Action to be invoked. Defaults to None.
             state (AgentState): Current agent state.
 
         Returns:
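The two new run_* entry points follow the same compile-and-invoke pattern as the existing graph: build the small StateGraph, compile it with a MemorySaver checkpointer, and invoke it with an AgentState. A minimal sketch of calling the acceptance-criteria graph with a stubbed LLM, mirroring the repository's tests rather than a real model (the stub and inputs are illustrative):

    from unittest.mock import MagicMock

    from meta_prompt import AgentState, MetaPromptGraph, NODE_ACCEPTANCE_CRITERIA_DEVELOPER

    # Stub LLM whose invoke() returns an object with a .content attribute,
    # the same shape the tests below rely on.
    fake_llm = MagicMock(invoke=lambda prompt: MagicMock(content="Acceptance criteria: ..."))

    graph = MetaPromptGraph(llms={NODE_ACCEPTANCE_CRITERIA_DEVELOPER: fake_llm})
    state = AgentState(user_message="Hi!", expected_output="Hello!")

    output_state = graph.run_acceptance_criteria_graph(state)
    print(output_state["acceptance_criteria"])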
tests/meta_prompt_graph_test.py
CHANGED
@@ -316,6 +316,7 @@ class TestMetaPromptGraph(unittest.TestCase):
 
         pprint.pp(output_state["acceptance_criteria"])
 
+
     def test_run_acceptance_criteria_graph(self):
         """
         Test the run_acceptance_criteria_graph method of MetaPromptGraph.

@@ -340,5 +341,26 @@
         self.assertIn("Acceptance criteria: ...", output_state['acceptance_criteria'])
 
 
+    def test_run_prompt_initial_developer_graph(self):
+        """
+        Test the run_prompt_initial_developer_graph method of MetaPromptGraph.
+
+        This test case verifies that the run_prompt_initial_developer_graph method returns a state with an initial developer prompt.
+        """
+        llms = {
+            NODE_PROMPT_INITIAL_DEVELOPER: MagicMock(
+                invoke=lambda prompt: MagicMock(content="Initial developer prompt: ..."))
+        }
+        meta_prompt_graph = MetaPromptGraph(llms=llms)
+        state = AgentState(user_message="How do I reverse a list in Python?")
+        output_state = meta_prompt_graph.run_prompt_initial_developer_graph(state)
+
+        # Check if the output state contains the initial developer prompt
+        self.assertIsNotNone(output_state['system_message'])
+
+        # Check if the initial developer prompt includes the expected content
+        self.assertIn("Initial developer prompt: ...", output_state['system_message'])
+
+
 if __name__ == '__main__':
     unittest.main()
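Assuming the module path implied by the file layout above and a repository root on sys.path, the new case can presumably be run on its own with the stock unittest loader (illustrative runner script, not part of the commit):

    import unittest

    # Load and run just the new test case.
    suite = unittest.defaultTestLoader.loadTestsFromName(
        "tests.meta_prompt_graph_test.TestMetaPromptGraph.test_run_prompt_initial_developer_graph"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)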