yaleh committed
Commit 4ba958e · 1 Parent(s): e5e33ac

Updated pydocs. Corrected the `evaluate` behavior.

app/gradio_meta_prompt.py CHANGED
@@ -70,6 +70,20 @@ class SimplifiedCSVLogger(CSVLogger):
 
 
 class LLMModelFactory:
+    """A factory class for creating instances of LLM models.
+
+    This class follows the Singleton pattern, ensuring that only one instance is created.
+    The `create` method dynamically instantiates a model based on the provided `model_type`.
+
+    Attributes:
+        _instance (LLMModelFactory): A private class variable to store the singleton instance.
+
+    Methods:
+        create(model_type: str, **kwargs) -> BaseLanguageModel:
+            Dynamically creates and returns an instance of a model based on `model_type`.
+
+    """
+
     _instance = None
 
     def __new__(cls):
@@ -77,12 +91,34 @@ class LLMModelFactory:
             cls._instance = super(LLMModelFactory, cls).__new__(cls)
         return cls._instance
 
-    def create(self, model_type: str, **kwargs):
+    def create(self, model_type: str, **kwargs) -> BaseLanguageModel:
+        """Creates and returns an instance of a model based on `model_type`.
+
+        Args:
+            model_type (str): The name of the model class to instantiate.
+            **kwargs: Additional keyword arguments to pass to the model constructor.
+
+        Returns:
+            BaseLanguageModel: An instance of a model that inherits from BaseLanguageModel.
+
+        """
         model_class = globals()[model_type]
         return model_class(**kwargs)
 
 
 def chat_log_2_chatbot_list(chat_log: str):
+    """Convert a chat log string into a list of dialogues for the Chatbot format.
+
+    Args:
+        chat_log (str): A JSON formatted chat log where each line represents an action with its message.
+            Expected actions are 'invoke' and 'response'.
+
+    Returns:
+        List[List[str]]: A list of dialogue pairs where the first element is a user input and the second element is a bot response.
+            If the action was 'invoke', the first element will be the message, and the second element will be None.
+            If the action was 'response', the first element will be None, and the second element will be the message.
+    """
+
     chatbot_list = []
     if chat_log is None or chat_log == '':
        return chatbot_list
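A note for readers unfamiliar with the pattern: `create` resolves the class named by `model_type` from this module's `globals()` and forwards the remaining kwargs to its constructor. A minimal usage sketch (the model class and kwargs are illustrative, assuming `ChatOpenAI` is imported into this module):

```python
factory = LLMModelFactory()
assert factory is LLMModelFactory()  # __new__ hands back the same singleton instance

# Resolves globals()["ChatOpenAI"] and calls it with the remaining kwargs:
llm = factory.create("ChatOpenAI", model_name="gpt-3.5-turbo", temperature=0)
```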
@@ -91,9 +127,9 @@ def chat_log_2_chatbot_list(chat_log: str):
             json_line = json.loads(line)
             if 'action' in json_line:
                 if json_line['action'] == 'invoke':
-                    chatbot_list.append([json_line['message'],None])
+                    chatbot_list.append([json_line['message'], None])
                 if json_line['action'] == 'response':
-                    chatbot_list.append([None,json_line['message']])
+                    chatbot_list.append([None, json_line['message']])
     except json.decoder.JSONDecodeError as e:
         print(f"Error decoding JSON log output: {e}")
         print(line)
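A quick illustration of the conversion, assuming the loop iterates over newline-delimited JSON records as the visible body suggests (the messages below are made up):

```python
chat_log = '\n'.join([
    '{"action": "invoke", "message": "Hello"}',
    '{"action": "response", "message": "Hi there!"}',
])
print(chat_log_2_chatbot_list(chat_log))
# [['Hello', None], [None, 'Hi there!']]
```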
@@ -106,6 +142,16 @@ def chat_log_2_chatbot_list(chat_log: str):
 active_model_tab = "Simple"
 
 def on_model_tab_select(event: gr.SelectData):
+    """
+    Handles model tab selection events and updates the active model tab.
+
+    Parameters:
+        event (gr.SelectData): The select data event triggered by the user's action.
+
+    Returns:
+        None: This function doesn't return anything but updates the global variable 'active_model_tab'.
+
+    """
     if not event.selected:
         return
 
@@ -113,59 +159,96 @@ def on_model_tab_select(event: gr.SelectData):
     active_model_tab = event.value
 
 
-def get_current_models(simple_model_name: str, optimizer_model_name: str, executor_model_name: str):
-    optimizer_model_config = config.llms[optimizer_model_name if active_model_tab ==
-                                         "Advanced" else simple_model_name]
-    executor_model_config = config.llms[executor_model_name if active_model_tab ==
-                                        "Advanced" else simple_model_name]
-    optimizer_model = LLMModelFactory().create(optimizer_model_config.type,
-                                               **optimizer_model_config.model_dump(exclude={'type'}))
-    executor_model = LLMModelFactory().create(executor_model_config.type,
-                                              **executor_model_config.model_dump(exclude={'type'}))
-
-    return {
-        NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
-        NODE_PROMPT_DEVELOPER: optimizer_model,
-        NODE_PROMPT_EXECUTOR: executor_model,
-        NODE_OUTPUT_HISTORY_ANALYZER: optimizer_model,
-        NODE_PROMPT_ANALYZER: optimizer_model,
-        NODE_PROMPT_SUGGESTER: optimizer_model
+def get_current_executor_model(simple_model_name: str, advanced_model_name: str, expert_model_name: str) -> BaseLanguageModel:
+    """
+    Retrieve and return a language model (LLM) based on the currently active model tab.
+
+    This function uses a mapping to associate model tab names with their corresponding model names.
+    It then looks up the configuration for the selected executor model in the application's
+    configuration, creates an instance of the appropriate type of language model using that
+    configuration, and returns it. If the active model tab is not found in the mapping, the simple model
+    will be used as a default.
+
+    Args:
+        simple_model_name (str): The name of the simple language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        advanced_model_name (str): The name of the advanced language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        expert_model_name (str): The name of the expert language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        BaseLanguageModel: An instance of a language model that inherits from BaseLanguageModel,
+            based on the currently active model tab and the provided model names.
+    """
+    model_mapping = {
+        "Simple": simple_model_name,
+        "Advanced": advanced_model_name,
+        "Expert": expert_model_name
     }
+    executor_model_name = model_mapping.get(active_model_tab, simple_model_name)
+    executor_model_config = config.llms[executor_model_name]
+    return LLMModelFactory().create(executor_model_config.type,
+                                    **executor_model_config.model_dump(exclude={'type'}))
 
 
-def get_current_executor_model(simple_model_name: str, executor_model_name: str):
-    executor_model_config = config.llms[executor_model_name if active_model_tab ==
-                                        "Advanced" else simple_model_name]
-    executor_model = LLMModelFactory().create(executor_model_config.type,
-                                              **executor_model_config.model_dump(exclude={'type'}))
-    return executor_model
 
-
-def evaluate_system_message(system_message, user_message, simple_model, executor_model):
-    llm = get_current_executor_model(simple_model, executor_model)
+def evaluate_system_message(system_message, user_message, simple_model, advanced_executor_model, expert_executor_model):
+    """
+    Evaluate a system message by using it to generate a response from an executor model based on the current active tab and provided user message.
+
+    This function retrieves the appropriate language model (LLM) for the current active model tab, formats a chat prompt template with the system message and user message, invokes the LLM using this formatted prompt, and returns the content of the output if it exists.
+
+    Args:
+        system_message (str): The system message to use when evaluating the response.
+        user_message (str): The user's input message for which a response will be generated.
+        simple_model (str): The name of the simple language model. This should correspond to a key in the 'llms' section of the application's configuration.
+        advanced_executor_model (str): The name of the advanced language model. This should correspond to a key in the 'llms' section of the application's configuration.
+        expert_executor_model (str): The name of the expert language model. This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        str: The content of the output generated by the LLM based on the system message and user message, if it exists; otherwise, an empty string.
+
+    Raises:
+        gr.Error: If there is a Gradio-specific error during the execution of this function.
+        Exception: For any other unexpected errors that occur during the execution of this function.
+    """
+    llm = get_current_executor_model(simple_model, advanced_executor_model, expert_executor_model)
     template = ChatPromptTemplate.from_messages([
         ("system", "{system_message}"),
         ("human", "{user_message}")
     ])
-    messages = template.format_messages(system_message=system_message, user_message=user_message)
     try:
-        output = llm.invoke(messages)
+        output = llm.invoke(template.format(
+            system_message=system_message, user_message=user_message))
+        return output.content if hasattr(output, 'content') else ""
+    except gr.Error as e:
+        raise e
     except Exception as e:
-        if isinstance(e, gr.Error):
-            raise e
-        else:
-            raise gr.Error(f"Error: {e}")
-
-    if hasattr(output, 'content'):
-        return output.content
-    else:
-        return ""
+        raise gr.Error(f"Error: {e}")
 
 
-def process_message(user_message, expected_output, acceptance_criteria,
-                    initial_system_message, recursion_limit: int,
-                    max_output_age: int,
-                    llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]):
+def process_message(user_message, expected_output, acceptance_criteria, initial_system_message, recursion_limit: int, max_output_age: int, llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]):
+    """
+    Process a user message by executing the MetaPromptGraph with provided language models and input state.
+    This function sets up the initial state of the conversation, logs the execution if verbose mode is enabled,
+    and extracts the best system message, output, and analysis from the output state of the MetaPromptGraph.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language model(s).
+        expected_output (str): The anticipated response or outcome from the language model(s) based on the user's message.
+        acceptance_criteria (str): Criteria that determine whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language model(s) before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        llms (Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]): A single language model or a dictionary of language models to use for processing the user's message.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language model(s) that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
     input_state = AgentState(
         user_message=user_message,
         expected_output=expected_output,
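Worth noting for reviewers: the new `evaluate_system_message` calls `template.format(...)` rather than the old `format_messages(...)`. In current LangChain versions both are accepted by `invoke`, but they produce different prompt shapes; a sketch:

```python
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages([
    ("system", "{system_message}"),
    ("human", "{user_message}"),
])
# format() flattens the roles into one string, e.g. "System: ...\nHuman: ..."
as_text = template.format(system_message="Be terse.", user_message="Hi")
# format_messages() keeps the role structure as a list of message objects:
as_messages = template.format_messages(system_message="Be terse.", user_message="Hi")
```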
@@ -197,34 +280,107 @@ def process_message(user_message, expected_output, acceptance_criteria,
     else:
         log_output = None
 
-    system_message = output_state.get('best_system_message', "Error: The output state does not contain a valid 'best_system_message'")
-    output = output_state.get('best_output', "Error: The output state does not contain a valid 'best_output'")
-    analysis = output_state.get('analysis', "Error: The output state does not contain a valid 'analysis'")
+    system_message = output_state.get(
+        'best_system_message', "Error: The output state does not contain a valid 'best_system_message'")
+    output = output_state.get(
+        'best_output', "Error: The output state does not contain a valid 'best_output'")
+    analysis = output_state.get(
+        'analysis', "Error: The output state does not contain a valid 'analysis'")
 
     return (system_message, output, analysis, chat_log_2_chatbot_list(log_output))
 
 
+def initialize_llm(model_name: str) -> Any:
+    """
+    Initialize and return a language model (LLM) based on its name.
+
+    This function looks up the configuration for the specified language model in the application's
+    configuration, creates an instance of the appropriate type of language model using that
+    configuration, and returns it.
+
+    Args:
+        model_name (str): The name of the language model to initialize.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        Any: An instance of the specified type of language model, initialized with its configured settings.
+
+    Raises:
+        KeyError: If no configuration exists for the specified model name.
+        NotImplementedError: If an unrecognized type is configured for the language model.
+            This should not occur under normal circumstances because the LLMModelFactory class
+            checks and validates the type when creating a new language model.
+    """
+    model_config = config.llms[model_name]
+    return LLMModelFactory().create(model_config.type, **model_config.model_dump(exclude={'type'}))
+
 
 def process_message_with_single_llm(user_message, expected_output, acceptance_criteria, initial_system_message,
                                     recursion_limit: int, max_output_age: int,
                                     model_name: str):
-    # Get the output state from MetaPromptGraph
-    type = config.llms[model_name].type
-    args = config.llms[model_name].model_dump(exclude={'type'})
-    llm = LLMModelFactory().create(type, **args)
-
+    """
+    Process a user message using a single language model.
+
+    This function initializes the specified language model and then uses it to process the user's
+    message along with other provided input parameters such as expected output, acceptance criteria,
+    initial system message, recursion limit, and max output age. The result is obtained by calling
+    the `process_message` function with this single language model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language model(s).
+        expected_output (str): The anticipated response or outcome from the language model based on the user's message.
+        acceptance_criteria (str): Criteria that determine whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language model before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        model_name (str): The name of the language model to initialize and use for processing the user's message.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language model that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
+    llm = initialize_llm(model_name)
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llm)
 
 
 def process_message_with_2_llms(user_message, expected_output, acceptance_criteria, initial_system_message,
                                 recursion_limit: int, max_output_age: int,
-                                optimizer_model_name: str, executor_model_name: str,):
-    # Get the output state from MetaPromptGraph
-    optimizer_model = LLMModelFactory().create(config.llms[optimizer_model_name].type,
-                                               **config.llms[optimizer_model_name].model_dump(exclude={'type'}))
-    executor_model = LLMModelFactory().create(config.llms[executor_model_name].type,
-                                              **config.llms[executor_model_name].model_dump(exclude={'type'}))
+                                optimizer_model_name: str, executor_model_name: str):
+    """
+    Process a user message using two language models - one for optimization and another for execution.
+
+    This function initializes the specified optimizer and executor language models and then uses them to process
+    the user's message along with other provided input parameters such as expected output, acceptance criteria,
+    initial system message, recursion limit, and max output age. The result is obtained by calling the `process_message`
+    function with a dictionary of language models where all nodes except for NODE_PROMPT_EXECUTOR use the optimizer model
+    and NODE_PROMPT_EXECUTOR uses the executor model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language models.
+        expected_output (str): The anticipated response or outcome from the language models based on the user's message.
+        acceptance_criteria (str): Criteria that determine whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language models before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        optimizer_model_name (str): The name of the language model to initialize and use for optimization tasks like prompt development, analysis, and suggestion.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        executor_model_name (str): The name of the language model to initialize and use for execution tasks like running code or providing final outputs.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language models that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
+    optimizer_model = initialize_llm(optimizer_model_name)
+    executor_model = initialize_llm(executor_model_name)
     llms = {
         NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
         NODE_PROMPT_DEVELOPER: optimizer_model,
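The new `initialize_llm` helper centralizes the `config.llms` lookup plus factory call that was previously repeated at each call site. A hedged sketch of what one call expands to (the config key and fields below are illustrative, not from this repo's config):

```python
# Assuming a config entry along these lines:
#   llms:
#     my-gpt:
#       type: ChatOpenAI
#       model_name: gpt-3.5-turbo
llm = initialize_llm("my-gpt")
# ...which is equivalent to:
# LLMModelFactory().create("ChatOpenAI", model_name="gpt-3.5-turbo")
```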
@@ -233,40 +389,54 @@ def process_message_with_2_llms(user_message, expected_output, acceptance_criteria,
         NODE_PROMPT_ANALYZER: optimizer_model,
         NODE_PROMPT_SUGGESTER: optimizer_model
     }
-
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llms)
 
 
 def process_message_with_expert_llms(user_message, expected_output, acceptance_criteria, initial_system_message,
-                                    recursion_limit: int, max_output_age: int,
-                                    initial_developer_model_name: str, developer_model_name: str,
-                                    executor_model_name: str, output_history_analyzer_model_name: str,
-                                    analyzer_model_name: str, suggester_model_name: str):
-    # Get the output state from MetaPromptGraph
-    initial_developer_model = LLMModelFactory().create(config.llms[initial_developer_model_name].type,
-                                                       **config.llms[initial_developer_model_name].model_dump(exclude={'type'}))
-    developer_model = LLMModelFactory().create(config.llms[developer_model_name].type,
-                                               **config.llms[developer_model_name].model_dump(exclude={'type'}))
-    executor_model = LLMModelFactory().create(config.llms[executor_model_name].type,
-                                              **config.llms[executor_model_name].model_dump(exclude={'type'}))
-    output_history_analyzer_model = LLMModelFactory().create(config.llms[output_history_analyzer_model_name].type,
-                                                             **config.llms[output_history_analyzer_model_name].model_dump(exclude={'type'}))
-    analyzer_model = LLMModelFactory().create(config.llms[analyzer_model_name].type,
-                                              **config.llms[analyzer_model_name].model_dump(exclude={'type'}))
-    suggester_model = LLMModelFactory().create(config.llms[suggester_model_name].type,
-                                               **config.llms[suggester_model_name].model_dump(exclude={'type'}))
+                                     recursion_limit: int, max_output_age: int,
+                                     initial_developer_model_name: str, developer_model_name: str,
+                                     executor_model_name: str, output_history_analyzer_model_name: str,
+                                     analyzer_model_name: str, suggester_model_name: str):
+    """
+    Process a user message using multiple expert language models.
+
+    This function initializes six expert language models based on their names and uses them to process the user's message
+    along with other provided input parameters such as expected output, acceptance criteria, initial system message,
+    recursion limit, and max output age. The result is obtained by calling the `process_message` function with a dictionary
+    of language models where each node uses a specific language model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language models.
+        expected_output (str): The anticipated response or outcome from the language models based on the user's message.
+        acceptance_criteria (str): Criteria that determine whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language models before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        initial_developer_model_name (str): The name of the language model to initialize and use for the initial developer node.
+        developer_model_name (str): The name of the language model to initialize and use for the developer node.
+        executor_model_name (str): The name of the language model to initialize and use for the executor node.
+        output_history_analyzer_model_name (str): The name of the language model to initialize and use for the output history analyzer node.
+        analyzer_model_name (str): The name of the language model to initialize and use for the analyzer node.
+        suggester_model_name (str): The name of the language model to initialize and use for the suggester node.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language models that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
     llms = {
-        NODE_PROMPT_INITIAL_DEVELOPER: initial_developer_model,
-        NODE_PROMPT_DEVELOPER: developer_model,
-        NODE_PROMPT_EXECUTOR: executor_model,
-        NODE_OUTPUT_HISTORY_ANALYZER: output_history_analyzer_model,
-        NODE_PROMPT_ANALYZER: analyzer_model,
-        NODE_PROMPT_SUGGESTER: suggester_model
+        NODE_PROMPT_INITIAL_DEVELOPER: initialize_llm(initial_developer_model_name),
+        NODE_PROMPT_DEVELOPER: initialize_llm(developer_model_name),
+        NODE_PROMPT_EXECUTOR: initialize_llm(executor_model_name),
+        NODE_OUTPUT_HISTORY_ANALYZER: initialize_llm(output_history_analyzer_model_name),
+        NODE_PROMPT_ANALYZER: initialize_llm(analyzer_model_name),
+        NODE_PROMPT_SUGGESTER: initialize_llm(suggester_model_name)
     }
-
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
-                            recursion_limit, max_output_age, llms)
+                           recursion_limit, max_output_age, llms)
 
 
 class FileConfig(BaseConfig):
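With the expert path, every node of the graph can run on a different configured model. An illustrative call shape (all model names are hypothetical config keys, and the numeric limits are placeholders):

```python
result = process_message_with_expert_llms(
    user_message, expected_output, acceptance_criteria, initial_system_message,
    recursion_limit=25, max_output_age=2,
    initial_developer_model_name="model-a",
    developer_model_name="model-a",
    executor_model_name="model-b",
    output_history_analyzer_model_name="model-a",
    analyzer_model_name="model-a",
    suggester_model_name="model-a",
)
```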
@@ -393,19 +563,25 @@ with gr.Blocks(title='Meta Prompt') as demo:
                                          acceptance_criteria_input, initial_system_message_input],
                                         value='Clear All')
         with gr.Column():
-            system_message_output = gr.Textbox(label="System Message", show_copy_button=True)
+            system_message_output = gr.Textbox(
+                label="System Message", show_copy_button=True)
             with gr.Row():
-                evaluate_system_message_button = gr.Button(value="Evaluate", variant="secondary")
-                copy_to_initial_system_message_button = gr.Button(value="Copy to Initial System Message", variant="secondary")
+                evaluate_system_message_button = gr.Button(
+                    value="Evaluate", variant="secondary")
+                copy_to_initial_system_message_button = gr.Button(
+                    value="Copy to Initial System Message", variant="secondary")
             output_output = gr.Textbox(label="Output", show_copy_button=True)
-            analysis_output = gr.Textbox(label="Analysis", show_copy_button=True)
-            flag_button = gr.Button(value="Flag", variant="secondary", visible=config.allow_flagging)
+            analysis_output = gr.Textbox(
+                label="Analysis", show_copy_button=True)
+            flag_button = gr.Button(
+                value="Flag", variant="secondary", visible=config.allow_flagging)
             with gr.Accordion("Details", open=False, visible=config.verbose):
                 logs_chatbot = gr.Chatbot(
                     label='Messages', show_copy_button=True, layout='bubble',
                     bubble_full_width=False, render_markdown=False
                 )
-                clear_logs_button = gr.ClearButton([logs_chatbot], value='Clear Logs')
+                clear_logs_button = gr.ClearButton(
+                    [logs_chatbot], value='Clear Logs')
 
     # Load examples
     examples = gr.Examples(config.examples_path, inputs=[
@@ -425,13 +601,13 @@ with gr.Blocks(title='Meta Prompt') as demo:
     evaluate_initial_system_message_button.click(
         evaluate_system_message,
         inputs=[initial_system_message_input, user_message_input,
-                simple_model_name_input, advanced_executor_model_name_input],
+                simple_model_name_input, advanced_executor_model_name_input, expert_prompt_executor_model_name_input],
         outputs=[output_output]
     )
    evaluate_system_message_button.click(
         evaluate_system_message,
         inputs=[system_message_output, user_message_input,
-                simple_model_name_input, advanced_executor_model_name_input],
+                simple_model_name_input, advanced_executor_model_name_input, expert_prompt_executor_model_name_input],
         outputs=[output_output]
     )
     copy_to_initial_system_message_button.click(
 
meta_prompt/consts.py CHANGED
@@ -108,7 +108,7 @@ You output the following analysis according to the Acceptance Criteria:
 # Preferred Output ID: [ID]
 ```
 
-If both outputs are equally similar to the Expected Output, output the following:
+You must choose one of the two outputs. If both outputs are exactly the same, output the following:
 
 ```
 # Analysis
@@ -193,7 +193,7 @@ Provide your analysis in the following format:
 * Provide your suggestions in a Markdown list, nothing else. Output only the suggestions related with Unacceptable Differences.
 * Start every suggestion with `The System Message should ...`.
 * Figure out the contexts of the System Message that conflict with the suggestions, and suggest modification or deletion.
-* Do not simply describe the output as being the same/similar/different from the Expected Output, such as `the output should not use a different format and style compared to the Expected Output` or `the output should match the expected output exactly`; instead, describe the expected characteristics specifically and suggest a detailed example.
+* Since the Expected Output won't be shown to the prompt developer who will read your suggestions, do not simply describe the output as being the same/similar/different from the Expected Output, such as `the output should not use a different format and style compared to the Expected Output` or `the output should match the expected output exactly`; instead, describe the expected characteristics specifically and suggest a detailed example.
 * Avoiding the behavior should be explicitly requested (e.g. `The System Message should explicitly state that the output should not ...`) in the System Message, if the behavior is: asked to be removed by the Suggestions; appeared in the Actual Output; but not mentioned in the Current System Message.
 * Expected Output text should not appear in System Message as an example. But it's OK to use some similar but distinct text as an example instead.
 * Ask to remove the Expected Output text or text highly similar to Expected Output from System Message, if it's present.
 
meta_prompt/meta_prompt.py CHANGED
@@ -12,6 +12,23 @@ from pydantic import BaseModel
 from .consts import *
 
 class AgentState(BaseModel):
+    """
+    Represents the state of an agent in a conversation.
+
+    Attributes:
+    - max_output_age (int): The maximum age of the output.
+    - user_message (str, optional): The user's message.
+    - expected_output (str, optional): The expected output.
+    - acceptance_criteria (str, optional): The acceptance criteria.
+    - system_message (str, optional): The system message.
+    - output (str, optional): The output.
+    - suggestions (str, optional): The suggestions.
+    - accepted (bool): Whether the output is accepted.
+    - analysis (str, optional): The analysis.
+    - best_output (str, optional): The best output.
+    - best_system_message (str, optional): The best system message.
+    - best_output_age (int): The age of the best output.
+    """
     max_output_age: int = 0
     user_message: Optional[str] = None
     expected_output: Optional[str] = None
@@ -26,8 +43,30 @@ class AgentState(BaseModel):
     best_output_age: int = 0
 
 class MetaPromptGraph:
+    """
+    This class represents a graph for meta-prompting in a conversational AI system.
+
+    It manages the state of the conversation, including the user's message, expected output,
+    acceptance criteria, system message, output, suggestions, and analysis. The graph
+    consists of nodes that represent different stages of the conversation, such as
+    prompting the developer, executing the output, analyzing the output history, and
+    suggesting new prompts. The class provides methods to create the workflow,
+    initialize the graph, and invoke the graph with a given state.
+
+    The MetaPromptGraph class is responsible for orchestrating the conversation flow
+    and deciding the next step based on the current state of the conversation. It uses
+    language models and prompt templates to generate responses and analyze the output.
+    """
     @classmethod
     def get_node_names(cls):
+        """
+        Returns a list of node names in the meta-prompt graph.
+
+        This method is used to initialize the language models and prompt templates for each node in the graph.
+
+        Returns:
+            list: A list of node names.
+        """
         return META_PROMPT_NODES
 
     def __init__(self,
@@ -36,6 +75,17 @@ class MetaPromptGraph:
                  prompts: Dict[str, ChatPromptTemplate] = {},
                  logger: Optional[logging.Logger] = None,
                  verbose=False):
+        """
+        Initializes the MetaPromptGraph instance.
+
+        Args:
+        - llms (Union[BaseLanguageModel, Dict[str, BaseLanguageModel]], optional): The language models for the graph nodes. Defaults to {}.
+        - prompts (Dict[str, ChatPromptTemplate], optional): The custom prompt templates for the graph nodes. Defaults to {}.
+        - logger (Optional[logging.Logger], optional): The logger for the graph. Defaults to None.
+        - verbose (bool, optional): Whether to set the logger level to DEBUG. Defaults to False.
+
+        Initializes the logger, sets the language models and prompt templates for the graph nodes, and updates the prompt templates with custom ones if provided.
+        """
         self.logger = logger or logging.getLogger(__name__)
         if self.logger is not None:
             if verbose:
@@ -237,4 +287,4 @@ class MetaPromptGraph:
         return "continue"
 
     def _should_exit_on_acceptable_output(self, state: AgentState) -> str:
-        return "continue" if not state.accepted else END
+        return "continue" if not state.accepted else END
 
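A minimal construction sketch for the documented class, based on the constructor signature above and the usage visible in tests/meta_prompt_graph_test.py (the model and the state values are illustrative; the exact invocation API lives in the class itself):

```python
from langchain_openai import ChatOpenAI  # as imported in the tests

llm = ChatOpenAI(model_name="gpt-3.5-turbo")  # illustrative executor model
graph = MetaPromptGraph(llms=llm, verbose=True)
state = AgentState(
    user_message="How do I reverse a list in Python?",
    expected_output="Use slicing: my_list[::-1]",
    acceptance_criteria="Same meaning as the expected output.",
)
# The graph is then invoked with this state; see process_message in
# app/gradio_meta_prompt.py for how recursion_limit is applied.
```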
tests/meta_prompt_graph_test.py CHANGED
@@ -1,6 +1,7 @@
 import unittest
 import pprint
 import logging
+import functools
 from unittest.mock import MagicMock, Mock
 from langchain_core.language_models import BaseLanguageModel
 from langchain_openai import ChatOpenAI
@@ -168,7 +169,7 @@ class TestMetaPromptGraph(unittest.TestCase):
             Mock(type="content", content="Here's one way: `my_list[::-1]`"),  # NODE_PROMPT_EXECUTOR
             Mock(type="content", content="Accept: Yes"),  # NODE_PROMPT_ANALYZER
         ]
-        llm.invoke = lambda _: responses.pop(0)
+        llm.invoke = functools.partial(next, iter(responses))
 
         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(
 
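The mock change deserves a note: `functools.partial(next, iter(responses))` builds a one-argument callable in which the argument `invoke` passes becomes `next`'s default value, returned only once the canned responses run out, whereas the old `lambda _: responses.pop(0)` raised `IndexError` at that point. A standalone sketch of the mechanics:

```python
import functools

responses = iter(["first", "second"])
fake_invoke = functools.partial(next, responses)

print(fake_invoke("prompt-1"))  # 'first'  (the argument is next()'s default, unused here)
print(fake_invoke("prompt-2"))  # 'second'
print(fake_invoke("prompt-3"))  # 'prompt-3' (iterator exhausted, so the default is returned)
```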