Updated pydocs. Corrected the `evaluate` behavior.
- app/gradio_meta_prompt.py +267 -91
- meta_prompt/consts.py +2 -2
- meta_prompt/meta_prompt.py +51 -1
- tests/meta_prompt_graph_test.py +2 -1
app/gradio_meta_prompt.py
CHANGED
@@ -70,6 +70,20 @@ class SimplifiedCSVLogger(CSVLogger):
 
 
 class LLMModelFactory:
+    """A factory class for creating instances of LLM models.
+
+    This class follows the Singleton pattern, ensuring that only one instance is created.
+    The `create` method dynamically instantiates a model based on the provided `model_type`.
+
+    Attributes:
+        _instance (LLMModelFactory): A private class variable to store the singleton instance.
+
+    Methods:
+        create(model_type: str, **kwargs) -> BaseLanguageModel:
+            Dynamically creates and returns an instance of a model based on `model_type`.
+
+    """
+
     _instance = None
 
     def __new__(cls):
@@ -77,12 +91,34 @@ class LLMModelFactory:
             cls._instance = super(LLMModelFactory, cls).__new__(cls)
         return cls._instance
 
-    def create(self, model_type: str, **kwargs):
+    def create(self, model_type: str, **kwargs) -> BaseLanguageModel:
+        """Creates and returns an instance of a model based on `model_type`.
+
+        Args:
+            model_type (str): The name of the model class to instantiate.
+            **kwargs: Additional keyword arguments to pass to the model constructor.
+
+        Returns:
+            BaseLanguageModel: An instance of a model that inherits from BaseLanguageModel.
+
+        """
         model_class = globals()[model_type]
         return model_class(**kwargs)
 
 
 def chat_log_2_chatbot_list(chat_log: str):
+    """Convert a chat log string into a list of dialogues for the Chatbot format.
+
+    Args:
+        chat_log (str): A JSON formatted chat log where each line represents an action with its message.
+            Expected actions are 'invoke' and 'response'.
+
+    Returns:
+        List[List[str]]: A list of dialogue pairs where the first element is a user input and the second element is a bot response.
+            If the action was 'invoke', the first element will be the message, and the second element will be None.
+            If the action was 'response', the first element will be None, and the second element will be the message.
+    """
+
     chatbot_list = []
     if chat_log is None or chat_log == '':
         return chatbot_list
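The `create` method above resolves the class by name in the module's global namespace, so `model_type` must match a class imported into `app/gradio_meta_prompt.py`. A minimal usage sketch; the model class and constructor arguments here are illustrative, not taken from the commit:

```python
# Hypothetical usage of LLMModelFactory.create; assumes ChatOpenAI is imported
# at module level so globals()["ChatOpenAI"] resolves to the class.
factory = LLMModelFactory()  # __new__ always returns the same singleton instance
llm = factory.create("ChatOpenAI", model_name="gpt-4o-mini", temperature=0)
```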
@@ -91,9 +127,9 @@ def chat_log_2_chatbot_list(chat_log: str):
             json_line = json.loads(line)
             if 'action' in json_line:
                 if json_line['action'] == 'invoke':
-                    chatbot_list.append([json_line['message'],None])
+                    chatbot_list.append([json_line['message'], None])
                 if json_line['action'] == 'response':
-                    chatbot_list.append([None,json_line['message']])
+                    chatbot_list.append([None, json_line['message']])
         except json.decoder.JSONDecodeError as e:
             print(f"Error decoding JSON log output: {e}")
             print(line)
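Based on the new docstring, each log line is a standalone JSON object. A sketch of the expected conversion; the log content is invented for illustration:

```python
# Illustrative input/output for chat_log_2_chatbot_list (log lines invented).
chat_log = (
    '{"action": "invoke", "message": "Reverse a list in Python"}\n'
    '{"action": "response", "message": "Use my_list[::-1]"}\n'
)
# Expected result, per the docstring:
# [["Reverse a list in Python", None], [None, "Use my_list[::-1]"]]
```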
@@ -106,6 +142,16 @@ def chat_log_2_chatbot_list(chat_log: str):
 active_model_tab = "Simple"
 
 def on_model_tab_select(event: gr.SelectData):
+    """
+    Handles model tab selection events and updates the active model tab.
+
+    Parameters:
+    event (gr.SelectData): The select data event triggered by the user's action.
+
+    Returns:
+    None: This function doesn't return anything but updates the global variable 'active_model_tab'.
+
+    """
     if not event.selected:
         return
 
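How this handler gets wired up is not part of this hunk; a hypothetical sketch of the kind of wiring it implies, with tab names and variables chosen for illustration only:

```python
# Hypothetical wiring for on_model_tab_select; the real Blocks layout is
# defined further down in app/gradio_meta_prompt.py and is not shown here.
with gr.Blocks() as demo:
    with gr.Tab("Simple") as simple_tab:
        pass
    with gr.Tab("Advanced") as advanced_tab:
        pass
    # On selection, gr.SelectData.value carries the tab label and
    # gr.SelectData.selected indicates whether it became active.
    simple_tab.select(on_model_tab_select)
    advanced_tab.select(on_model_tab_select)
```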
@@ -113,59 +159,96 @@ def on_model_tab_select(event: gr.SelectData):
     active_model_tab = event.value
 
 
+def get_current_executor_model(simple_model_name: str, advanced_model_name: str, expert_model_name: str) -> BaseLanguageModel:
+    """
+    Retrieve and return a language model (LLM) based on the currently active model tab.
+
+    This function uses a mapping to associate model tab names with their corresponding model names.
+    It then looks up the configuration for the selected executor model in the application's
+    configuration, creates an instance of the appropriate type of language model using that
+    configuration, and returns it. If the active model tab is not found in the mapping, the simple model
+    will be used as a default.
+
+    Args:
+        simple_model_name (str): The name of the simple language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        advanced_model_name (str): The name of the advanced language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        expert_model_name (str): The name of the expert language model.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        BaseLanguageModel: An instance of a language model that inherits from BaseLanguageModel,
+            based on the currently active model tab and the provided model names.
+    """
+    model_mapping = {
+        "Simple": simple_model_name,
+        "Advanced": advanced_model_name,
+        "Expert": expert_model_name
     }
+    executor_model_name = model_mapping.get(active_model_tab, simple_model_name)
+    executor_model_config = config.llms[executor_model_name]
+    return LLMModelFactory().create(executor_model_config.type,
+                                    **executor_model_config.model_dump(exclude={'type'}))
+
 
+def evaluate_system_message(system_message, user_message, simple_model, advanced_executor_model, expert_executor_model):
+    """
+    Evaluate a system message by using it to generate a response from an executor model based on the current active tab and provided user message.
+
+    This function retrieves the appropriate language model (LLM) for the current active model tab, formats a chat prompt template with the system message and user message, invokes the LLM using this formatted prompt, and returns the content of the output if it exists.
+
+    Args:
+        system_message (str): The system message to use when evaluating the response.
+        user_message (str): The user's input message for which a response will be generated.
+        simple_model (str): The name of the simple language model. This should correspond to a key in the 'llms' section of the application's configuration.
+        advanced_executor_model (str): The name of the advanced language model. This should correspond to a key in the 'llms' section of the application's configuration.
+        expert_executor_model (str): The name of the expert language model. This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        str: The content of the output generated by the LLM based on the system message and user message, if it exists; otherwise, an empty string.
+
+    Raises:
+        gr.Error: If there is a Gradio-specific error during the execution of this function.
+        Exception: For any other unexpected errors that occur during the execution of this function.
+    """
+    llm = get_current_executor_model(simple_model, advanced_executor_model, expert_executor_model)
     template = ChatPromptTemplate.from_messages([
         ("system", "{system_message}"),
         ("human", "{user_message}")
     ])
-    messages = template.format_messages(system_message=system_message, user_message=user_message)
     try:
-        output = llm.invoke(
+        output = llm.invoke(template.format(
+            system_message=system_message, user_message=user_message))
+        return output.content if hasattr(output, 'content') else ""
+    except gr.Error as e:
+        raise e
     except Exception as e:
-            raise e
-        else:
-            raise gr.Error(f"Error: {e}")
-
-    if hasattr(output, 'content'):
-        return output.content
-    else:
-        return ""
+        raise gr.Error(f"Error: {e}")
 
 
-def process_message(user_message, expected_output, acceptance_criteria,
+def process_message(user_message, expected_output, acceptance_criteria, initial_system_message, recursion_limit: int, max_output_age: int, llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]):
+    """
+    Process a user message by executing the MetaPromptGraph with provided language models and input state.
+    This function sets up the initial state of the conversation, logs the execution if verbose mode is enabled,
+    and extracts the best system message, output, and analysis from the output state of the MetaPromptGraph.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language model(s).
+        expected_output (str): The anticipated response or outcome from the language model(s) based on the user's message.
+        acceptance_criteria (str): Criteria that determines whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language model(s) before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        llms (Union[BaseLanguageModel, Dict[str, BaseLanguageModel]]): A single language model or a dictionary of language models to use for processing the user's message.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language model(s) that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
     input_state = AgentState(
         user_message=user_message,
         expected_output=expected_output,
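The corrected `evaluate` path now formats the prompt to a single string with `ChatPromptTemplate.format`, returns inside the `try`, re-raises `gr.Error` as-is, and wraps any other failure in `gr.Error`. A small sketch of what `format` produces versus the old `format_messages`; the message values are illustrative:

```python
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages([
    ("system", "{system_message}"),
    ("human", "{user_message}"),
])

# New behavior: a single formatted string is passed to llm.invoke(...).
as_string = template.format(system_message="Be terse.", user_message="Hi")
# 'System: Be terse.\nHuman: Hi'

# Old behavior: a list of message objects was built first with format_messages.
as_messages = template.format_messages(system_message="Be terse.", user_message="Hi")
# [SystemMessage(content='Be terse.'), HumanMessage(content='Hi')]
```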
@@ -197,34 +280,107 @@ def process_message(user_message, expected_output, acceptance_criteria,
     else:
         log_output = None
 
-    system_message = output_state.get(
+    system_message = output_state.get(
+        'best_system_message', "Error: The output state does not contain a valid 'best_system_message'")
+    output = output_state.get(
+        'best_output', "Error: The output state does not contain a valid 'best_output'")
+    analysis = output_state.get(
+        'analysis', "Error: The output state does not contain a valid 'analysis'")
 
     return (system_message, output, analysis, chat_log_2_chatbot_list(log_output))
 
 
+def initialize_llm(model_name: str) -> Any:
+    """
+    Initialize and return a language model (LLM) based on its name.
+
+    This function looks up the configuration for the specified language model in the application's
+    configuration, creates an instance of the appropriate type of language model using that
+    configuration, and returns it.
+
+    Args:
+        model_name (str): The name of the language model to initialize.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        Any: An instance of the specified type of language model, initialized with its configured settings.
+
+    Raises:
+        KeyError: If no configuration exists for the specified model name.
+        NotImplementedError: If an unrecognized type is configured for the language model.
+            This should not occur under normal circumstances because the LLMModelFactory class
+            checks and validates the type when creating a new language model.
+    """
+    model_config = config.llms[model_name]
+    return LLMModelFactory().create(model_config.type, **model_config.model_dump(exclude={'type'}))
+
 
 def process_message_with_single_llm(user_message, expected_output, acceptance_criteria, initial_system_message,
                                     recursion_limit: int, max_output_age: int,
                                     model_name: str):
+    """
+    Process a user message using a single language model.
+
+    This function initializes the specified language model and then uses it to process the user's
+    message along with other provided input parameters such as expected output, acceptance criteria,
+    initial system message, recursion limit, and max output age. The result is obtained by calling
+    the `process_message` function with this single language model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language model(s).
+        expected_output (str): The anticipated response or outcome from the language model based on the user's message.
+        acceptance_criteria (str): Criteria that determines whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language model before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        model_name (str): The name of the language model to initialize and use for processing the user's message.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language model that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
+    llm = initialize_llm(model_name)
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llm)
 
 
 def process_message_with_2_llms(user_message, expected_output, acceptance_criteria, initial_system_message,
                                 recursion_limit: int, max_output_age: int,
-                                optimizer_model_name: str, executor_model_name: str
+                                optimizer_model_name: str, executor_model_name: str):
+    """
+    Process a user message using two language models - one for optimization and another for execution.
+
+    This function initializes the specified optimizer and executor language models and then uses them to process
+    the user's message along with other provided input parameters such as expected output, acceptance criteria,
+    initial system message, recursion limit, and max output age. The result is obtained by calling the `process_message`
+    function with a dictionary of language models where all nodes except for NODE_PROMPT_EXECUTOR use the optimizer model
+    and NODE_PROMPT_EXECUTOR uses the executor model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language models.
+        expected_output (str): The anticipated response or outcome from the language models based on the user's message.
+        acceptance_criteria (str): Criteria that determines whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language models before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        optimizer_model_name (str): The name of the language model to initialize and use for optimization tasks like prompt development, analysis, and suggestion.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+        executor_model_name (str): The name of the language model to initialize and use for execution tasks like running code or providing final outputs.
+            This should correspond to a key in the 'llms' section of the application's configuration.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language models that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
+    optimizer_model = initialize_llm(optimizer_model_name)
+    executor_model = initialize_llm(executor_model_name)
     llms = {
         NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
         NODE_PROMPT_DEVELOPER: optimizer_model,
@@ -233,40 +389,54 @@ def process_message_with_2_llms(user_message, expected_output, acceptance_criteria,
         NODE_PROMPT_ANALYZER: optimizer_model,
         NODE_PROMPT_SUGGESTER: optimizer_model
     }
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                            recursion_limit, max_output_age, llms)
 
 
 def process_message_with_expert_llms(user_message, expected_output, acceptance_criteria, initial_system_message,
+                                     recursion_limit: int, max_output_age: int,
+                                     initial_developer_model_name: str, developer_model_name: str,
+                                     executor_model_name: str, output_history_analyzer_model_name: str,
+                                     analyzer_model_name: str, suggester_model_name: str):
+    """
+    Process a user message using multiple expert language models.
+
+    This function initializes six expert language models based on their names and uses them to process the user's message
+    along with other provided input parameters such as expected output, acceptance criteria, initial system message,
+    recursion limit, and max output age. The result is obtained by calling the `process_message` function with a dictionary
+    of language models where each node uses a specific language model.
+
+    Args:
+        user_message (str): The user's input message to be processed by the language models.
+        expected_output (str): The anticipated response or outcome from the language models based on the user's message.
+        acceptance_criteria (str): Criteria that determines whether the output is acceptable or not.
+        initial_system_message (str): Initial instruction given to the language models before processing the user's message.
+        recursion_limit (int): The maximum number of times the MetaPromptGraph can call itself recursively.
+        max_output_age (int): The maximum age of output messages that should be considered in the conversation history.
+        initial_developer_model_name (str): The name of the language model to initialize and use for the initial developer node.
+        developer_model_name (str): The name of the language model to initialize and use for the developer node.
+        executor_model_name (str): The name of the language model to initialize and use for the executor node.
+        output_history_analyzer_model_name (str): The name of the language model to initialize and use for the output history analyzer node.
+        analyzer_model_name (str): The name of the language model to initialize and use for the analyzer node.
+        suggester_model_name (str): The name of the language model to initialize and use for the suggester node.
+
+    Returns:
+        tuple: A tuple containing the best system message, output, analysis, and chat log in JSON format.
+            - best_system_message (str): The system message that resulted in the most appropriate response based on the acceptance criteria.
+            - best_output (str): The output generated by the language models that best meets the expected outcome and acceptance criteria.
+            - analysis (str): An analysis of how well the generated output matches the expected output and acceptance criteria.
+            - chat_log (list): A list containing JSON objects representing the conversation log, with each object containing a timestamp, logger name, levelname, and message.
+    """
     llms = {
-        NODE_PROMPT_INITIAL_DEVELOPER:
-        NODE_PROMPT_DEVELOPER:
-        NODE_PROMPT_EXECUTOR:
-        NODE_OUTPUT_HISTORY_ANALYZER:
-        NODE_PROMPT_ANALYZER:
-        NODE_PROMPT_SUGGESTER:
+        NODE_PROMPT_INITIAL_DEVELOPER: initialize_llm(initial_developer_model_name),
+        NODE_PROMPT_DEVELOPER: initialize_llm(developer_model_name),
+        NODE_PROMPT_EXECUTOR: initialize_llm(executor_model_name),
+        NODE_OUTPUT_HISTORY_ANALYZER: initialize_llm(output_history_analyzer_model_name),
+        NODE_PROMPT_ANALYZER: initialize_llm(analyzer_model_name),
+        NODE_PROMPT_SUGGESTER: initialize_llm(suggester_model_name)
     }
     return process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
+                           recursion_limit, max_output_age, llms)
 
 
 class FileConfig(BaseConfig):
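A hedged sketch of how the three entry points differ in the models they hand to `process_message`; the model names are illustrative stand-ins for keys defined in the application's `llms` configuration, and the placeholder inputs are invented:

```python
# Placeholder inputs for illustration only.
msg, expected, criteria, seed_prompt = "Reverse a list", "my_list[::-1]", "Any valid one-liner", ""

# Simple tab: one model drives every graph node.
process_message_with_single_llm(msg, expected, criteria, seed_prompt, 25, 2,
                                model_name="gpt-4o-mini")

# Advanced tab: optimizer nodes use one model, the executor node another.
process_message_with_2_llms(msg, expected, criteria, seed_prompt, 25, 2,
                            optimizer_model_name="gpt-4o",
                            executor_model_name="gpt-4o-mini")

# Expert tab: one model name per node, in the order
# initial developer, developer, executor, output history analyzer, analyzer, suggester.
process_message_with_expert_llms(msg, expected, criteria, seed_prompt, 25, 2,
                                 "gpt-4o", "gpt-4o", "gpt-4o-mini",
                                 "gpt-4o", "gpt-4o", "gpt-4o")
```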
@@ -393,19 +563,25 @@ with gr.Blocks(title='Meta Prompt') as demo:
                                     acceptance_criteria_input, initial_system_message_input],
                                    value='Clear All')
         with gr.Column():
+            system_message_output = gr.Textbox(
+                label="System Message", show_copy_button=True)
             with gr.Row():
+                evaluate_system_message_button = gr.Button(
+                    value="Evaluate", variant="secondary")
+                copy_to_initial_system_message_button = gr.Button(
+                    value="Copy to Initial System Message", variant="secondary")
             output_output = gr.Textbox(label="Output", show_copy_button=True)
+            analysis_output = gr.Textbox(
+                label="Analysis", show_copy_button=True)
+            flag_button = gr.Button(
+                value="Flag", variant="secondary", visible=config.allow_flagging)
     with gr.Accordion("Details", open=False, visible=config.verbose):
         logs_chatbot = gr.Chatbot(
             label='Messages', show_copy_button=True, layout='bubble',
             bubble_full_width=False, render_markdown=False
         )
+        clear_logs_button = gr.ClearButton(
+            [logs_chatbot], value='Clear Logs')
 
     # Load examples
     examples = gr.Examples(config.examples_path, inputs=[
@@ -425,13 +601,13 @@ with gr.Blocks(title='Meta Prompt') as demo:
     evaluate_initial_system_message_button.click(
         evaluate_system_message,
         inputs=[initial_system_message_input, user_message_input,
-                simple_model_name_input, advanced_executor_model_name_input],
+                simple_model_name_input, advanced_executor_model_name_input, expert_prompt_executor_model_name_input],
         outputs=[output_output]
     )
     evaluate_system_message_button.click(
         evaluate_system_message,
         inputs=[system_message_output, user_message_input,
-                simple_model_name_input, advanced_executor_model_name_input],
+                simple_model_name_input, advanced_executor_model_name_input, expert_prompt_executor_model_name_input],
         outputs=[output_output]
     )
     copy_to_initial_system_message_button.click(
meta_prompt/consts.py
CHANGED
@@ -108,7 +108,7 @@ You output the following analysis according to the Acceptance Criteria:
 # Preferred Output ID: [ID]
 ```
 
-If both outputs are
+You must choose one of the two outputs. If both outputs are exacly the same, output the following:
 
 ```
 # Analysis
@@ -193,7 +193,7 @@ Provide your analysis in the following format:
 * Provide your suggestions in a Markdown list, nothing else. Output only the suggestions related with Unacceptable Differences.
 * Start every suggestion with `The System Message should ...`.
 * Figue out the contexts of the System Message that conflict with the suggestions, and suggest modification or deletion.
-* 
+* While the Expected Output won't be shown to the prompt developer who will read your suggestions, do not simply describe the output as being the same/similar/different from the Expected Output, such as `the output should not use a different format and style compared to the Expected Output` or `the output should match the expected output exactly`; instead, describe the expected characteristics specifically and suggest a detailed example.
 * Avoiding the behavior should be explicitly requested (e.g. `The System Message should explicitly state that the output shoud not ...`) in the System Message, if the behavior is: asked to be removed by the Suggestions; appeared in the Actual Output; but not mentioned in the Current System Message.
 * Expected Output text should not appear in System Message as an example. But it's OK to use some similar but distinct text as an example instead.
 * Ask to remove the Expected Output text or text highly similar to Expected Output from System Message, if it's present.
meta_prompt/meta_prompt.py
CHANGED
@@ -12,6 +12,23 @@ from pydantic import BaseModel
 from .consts import *
 
 class AgentState(BaseModel):
+    """
+    Represents the state of an agent in a conversation.
+
+    Attributes:
+    - max_output_age (int): The maximum age of the output.
+    - user_message (str, optional): The user's message.
+    - expected_output (str, optional): The expected output.
+    - acceptance_criteria (str, optional): The acceptance criteria.
+    - system_message (str, optional): The system message.
+    - output (str, optional): The output.
+    - suggestions (str, optional): The suggestions.
+    - accepted (bool): Whether the output is accepted.
+    - analysis (str, optional): The analysis.
+    - best_output (str, optional): The best output.
+    - best_system_message (str, optional): The best system message.
+    - best_output_age (int): The age of the best output.
+    """
     max_output_age: int = 0
     user_message: Optional[str] = None
     expected_output: Optional[str] = None
@@ -26,8 +43,30 @@
     best_output_age: int = 0
 
 class MetaPromptGraph:
+    """
+    This class represents a graph for meta-prompting in a conversational AI system.
+
+    It manages the state of the conversation, including the user's message, expected output,
+    acceptance criteria, system message, output, suggestions, and analysis. The graph
+    consists of nodes that represent different stages of the conversation, such as
+    prompting the developer, executing the output, analyzing the output history, and
+    suggesting new prompts. The class provides methods to create the workflow,
+    initialize the graph, and invoke the graph with a given state.
+
+    The MetaPromptGraph class is responsible for orchestrating the conversation flow
+    and deciding the next step based on the current state of the conversation. It uses
+    language models and prompt templates to generate responses and analyze the output.
+    """
     @classmethod
     def get_node_names(cls):
+        """
+        Returns a list of node names in the meta-prompt graph.
+
+        This method is used to initialize the language models and prompt templates for each node in the graph.
+
+        Returns:
+            list: A list of node names.
+        """
        return META_PROMPT_NODES
 
     def __init__(self,
@@ -36,6 +75,17 @@
                  prompts: Dict[str, ChatPromptTemplate] = {},
                  logger: Optional[logging.Logger] = None,
                  verbose=False):
+        """
+        Initializes the MetaPromptGraph instance.
+
+        Args:
+        - llms (Union[BaseLanguageModel, Dict[str, BaseLanguageModel]], optional): The language models for the graph nodes. Defaults to {}.
+        - prompts (Dict[str, ChatPromptTemplate], optional): The custom prompt templates for the graph nodes. Defaults to {}.
+        - logger (Optional[logging.Logger], optional): The logger for the graph. Defaults to None.
+        - verbose (bool, optional): Whether to set the logger level to DEBUG. Defaults to False.
+
+        Initializes the logger, sets the language models and prompt templates for the graph nodes, and updates the prompt templates with custom ones if provided.
+        """
         self.logger = logger or logging.getLogger(__name__)
         if self.logger is not None:
             if verbose:
@@ -237,4 +287,4 @@
         return "continue"
 
     def _should_exit_on_acceptable_output(self, state: AgentState) -> str:
-        return "continue" if not state.accepted else END
+        return "continue" if not state.accepted else END
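Putting the documented pieces together, a hedged construction sketch based only on the docstrings above; the exact invocation method of the compiled graph is not shown in this diff, so the final call is indicated but commented out:

```python
# Sketch only. `some_chat_model` is a placeholder for any BaseLanguageModel
# instance; NODE_* constants come from meta_prompt/consts.py.
graph = MetaPromptGraph(llms=some_chat_model, verbose=True)

state = AgentState(
    user_message="Reverse a list in Python.",
    expected_output="my_list[::-1]",
    acceptance_criteria="Accept any one-liner that reverses the list.",
)

# Hypothetical call; the actual invocation API lives elsewhere in meta_prompt.py.
# output_state = graph(state)
```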
tests/meta_prompt_graph_test.py
CHANGED
@@ -1,6 +1,7 @@
 import unittest
 import pprint
 import logging
+import functools
 from unittest.mock import MagicMock, Mock
 from langchain_core.language_models import BaseLanguageModel
 from langchain_openai import ChatOpenAI
@@ -168,7 +169,7 @@ class TestMetaPromptGraph(unittest.TestCase):
             Mock(type="content", content="Here's one way: `my_list[::-1]`"), # NODE_PROMPT_EXECUTOR
             Mock(type="content", content="Accept: Yes"), # NODE_PPROMPT_ANALYZER
         ]
-        llm.invoke =
+        llm.invoke = functools.partial(next, iter(responses))
 
         meta_prompt_graph = MetaPromptGraph(llms=llm)
         input_state = AgentState(
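The new mock wiring makes each call to the stubbed `invoke` yield the next canned response from `responses`; a single positional argument passed by the graph ends up as `next`'s default value, so it is effectively ignored until the iterator runs out. A self-contained sketch of the pattern, with invented response strings:

```python
import functools

responses = ["first canned reply", "second canned reply"]
fake_invoke = functools.partial(next, iter(responses))

fake_invoke("prompt A")  # -> "first canned reply"
fake_invoke("prompt B")  # -> "second canned reply"
# Once the iterator is exhausted, the positional argument acts as next()'s
# default value, so the call returns the prompt itself instead of raising
# StopIteration.
fake_invoke("prompt C")  # -> "prompt C"
```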
|