Ported Gradio meta-prompt to Streamlit.
- app/streamlit_meta_prompt.py +613 -0
- guidelines/gradio.md +29 -0
- guidelines/prompts.md +348 -0
- guidelines/streamlit.md +30 -0
app/streamlit_meta_prompt.py
ADDED
@@ -0,0 +1,613 @@
import streamlit as st
import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from meta_prompt import *
from pythonjsonlogger import jsonlogger
from app.config import MetaPromptConfig, RoleMessage
from confz import BaseConfig, CLArgSource, EnvSource, FileSource
import io


def prompt_templates_confz2langchain(
    prompt_templates: Dict[str, List[RoleMessage]]
) -> Dict[str, ChatPromptTemplate]:
    # Convert one confz prompt-template group (node name -> RoleMessage list)
    # into LangChain chat prompt templates keyed by node name.
    return {
        node: ChatPromptTemplate.from_messages(
            [
                (role_message.role, role_message.message)
                for role_message in role_messages
            ]
        )
        for node, role_messages in prompt_templates.items()
    }


class LLMModelFactory:
    # Singleton factory that instantiates a language model class by name.
    _instance = None

    def __new__(cls):
        if not cls._instance:
            cls._instance = super(LLMModelFactory, cls).__new__(cls)
        return cls._instance

    def create(self, model_type: str, **kwargs) -> BaseLanguageModel:
        model_class = globals()[model_type]
        return model_class(**kwargs)


def chat_log_2_chatbot_list(chat_log: str) -> List[List[str]]:
    # Parse a JSON-lines chat log into [user, assistant] message pairs.
    chatbot_list = []
    if chat_log is None or chat_log == '':
        return chatbot_list
    for line in chat_log.splitlines():
        try:
            json_line = json.loads(line)
            if 'action' in json_line:
                if json_line['action'] == 'invoke':
                    chatbot_list.append([json_line['message'], None])
                if json_line['action'] == 'response':
                    chatbot_list.append([None, json_line['message']])
        except json.decoder.JSONDecodeError as e:
            print(f"Error decoding JSON log output: {e}")
            print(line)
        except KeyError as e:
            print(f"Error accessing key in JSON log output: {e}")
            print(line)
    return chatbot_list


# Tracks the currently selected model tab; kept in sync with the sidebar below.
active_model_tab = "Simple"


def get_current_model(simple_model_name: str,
                      advanced_model_name: str,
                      expert_model_name: str,
                      expert_model_config: Optional[Dict[str, Any]] = None) -> BaseLanguageModel:
    model_mapping = {
        "Simple": simple_model_name,
        "Advanced": advanced_model_name,
        "Expert": expert_model_name
    }

    try:
        model_name = model_mapping.get(active_model_tab, simple_model_name)
        model = config.llms[model_name]
        model_type = model.type
        model_config = model.model_dump(exclude={'type'})

        if active_model_tab == "Expert" and expert_model_config:
            model_config.update(expert_model_config)

        return LLMModelFactory().create(model_type, **model_config)

    except KeyError as e:
        logging.error(f"Configuration key error: {e}")
        raise ValueError(f"Invalid model name or configuration: {e}")

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        raise RuntimeError(f"Failed to retrieve the model: {e}")


def evaluate_system_message(system_message, user_message, simple_model,
                            advanced_executor_model, expert_executor_model,
                            expert_executor_model_temperature=0.1):
    llm = get_current_model(simple_model, advanced_executor_model,
                            expert_executor_model,
                            {"temperature": expert_executor_model_temperature})
    template = ChatPromptTemplate.from_messages([
        ("system", "{system_message}"),
        ("human", "{user_message}")
    ])
    try:
        output = llm.invoke(template.format(
            system_message=system_message, user_message=user_message))
        return output.content if hasattr(output, 'content') else ""
    except Exception as e:
        # st.error only displays the message; re-raise so callers can handle it.
        st.error(f"Error: {e}")
        raise


def generate_acceptance_criteria(user_message, expected_output,
                                 simple_model, advanced_executor_model,
                                 expert_prompt_acceptance_criteria_model,
                                 expert_prompt_acceptance_criteria_temperature=0.1,
                                 prompt_template_group: Optional[str] = None):
    log_stream = io.StringIO()
    logger = logging.getLogger(MetaPromptGraph.__name__) if config.verbose else None
    log_handler = logging.StreamHandler(log_stream) if logger else None

    if log_handler:
        log_handler.setFormatter(
            jsonlogger.JsonFormatter('%(asctime)s %(name)s %(levelname)s %(message)s')
        )
        logger.addHandler(log_handler)

    llm = get_current_model(simple_model, advanced_executor_model,
                            expert_prompt_acceptance_criteria_model,
                            {"temperature": expert_prompt_acceptance_criteria_temperature})
    if prompt_template_group is None:
        prompt_template_group = 'default'
    prompt_templates = prompt_templates_confz2langchain(
        config.prompt_templates[prompt_template_group]
    )
    acceptance_criteria_graph = MetaPromptGraph(llms={
        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: llm
    }, prompts=prompt_templates,
        verbose=config.verbose, logger=logger)
    state = AgentState(
        user_message=user_message,
        expected_output=expected_output
    )
    output_state = acceptance_criteria_graph.run_acceptance_criteria_graph(state)

    if log_handler:
        log_handler.close()
        log_output = log_stream.getvalue()
    else:
        log_output = None
    return output_state.get('acceptance_criteria', ""), chat_log_2_chatbot_list(log_output)


def generate_initial_system_message(
    user_message: str,
    expected_output: str,
    simple_model: str,
    advanced_executor_model: str,
    expert_prompt_initial_developer_model: str,
    expert_prompt_initial_developer_temperature: float = 0.1,
    prompt_template_group: Optional[str] = None
) -> tuple:
    log_stream = io.StringIO()
    logger = logging.getLogger(MetaPromptGraph.__name__) if config.verbose else None
    log_handler = logging.StreamHandler(log_stream) if logger else None

    if log_handler:
        log_handler.setFormatter(
            jsonlogger.JsonFormatter('%(asctime)s %(name)s %(levelname)s %(message)s')
        )
        logger.addHandler(log_handler)

    llm = get_current_model(
        simple_model,
        advanced_executor_model,
        expert_prompt_initial_developer_model,
        {"temperature": expert_prompt_initial_developer_temperature}
    )

    if prompt_template_group is None:
        prompt_template_group = 'default'
    prompt_templates = prompt_templates_confz2langchain(
        config.prompt_templates[prompt_template_group]
    )

    initial_system_message_graph = MetaPromptGraph(
        llms={NODE_PROMPT_INITIAL_DEVELOPER: llm},
        prompts=prompt_templates,
        verbose=config.verbose,
        logger=logger
    )

    state = AgentState(
        user_message=user_message,
        expected_output=expected_output
    )

    output_state = initial_system_message_graph.run_prompt_initial_developer_graph(state)

    if log_handler:
        log_handler.close()
        log_output = log_stream.getvalue()
    else:
        log_output = None

    system_message = output_state.get('system_message', "")
    return system_message, chat_log_2_chatbot_list(log_output)


def process_message(
    user_message: str, expected_output: str, acceptance_criteria: str,
    initial_system_message: str, recursion_limit: int, max_output_age: int,
    llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]],
    prompt_template_group: Optional[str] = None,
    aggressive_exploration: bool = False
) -> tuple:
    input_state = AgentState(
        user_message=user_message,
        expected_output=expected_output,
        acceptance_criteria=acceptance_criteria,
        system_message=initial_system_message,
        max_output_age=max_output_age
    )

    log_stream = io.StringIO()
    logger = logging.getLogger(MetaPromptGraph.__name__) if config.verbose else None
    log_handler = logging.StreamHandler(log_stream) if logger else None
    if log_handler:
        log_handler.setFormatter(jsonlogger.JsonFormatter(
            '%(asctime)s %(name)s %(levelname)s %(message)s'))
        logger.addHandler(log_handler)

    if prompt_template_group is None:
        prompt_template_group = 'default'
    prompt_templates = prompt_templates_confz2langchain(config.prompt_templates[prompt_template_group])
    meta_prompt_graph = MetaPromptGraph(llms=llms, prompts=prompt_templates,
                                        aggressive_exploration=aggressive_exploration,
                                        verbose=config.verbose, logger=logger)
    try:
        output_state = meta_prompt_graph(input_state, recursion_limit=recursion_limit)
    except Exception as e:
        # st.error only displays the message; re-raise so the caller's error
        # handling can stop this run instead of continuing with no output state.
        st.error(f"Error: {e}")
        raise

    if log_handler:
        log_handler.close()
        log_output = log_stream.getvalue()
    else:
        log_output = None

    system_message = output_state.get(
        'best_system_message', "Error: The output state does not contain a valid 'best_system_message'")
    output = output_state.get(
        'best_output', "Error: The output state does not contain a valid 'best_output'")
    analysis = output_state.get(
        'analysis', "Error: The output state does not contain a valid 'analysis'")
    acceptance_criteria = output_state.get(
        'acceptance_criteria', "Error: The output state does not contain a valid 'acceptance_criteria'")

    return (system_message, output, analysis, acceptance_criteria, chat_log_2_chatbot_list(log_output))


def initialize_llm(model_name: str, model_config: Optional[Dict[str, Any]] = None) -> Any:
    try:
        llm_config = config.llms[model_name]
        model_type = llm_config.type
        dumped_config = llm_config.model_dump(exclude={'type'})

        if model_config:
            dumped_config.update(model_config)

        return LLMModelFactory().create(model_type, **dumped_config)
    except KeyError:
        raise KeyError(f"No configuration exists for the model name: {model_name}")
    except NotImplementedError:
        raise NotImplementedError(
            f"Unrecognized type configured for the language model: {model_type}"
        )


def process_message_with_single_llm(
    user_message: str, expected_output: str, acceptance_criteria: str,
    initial_system_message: str, recursion_limit: int, max_output_age: int,
    model_name: str, prompt_template_group: Optional[str] = None,
    aggressive_exploration: bool = False
) -> tuple:
    llm = initialize_llm(model_name)
    return process_message(
        user_message, expected_output, acceptance_criteria, initial_system_message,
        recursion_limit, max_output_age, llm, prompt_template_group, aggressive_exploration
    )


def process_message_with_2_llms(
    user_message: str, expected_output: str, acceptance_criteria: str,
    initial_system_message: str, recursion_limit: int, max_output_age: int,
    optimizer_model_name: str, executor_model_name: str,
    prompt_template_group: Optional[str] = None,
    aggressive_exploration: bool = False
) -> tuple:
    # One model executes the prompt; the other drives every optimization node.
    optimizer_model = initialize_llm(optimizer_model_name)
    executor_model = initialize_llm(executor_model_name)
    llms = {
        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: optimizer_model,
        NODE_PROMPT_INITIAL_DEVELOPER: optimizer_model,
        NODE_PROMPT_DEVELOPER: optimizer_model,
        NODE_PROMPT_EXECUTOR: executor_model,
        NODE_OUTPUT_HISTORY_ANALYZER: optimizer_model,
        NODE_PROMPT_ANALYZER: optimizer_model,
        NODE_PROMPT_SUGGESTER: optimizer_model
    }
    return process_message(
        user_message, expected_output, acceptance_criteria,
        initial_system_message, recursion_limit, max_output_age, llms,
        prompt_template_group, aggressive_exploration
    )


def process_message_with_expert_llms(
    user_message: str, expected_output: str, acceptance_criteria: str,
    initial_system_message: str, recursion_limit: int, max_output_age: int,
    initial_developer_model_name: str, initial_developer_temperature: float,
    acceptance_criteria_model_name: str, acceptance_criteria_temperature: float,
    developer_model_name: str, developer_temperature: float,
    executor_model_name: str, executor_temperature: float,
    output_history_analyzer_model_name: str, output_history_analyzer_temperature: float,
    analyzer_model_name: str, analyzer_temperature: float,
    suggester_model_name: str, suggester_temperature: float,
    prompt_template_group: Optional[str] = None, aggressive_exploration: bool = False
) -> tuple:
    # Each graph node gets its own model and temperature.
    llms = {
        NODE_PROMPT_INITIAL_DEVELOPER: initialize_llm(
            initial_developer_model_name, {"temperature": initial_developer_temperature}
        ),
        NODE_ACCEPTANCE_CRITERIA_DEVELOPER: initialize_llm(
            acceptance_criteria_model_name, {"temperature": acceptance_criteria_temperature}
        ),
        NODE_PROMPT_DEVELOPER: initialize_llm(
            developer_model_name, {"temperature": developer_temperature}
        ),
        NODE_PROMPT_EXECUTOR: initialize_llm(
            executor_model_name, {"temperature": executor_temperature}
        ),
        NODE_OUTPUT_HISTORY_ANALYZER: initialize_llm(
            output_history_analyzer_model_name,
            {"temperature": output_history_analyzer_temperature}
        ),
        NODE_PROMPT_ANALYZER: initialize_llm(
            analyzer_model_name, {"temperature": analyzer_temperature}
        ),
        NODE_PROMPT_SUGGESTER: initialize_llm(
            suggester_model_name, {"temperature": suggester_temperature}
        )
    }
    return process_message(
        user_message,
        expected_output,
        acceptance_criteria,
        initial_system_message,
        recursion_limit,
        max_output_age,
        llms,
        prompt_template_group,
        aggressive_exploration
    )


class FileConfig(BaseConfig):
    config_file: str = 'config.yml'  # default path


# Two-pass config load: first read only the config-file path from the
# environment/CLI, then load the full config from that file plus overrides.
pre_config_sources = [
    EnvSource(prefix='METAPROMPT_', allow_all=True),
    CLArgSource()
]
pre_config = FileConfig(config_sources=pre_config_sources)

config_sources = [
    FileSource(file=pre_config.config_file, optional=True),
    EnvSource(prefix='METAPROMPT_', allow_all=True),
    CLArgSource()
]

config = MetaPromptConfig(config_sources=config_sources)

# Streamlit UI
st.title("Meta Prompt")
st.markdown("A tool for generating and analyzing natural language prompts using multiple language models.")

with st.sidebar:
    st.header("Model Settings")
    model_tab = st.selectbox("Select Model Type", ["Simple", "Advanced", "Expert"], key="model_tab")
    # Keep the module-level flag used by get_current_model and the main block
    # in sync with the sidebar selection (it was previously fixed at "Simple").
    active_model_tab = model_tab

    if model_tab == "Simple":
        simple_model_name_input = st.selectbox(
            "Model Name",
            config.llms.keys(),
            index=0,
        )
    elif model_tab == "Advanced":
        advanced_optimizer_model_name_input = st.selectbox(
            "Optimizer Model Name",
            config.llms.keys(),
            index=0,
        )
        advanced_executor_model_name_input = st.selectbox(
            "Executor Model Name",
            config.llms.keys(),
            index=1,
        )
    else:  # Expert
        expert_prompt_initial_developer_model_name_input = st.selectbox(
            "Initial Developer Model Name",
            config.llms.keys(),
            index=0,
        )
        expert_prompt_initial_developer_temperature_input = st.slider(
            "Initial Developer Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_acceptance_criteria_model_name_input = st.selectbox(
            "Acceptance Criteria Model Name",
            config.llms.keys(),
            index=0,
        )
        expert_prompt_acceptance_criteria_temperature_input = st.slider(
            "Acceptance Criteria Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_developer_model_name_input = st.selectbox(
            "Developer Model Name", config.llms.keys(), index=0
        )
        expert_prompt_developer_temperature_input = st.slider(
            "Developer Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_executor_model_name_input = st.selectbox(
            "Executor Model Name", config.llms.keys(), index=1
        )
        expert_prompt_executor_temperature_input = st.slider(
            "Executor Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_output_history_analyzer_model_name_input = st.selectbox(
            "Output History Analyzer Model Name",
            config.llms.keys(),
            index=0,
        )
        expert_prompt_output_history_analyzer_temperature_input = st.slider(
            "Output History Analyzer Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_analyzer_model_name_input = st.selectbox(
            "Analyzer Model Name", config.llms.keys(), index=0
        )
        expert_prompt_analyzer_temperature_input = st.slider(
            "Analyzer Temperature", 0.0, 1.0, 0.1, 0.1
        )

        expert_prompt_suggester_model_name_input = st.selectbox(
            "Suggester Model Name", config.llms.keys(), index=0
        )
        expert_prompt_suggester_temperature_input = st.slider(
            "Suggester Temperature", 0.0, 1.0, 0.1, 0.1
        )

    st.header("Prompt Template Settings")
    prompt_template_group_input = st.selectbox(
        "Prompt Template Group", config.prompt_templates.keys(), index=0
    )

    st.header("Advanced Settings")
    recursion_limit_input = st.number_input("Recursion Limit", 1, 100, 16, 1)
    max_output_age_input = st.number_input("Max Output Age", 1, 10, 2, 1)
    aggressive_exploration_input = st.checkbox("Aggressive Exploration", False)

if __name__ == "__main__":
    # Initialize session state
    if 'initial_system_message' not in st.session_state:
        st.session_state.initial_system_message = ""
    if 'initial_acceptance_criteria' not in st.session_state:
        st.session_state.initial_acceptance_criteria = ""
    if 'system_message' not in st.session_state:
        st.session_state.system_message = ""
    if 'output' not in st.session_state:
        st.session_state.output = ""
    if 'analysis' not in st.session_state:
        st.session_state.analysis = ""
    if 'acceptance_criteria_output' not in st.session_state:
        st.session_state.acceptance_criteria_output = ""
    if 'chat_log' not in st.session_state:
        st.session_state.chat_log = []

    def copy_system_message():
        # Copy the generated system message back into the input column.
        if 'system_message_output' in st.session_state:
            st.session_state.initial_system_message = st.session_state.system_message_output

    def copy_acceptance_criteria():
        if 'acceptance_criteria_output' in st.session_state:
            st.session_state.initial_acceptance_criteria = st.session_state.acceptance_criteria_output

    if active_model_tab == "Simple":
        simple_model_name = simple_model_name_input
        advanced_executor_model_name = None
        expert_prompt_initial_developer_model_name = None
        expert_prompt_acceptance_criteria_model_name = None
        expert_prompt_developer_model_name = None
        expert_prompt_executor_model_name = None
        expert_prompt_output_history_analyzer_model_name = None
        expert_prompt_analyzer_model_name = None
        expert_prompt_suggester_model_name = None
    elif active_model_tab == "Advanced":
        simple_model_name = None
        advanced_executor_model_name = advanced_executor_model_name_input
        expert_prompt_initial_developer_model_name = None
        expert_prompt_acceptance_criteria_model_name = None
        expert_prompt_developer_model_name = None
        expert_prompt_executor_model_name = None
        expert_prompt_output_history_analyzer_model_name = None
        expert_prompt_analyzer_model_name = None
        expert_prompt_suggester_model_name = None
    else:  # Expert
        simple_model_name = None
        advanced_executor_model_name = None
        expert_prompt_initial_developer_model_name = (
            expert_prompt_initial_developer_model_name_input
        )
        expert_prompt_acceptance_criteria_model_name = (
            expert_prompt_acceptance_criteria_model_name_input
        )
        expert_prompt_developer_model_name = expert_prompt_developer_model_name_input
        expert_prompt_executor_model_name = expert_prompt_executor_model_name_input
        expert_prompt_output_history_analyzer_model_name = (
            expert_prompt_output_history_analyzer_model_name_input
        )
        expert_prompt_analyzer_model_name = expert_prompt_analyzer_model_name_input
        expert_prompt_suggester_model_name = expert_prompt_suggester_model_name_input

    prompt_template_group = prompt_template_group_input
    recursion_limit = recursion_limit_input
    max_output_age = max_output_age_input
    aggressive_exploration = aggressive_exploration_input

    col1, col2 = st.columns(2)

    with col1:
        user_message = st.text_area("User Message", "").strip()
        expected_output = st.text_area("Expected Output", "").strip()
        initial_system_message = st.text_area("Initial System Message", st.session_state.initial_system_message).strip()
        acceptance_criteria = st.text_area("Acceptance Criteria", st.session_state.initial_acceptance_criteria).strip()

        generate_button_clicked = st.button("Generate", type="primary")

    with col2:
        if generate_button_clicked:
            try:
                if active_model_tab == "Simple":
                    system_message, output, analysis, acceptance_criteria, chat_log = process_message_with_single_llm(
                        user_message,
                        expected_output,
                        acceptance_criteria,
                        initial_system_message,
                        recursion_limit,
                        max_output_age,
                        simple_model_name,
                        prompt_template_group,
                        aggressive_exploration,
                    )
                elif active_model_tab == "Advanced":
                    system_message, output, analysis, acceptance_criteria, chat_log = process_message_with_2_llms(
                        user_message,
                        expected_output,
                        acceptance_criteria,
                        initial_system_message,
                        recursion_limit,
                        max_output_age,
                        advanced_optimizer_model_name_input,
                        advanced_executor_model_name_input,
                        prompt_template_group,
                        aggressive_exploration,
                    )
                else:  # Expert
                    system_message, output, analysis, acceptance_criteria, chat_log = process_message_with_expert_llms(
                        user_message,
                        expected_output,
                        acceptance_criteria,
                        initial_system_message,
                        recursion_limit,
                        max_output_age,
                        expert_prompt_initial_developer_model_name,
                        expert_prompt_initial_developer_temperature_input,
                        expert_prompt_acceptance_criteria_model_name,
                        expert_prompt_acceptance_criteria_temperature_input,
                        expert_prompt_developer_model_name,
                        expert_prompt_developer_temperature_input,
                        expert_prompt_executor_model_name,
                        expert_prompt_executor_temperature_input,
                        expert_prompt_output_history_analyzer_model_name,
                        expert_prompt_output_history_analyzer_temperature_input,
                        expert_prompt_analyzer_model_name,
                        expert_prompt_analyzer_temperature_input,
                        expert_prompt_suggester_model_name,
                        expert_prompt_suggester_temperature_input,
                        prompt_template_group,
                        aggressive_exploration,
                    )

                st.session_state.system_message_output = system_message
                st.session_state.output = output
                st.session_state.analysis = analysis
                st.session_state.acceptance_criteria_output = acceptance_criteria
                st.session_state.chat_log = chat_log

            except Exception as e:
                st.error(f"Error: {e}")

        st.text_area("System Message",
                     key="system_message_output", height=100)
        st.button("Copy System Message", key="copy_system_message",
                  on_click=copy_system_message)
        st.text_area("Output", st.session_state.output, height=100)
        st.text_area("Analysis", st.session_state.analysis, height=100)
        acceptance_criteria_output = st.text_area(
            "Acceptance Criteria", key="acceptance_criteria_output", height=100)
        st.button("Copy Acceptance Criteria", key="copy_acceptance_criteria",
                  on_click=copy_acceptance_criteria)
        st.json(st.session_state.chat_log)
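For reference, `chat_log_2_chatbot_list` above expects a JSON-lines log in which each record carries an `action` (`invoke` or `response`) and a `message` field. A minimal sketch of that round trip, assuming the function above is in scope and using made-up messages:

```python
import json

sample_log = "\n".join([
    json.dumps({"action": "invoke", "message": "Summarize this text."}),
    json.dumps({"action": "response", "message": "A one-line summary."}),
])
# Each 'invoke' becomes a user turn, each 'response' an assistant turn.
assert chat_log_2_chatbot_list(sample_log) == [
    ["Summarize this text.", None],
    [None, "A one-line summary."],
]
```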
guidelines/gradio.md
ADDED
@@ -0,0 +1,29 @@
# Gradio Development Guidelines

## Best Practices

- Use `gr.Blocks` to create a structured UI layout with rows, columns, tabs, and accordions (see the sketch after this list).
- Organize related UI elements into groups using `gr.Group` for better readability and maintainability.
- Provide clear labels and instructions for user inputs and interactions using `gr.Markdown` and `gr.Textbox`.
- Use `gr.Dropdown` to allow users to select from a predefined list of options.
- Implement buttons with `gr.Button` and assign appropriate callbacks using the `click` event.
- Use `gr.Examples` to provide sample inputs for users to quickly test the app's functionality.
- Handle file uploads and downloads using `gr.File`, `gr.UploadButton`, and `gr.DownloadButton`.
- Display output using appropriate components like `gr.Textbox`, `gr.Chatbot`, `gr.Dataframe`, etc.
- Implement flagging functionality to allow users to report issues or provide feedback.
- Use `gr.Accordion` to hide detailed information that may not be necessary for all users.
- Provide a clear and concise title for the app using the `title` parameter in `gr.Blocks`.
- Use `gr.ClearButton` to allow users to reset input fields and start over.
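A minimal sketch of the layout and callback practices above; the components and names are illustrative, not taken from this commit:

```python
import gradio as gr

with gr.Blocks(title="Demo") as demo:
    gr.Markdown("Enter a message and click Echo.")
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(label="Message")
            btn = gr.Button("Echo")
        with gr.Column():
            out = gr.Textbox(label="Result")
    btn.click(fn=lambda s: s, inputs=inp, outputs=out)  # wire the callback
    gr.ClearButton([inp, out])  # reset both fields

if __name__ == "__main__":
    demo.launch()
```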
## Principles

- Prioritize usability and user experience in the app's design and layout.
- Ensure the app is responsive and works well on different screen sizes and devices.
- Optimize performance by minimizing unnecessary computations and caching results when possible.
- Follow PEP 8 style guidelines for Python code.
- Document the app's purpose, usage instructions, and code to enhance maintainability.
- Test the app thoroughly to identify and fix bugs, edge cases, and performance issues.
- Consider accessibility and ensure the app can be used by people with different abilities.
- Provide clear feedback to users about the app's status and results.
- Allow users to customize the app's behavior through settings and options.
- Design the app to be modular and extensible, allowing for future enhancements and new features.
guidelines/prompts.md
ADDED
@@ -0,0 +1,348 @@
# Prompts and Guidelines

## Prompts

### Prompt Initial Developer

- **gpt**:
  - role: system
    message: |
      # Expert Prompt Engineer

      You are an expert prompt engineer tasked with creating system messages for AI assistants.

      ## Instructions

      1. Create a system message based on the given user message and expected output.
      2. Ensure the system message can handle similar user messages.
      3. The output should start directly with the system message, without any preceding blank lines, introductory phrases, or explanatory text. Do not include extra lines at the beginning or end of the output.
      4. The Expected Output text should not appear in the System Message as an example, but it is OK to use similar text as an example instead.
      5. In the System Message, do not use `Expected Output` to refer to the example you want to illustrate. Instead, directly describe the specific features you need.
      6. Format the system message as instructions for the AI assistant, such as "You should...". Never phrase it as an introduction, such as "I will...".

      ## Output

      Provide only the system message, adhering to the above guidelines.

### Prompt Developer

- **gpt**:
  - role: system
    message: |
      # Expert Prompt Engineer

      You are an expert prompt engineer tasked with updating system messages for AI assistants. You update the System Message according to the Suggestions, to improve the Output and match the Expected Output more closely.

      ## Instructions

      1. Update the system message based on the given Suggestion, User Message, and Expected Output.
      2. Ensure the updated system message can handle similar user messages.
      3. Modify only the content mentioned in the Suggestion. Do not change the parts that are not related to the Suggestion.
      4. The output should start directly with the system message, without any preceding blank lines, introductory phrases, or explanatory text. Do not include extra lines at the beginning or end of the output.
      5. Explicitly request in the System Message that a behavior be avoided (e.g. `Don't ...`) if the Suggestions ask for the behavior to be avoided but the Current System Message does not mention it.
      6. The Expected Output text should not appear in the System Message as an example, but it is OK to use similar text as an example instead.
      7. In the System Message, do not use `Expected Output` to refer to the example you want to illustrate. Instead, directly describe the specific features you need.
      8. Remove the Expected Output text, or text highly similar to the Expected Output, from the System Message if it is present.
      9. Format the system message as instructions for the AI assistant, such as "You should...". Never phrase it as an introduction, such as "I will...".

      ## Output

      Provide only the updated System Message, adhering to the above guidelines.

### Prompt Executor

- **gpt**:
  - role: system
    message: "{system_message}"
  - role: human
    message: "{user_message}"
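At runtime, `prompt_templates_confz2langchain` in `app/streamlit_meta_prompt.py` turns role/message pairs like the two above into a LangChain `ChatPromptTemplate`. A minimal sketch with hypothetical fill-in values:

```python
from langchain_core.prompts import ChatPromptTemplate

# The (role, message) tuples mirror the Prompt Executor entries above.
executor_template = ChatPromptTemplate.from_messages([
    ("system", "{system_message}"),
    ("human", "{user_message}"),
])

messages = executor_template.format_messages(
    system_message="You are a concise summarizer.",  # hypothetical values
    user_message="Summarize: LangChain prompt templates.",
)
print(messages)  # [SystemMessage(...), HumanMessage(...)]
```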
### Output History Analyzer

- **gpt**:
  - role: system
    message: |
      You are a text comparing program. You read the Acceptance Criteria, compare the Expected Output with two different outputs, and decide which one is closer to the Expected Output. When comparing the outputs, ignore the differences which are acceptable or ignorable according to the Acceptance Criteria.

      You output the following analysis according to the Acceptance Criteria:

      * Your analysis in a Markdown list.
      * The ID of the output that is closer to the Expected Output, in the following format:

      ```
      # Analysis

      ...

      # Output ID closer to Expected Output: [ID]
      ```

      You must choose one of the two outputs. If both outputs are exactly the same, output the following:

      ```
      # Analysis

      ...

      # Draw
      ```

### Prompt Analyzer

- **gpt**:
  - role: system
    message: |
      You are a text comparing program. You compare the following output texts, analyze the System Message, and provide a detailed analysis according to [`Acceptance Criteria`]. Then you decide whether [`Actual Output`] is acceptable.

      Provide your analysis in the following format:

      ```
      - Acceptable Differences: [List acceptable differences succinctly]
      - Unacceptable Differences: [List unacceptable differences succinctly]
      - Accept: [Yes/No]
      ```

      * Compare the Expected Output and the Actual Output with the guidance of the Acceptance Criteria.
      * Only set 'Accept' to 'Yes' if the Acceptance Criteria are all met. Otherwise, set 'Accept' to 'No'.
      * List only the acceptable differences according to the Acceptance Criteria in the 'Acceptable Differences' section.
      * List only the unacceptable differences according to the Acceptance Criteria in the 'Unacceptable Differences' section.

      # Acceptance Criteria

      Compared with Expected Output [EO]:
      ```
      {acceptance_criteria}
      ```

### Prompt Suggester

- **gpt**:
  - role: system
    message: |
      Read the following inputs and outputs of an LLM prompt, along with the analysis of them. Then suggest how to improve the System Message.

      * The goal is to improve the System Message to match the Expected Output better.
      * Ignore all Acceptable Differences and focus on Unacceptable Differences.
      * Suggest formal changes first, then semantic changes.
      * Provide your suggestions in a Markdown list, nothing else. Output only the suggestions related to Unacceptable Differences.
      * Start every suggestion with [`The System Message should ...`].
      * Figure out the parts of the System Message that conflict with the suggestions, and suggest modification or deletion.
      * Because the Expected Output won't be shown to the prompt developer who will read your suggestions, do not simply describe the output as being the same as, similar to, or different from the Expected Output, such as [`the output should not use a different format and style compared to the Expected Output`] or [`the output should match the expected output exactly`]; instead, describe the expected characteristics specifically and suggest a detailed example.
      * Explicitly request in the System Message that a behavior be avoided (e.g. [`The System Message should explicitly state that the output should not ...`]) if the behavior is asked to be removed by the Suggestions, appears in the Actual Output, but is not mentioned in the Current System Message.
      * The Expected Output text should not appear in the System Message as an example, but it is OK to use similar yet distinct text as an example instead.
      * Ask to remove the Expected Output text, or text highly similar to the Expected Output, from the System Message if it is present.
      * Provide format examples (but don't use the Expected Output text as the example), or the detected format name, if the System Message does not.
      * Specify the detected format name (e.g. XML, JSON, etc.) of the Expected Output, if the System Message does not mention it.

### Acceptance Criteria Developer

- **gpt**:
  - role: system
    message: |
      # Acceptance Criteria Developer

      You are an acceptance criteria developer. You will receive a specific example of a task type to create acceptance criteria for. You will respond directly with the acceptance criteria.

      ## Instructions

      The user will provide you a specific example with a User Message (input) and an Expected Output (output) of a task type. You will respond with acceptance criteria for the task type, by comparing with the Expected Output (which may be referenced as EO), including the following:

      * What the output should include
      * What the output should not include
      * Language requirements
      * Formatting requirements
      * Structure requirements
      * Style requirements
      * Any specific requirements

      ## Output

      Create acceptance criteria in the following format:

      ```
      # Acceptance Criteria

      * [Overall Criteria]
      * ...
      * Unacceptable differences (compared with EO):
        * ...
      * Acceptable differences (compared with EO):
        * ...
      ```

      Focus on `Unacceptable differences` and `Acceptable differences`. Keep the Overall Criteria brief (no more than 50 words).

### Task Description Generator

- **gpt**:
  - role: system
    message: |
      Given the JSON example(s) for a task type:

      {raw_example}

      Provide a concise description of the task type, including the format and style of the input and output. If there are multiple examples, provide an overall description and ignore unique parts.

      Format your response as follows:
      Task Description: [Your description here]

### Task Description Updater

- **gpt**:
  - role: system
    message: |
      Given the task type description and suggestions, update the task type description according to the suggestions.

      1. Input Information:
         - You will receive a task type description and suggestions for updating the description.
         - Carefully read and understand the provided information.

      2. Task Analysis:
         - Identify the core elements and characteristics of the task.
         - Consider possible generalization dimensions such as task domain, complexity, input/output format, application scenarios, etc.

      3. Update Task Description:
         - Apply the suggestions to update the task description. Don't change anything that is not suggested.
         - Ensure the updated description is clear, specific, and directly related to the task.

      4. Output Format:
         - Format your response as follows:

           Task Description: [Your updated description here]

         - Output the updated `Task Description` only. Don't output anything else.

      5. Completeness Check:
         - Ensure all important aspects of the task description are covered.
         - Check for any missing key information or dimensions.

      6. Quantity Requirement:
         - Provide at least 5 specification suggestions across different dimensions.

### Specification Suggestions Generator

- **gpt**:
  - role: system
    message: |
      {{
        "prompt": "Generate suggestions to narrow the task scope for a given task type and example:\n\n1. Analyze the task description and input/output examples.\n2. Identify 3~5 relevant dimensions (e.g., purpose, input/output format, language, steps, criteria, constraints).\n3. Create 3~5 actionable suggestions (no more than 20 words for each) to narrow the task scope based on the above dimensions. Make sure the suggestions are compatible with the provided example.\n4. Start each suggestion with a verb.\n5. Output in JSON format, following `output_format`.\n",
        "output_format": "{{\n  \"dimensions\": [\n    {{ \"dimension\": \"...\" }},\n    {{ \"dimension\": \"...\" }}\n  ],\n  \"suggestions\": [\n    {{ \"suggestion\": \"...\" }},\n    {{ \"suggestion\": \"...\" }}\n  ]\n}}\n",
        "task_description": "\n{description}\n",
        "examples": "\n{raw_example}\n"
      }}

### Generalization Suggestions Generator

- **gpt**:
  - role: system
    message: |
      {{
        "prompt": "Generate task generalization suggestions for a given task type and example:\n\n1. Analyze the task description and input/output examples.\n2. Identify 3~5 relevant dimensions (e.g., purpose, input/output format, language, steps, criteria, constraints).\n3. Create 3~5 actionable suggestions (no more than 20 words for each) to expand the scope of the task based on the above dimensions. Make sure the suggestions are compatible with the provided example.\n4. Start each suggestion with a verb.\n5. Output in JSON format, following `output_format`.\n",
        "output_format": "{{\n  \"dimensions\": [\n    {{ \"dimension\": \"...\" }},\n    {{ \"dimension\": \"...\" }}\n  ],\n  \"suggestions\": [\n    {{ \"suggestion\": \"...\" }},\n    {{ \"suggestion\": \"...\" }}\n  ]\n}}\n",
        "task_description": "\n{description}\n",
        "examples": "\n{raw_example}\n"
      }}

### Input Analyzer

- **gpt**:
  - role: system
    message: |
      For the specific task type, analyze the possible task inputs across multiple dimensions.

      Conduct a detailed analysis and enumerate:

      1. Core Attributes: Identify the fundamental properties or characteristics of this input type.
      2. Variation Dimensions: For each dimension that may vary, specify:
         - Dimension name
         - Possible range of values or options
         - Impact on input nature or task difficulty
      3. Constraints: List any rules or limitations that must be adhered to.
      4. Edge Cases: Describe extreme or special scenarios that may test the robustness of task processing.
      5. External Factors: Enumerate factors that might influence input generation or task completion.
      6. Potential Extensions: Propose ways to expand or modify this input type to create new variants.

      Format your response as follows:
      Input Analysis: [Your analysis here]

### Briefs Generator

- **gpt**:
  - role: system
    message: |
      {{
        "prompt": "Given the task type description, and input analysis, generate descriptions for {generating_batch_size} new examples with detailed attributes based on this task type. But don't provide any detailed task output.\n\nUse the input analysis to create diverse and comprehensive example briefs that cover various input dimensions and attribute ranges.\n\nFormat your response as a JSON object following `output_format`.",
        "output_format": "{{
          "new_example_briefs": [
            {{
              "example_brief": "..."
            }},
            {{
              "example_brief": "..."
            }},
            ...
          ]
        }},
        "task_description": "{description}",
        "input_analysis": "{input_analysis}",
        "generating_batch_size": "{generating_batch_size}"
      }}

### Examples From Briefs Generator

- **gpt**:
  - role: system
    message: |
      {{
        "prompt": "Given the task type description, brief descriptions for new examples, and JSON example(s), generate {generating_batch_size} more input/output examples for this task type, strictly based on the brief descriptions. Ensure that the new examples are consistent with the brief descriptions and do not introduce any new information not present in the briefs. Output in JSON format, following `output_format`.",
        "output_format": "{{
          "examples": [
            {{
              "input": "...",
              "output": "..."
            }},
            {{
              "input": "...",
              "output": "..."
            }},
            ...
          ]
        }},
        "task_description": "{description}",
        "new_example_briefs": {new_example_briefs},
        "raw_example": "{raw_example}"
      }}

### Examples Directly Generator

- **gpt**:
  - role: system
    message: |
      {{
        "prompt": "Given the task type description, and input/output example(s), generate {generating_batch_size} new input/output examples for this task type. Output in JSON format, following `output_format`.",
        "output_format": "{{
          "examples": [
            {{
              "input": "...",
              "output": "..."
            }},
            {{
              "input": "...",
              "output": "..."
            }},
            ...
          ]
        }},
        "task_description": "{description}",
        "examples": "{raw_example}"
      }}
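Note that the JSON-style prompts above double the braces (`{{`, `}}`) so they survive template formatting as literal braces, while single-brace fields such as `{generating_batch_size}` are substituted. A minimal sketch of that behavior with plain `str.format` and hypothetical values:

```python
template = (
    '{{ "prompt": "Generate {generating_batch_size} examples", '
    '"examples": "{raw_example}" }}'
)
# Doubled braces become literal braces; named fields are filled in.
print(template.format(generating_batch_size=5, raw_example="..."))
# -> { "prompt": "Generate 5 examples", "examples": "..." }
```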
## Guidelines

1. **Clarity and Specificity**: Prompts should be clear and specific to guide the AI assistant effectively. Avoid ambiguity in instructions and examples.

2. **Consistency**: Maintain consistency in the format and style of prompts across different tasks to ensure predictable behavior from the AI assistant.

3. **Avoidance of Expected Output**: Do not include the expected output directly in the prompt. Instead, describe the desired characteristics and format.

4. **Focus on System Message**: The system message should be formatted as instructions for the AI assistant, not as an introduction or explanation.

5. **Modularity and Reusability**: Prompts should be modular and reusable, allowing for easy adaptation and combination for different tasks.

6. **Detailed Analysis**: For input analysis, consider multiple dimensions such as core attributes, variation dimensions, constraints, edge cases, external factors, and potential extensions.

7. **Actionable Suggestions**: When generating suggestions, ensure they are actionable, concise, and start with a verb. Focus on relevant dimensions to narrow or expand the task scope.

8. **JSON Format for Complex Outputs**: Use JSON format for prompts that require complex outputs, ensuring structured and machine-readable responses.

9. **Batch Generation**: When generating multiple examples or briefs, specify the batch size to control the quantity and diversity of outputs.

10. **Validation and Retry Mechanisms**: Implement validation and retry mechanisms for prompts that may fail due to incorrect formats or other errors, ensuring robustness and reliability (see the sketch after this list).
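A minimal sketch of guideline 10, assuming a LangChain-style chat model whose `invoke` returns an object with a `content` string; the helper name is illustrative:

```python
import json

def invoke_with_json_retry(llm, prompt: str, max_retries: int = 3) -> dict:
    """Re-invoke the model until its output parses as JSON, up to max_retries."""
    last_error = None
    for _ in range(max_retries):
        raw = llm.invoke(prompt).content  # assumed LangChain-style interface
        try:
            return json.loads(raw)
        except json.JSONDecodeError as e:
            last_error = e  # malformed output: try again
    raise ValueError(f"No valid JSON after {max_retries} attempts") from last_error
```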
guidelines/streamlit.md
ADDED
@@ -0,0 +1,30 @@
# Streamlit Development Guidelines

## Best Practices

- Use `st.session_state` to store and share data across the app, such as input data, output results, and UI state (see the sketch after this list).
- Organize code into functions for better readability and maintainability.
- Use `st.expander` to group related UI elements and allow users to collapse/expand sections.
- Provide options for users to import/export data, such as using `st.file_uploader` and `st.download_button`.
- Use `st.columns` to create responsive layouts and align UI elements.
- Provide clear labels and instructions for user inputs and interactions.
- Handle exceptions and display user-friendly error messages using `st.warning`.
- Use `st.spinner` to indicate when long-running operations are in progress.
- Allow users to customize and control the app's behavior through widgets like `st.slider`, `st.selectbox`, etc.
- Use `st.dataframe` to display interactive tables, with features like row selection.
- Implement callbacks using `on_click` or `on_change` to respond to user interactions.
- Use `st.sidebar` to display additional information or controls without cluttering the main UI.
- Organize the app's UI elements in a logical order, grouping related functionality together.
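A minimal sketch of the session-state and callback practices above, mirroring the copy-button pattern in `app/streamlit_meta_prompt.py`; the widget names are illustrative:

```python
import streamlit as st

if "saved_message" not in st.session_state:
    st.session_state.saved_message = ""

def save_message():
    # Callbacks run before the rerun, so state set here is visible to
    # widgets rendered afterwards.
    st.session_state.saved_message = st.session_state.draft

st.text_area("Draft", key="draft")
st.button("Save", on_click=save_message)
st.text_area("Saved copy", st.session_state.saved_message)
```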
## Principles

- Prioritize usability and user experience in the app's design and layout.
- Ensure the app is responsive and works well on different screen sizes.
- Optimize performance by minimizing unnecessary computations and caching results when possible.
- Follow PEP 8 style guidelines for Python code.
- Document the app's purpose, usage instructions, and code to enhance maintainability.
- Test the app thoroughly to identify and fix bugs, edge cases, and performance issues.
- Consider accessibility and ensure the app can be used by people with different abilities.
- Provide clear feedback to users about the app's status and results.
- Allow users to customize the app's behavior through settings and options.
- Design the app to be modular and extensible, allowing for future enhancements and new features.