Merge branch 'lib' into hf-main
- .github/workflows/docker-image.yml +41 -0
- .gitignore +7 -0
- Dockerfile +20 -0
- LICENSE +21 -0
- README.md +262 -8
- app/config.py +18 -0
- app/examples/log.csv +236 -0
- app/gradio_meta_prompt.py +103 -0
- config.yml +39 -0
- demo/cot_meta_prompt.ipynb +69 -0
- demo/cot_meta_prompt.py +61 -0
- demo/default_meta_prompts.py +219 -0
- demo/langgraph_meta_prompt.ipynb +0 -0
- demo/prompt_ui.py +712 -0
- meta_prompt/__init__.py +4 -0
- meta_prompt/meta_prompt.py +442 -0
- poetry.lock +0 -0
- pyproject.toml +24 -0
- requirements.txt +122 -0
- tests/meta_prompt_graph_test.py +160 -0
.github/workflows/docker-image.yml
ADDED
@@ -0,0 +1,41 @@
name: Docker Image CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:

  # build:

  #   runs-on: ubuntu-latest

  #   steps:
  #   - uses: actions/checkout@v3
  #   - name: Build the Docker image
  #     run: docker build . --file Dockerfile --tag yaleh/meta-prompt:$(date +%s)

  build-and-publish:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout code
      uses: actions/checkout@v2

    - name: Login to Docker Hub
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}

    - name: Build and Publish Docker image
      uses: docker/build-push-action@v2
      with:
        context: .
        push: true
        tags: |
          yaleh/meta-prompt:${{ github.sha }}
          ${{ github.ref == 'refs/heads/main' && 'yaleh/meta-prompt:latest' || '' }}
.gitignore
ADDED
@@ -0,0 +1,7 @@
.venv
.vscode
__pycache__
.env
config.yml.debug
debug.yml
dist
Dockerfile
ADDED
@@ -0,0 +1,20 @@
# Use an official Python runtime as the base image
FROM python:3.10

# Set the working directory in the container
WORKDIR /app

# Copy the project files into the working directory in the container
COPY config.yml poetry.lock pyproject.toml /app/
COPY app /app/app/
COPY meta_prompt /app/meta_prompt/

RUN pip install --no-cache-dir -U poetry
RUN poetry config virtualenvs.create false
RUN poetry install --with=dev

# Expose the port (if necessary)
EXPOSE 7860

# Run the script when the container launches
CMD python app/gradio_meta_prompt.py
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Yale Huang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,10 +1,264 @@
- title: Meta Prompt Docker
- emoji: 🦀
- colorFrom: red
- colorTo: purple
- sdk: docker
- pinned: false
- ---

# Meta Prompt Demo

This project is a demonstration of the concept of Meta Prompt, which involves generating a language model prompt using another language model. The demo showcases how a language model can be used to generate high-quality prompts for another language model.

[](https://www.youtube.com/watch?v=eNFUq2AjKCk)

## Overview

The demo utilizes OpenAI's language models and provides a user interface for interacting with the chatbot. It allows users to input prompts, execute model calls, compare outputs, and optimize prompts based on desired criteria.

**(2023/10/15)** A new working mode called `Other User Prompts` has been added. In the prompt optimization process, prompts that are similar to and compatible with the original user prompt are referenced, which significantly reduces the number of iteration cycles.

**(2024/06/30)** The `langgraph_meta_prompt.ipynb` file, committed on 06/30/2024, introduces a sophisticated framework for generating and refining system messages for AI assistants. Powered by LangGraph, this notebook can generate high-quality prompts with many more models, including `claude-3.5-sonnet:beta`, `llama-3-70b-instruct`, and several other models with 70B+ parameters. The notebook also introduces a new approach to converge the system messages automatically.

## Try it out!

| Name | Colab Notebook |
|------------------------|----------------------------------------------------------------------------------------------------------------------------|
| Meta Prompt | [](https://colab.research.google.com/github/yaleh/meta-prompt/blob/main/meta_prompt.ipynb) |
| LangGraph Meta Prompt | [](https://colab.research.google.com/github/yaleh/meta-prompt/blob/main/langgraph_meta_prompt.ipynb) |

## Installation

To use this demo, please follow these steps:

1. Clone the repository: `git clone https://github.com/yaleh/meta-prompt.git`
2. Change into the project directory: `cd meta-prompt`
3. Install the required dependencies: `pip install -r requirements.txt`

Please note that you need to have Python and pip installed on your system.

## Usage

To run the demo, execute the following command:

```
python meta_prompt.py --api_key YOUR_API_KEY
```

Replace `YOUR_API_KEY` with your OpenAI API key. Other optional parameters can be specified as well, such as proxy settings, model name, API base URL, maximum message length, sharing option, and advanced mode. Please refer to the command-line argument options in the script for more details.

Once the demo is running, you can interact with the chatbot through the user interface provided. Enter prompts, execute model calls, compare outputs, and explore the functionality of the Meta Prompt concept.

To perform the demo on the web, follow these steps:

1. Fill in the user prompt in the "Testing User Prompt" section with a prompt suitable for training/testing.
2. Fill in the expected output in the "Expected Output" section to specify the desired response from the model.
3. Set the "Optimize Iterations" parameter. It is recommended to start with 1 iteration and gradually increase it later.
4. Click on "Optimize Prompts" or "Single Step Optimize" to optimize (generate) the prompt.
5. After generating the "New System Prompt," click "Run New" to validate it using the "New System Prompt" and "Testing User Prompt."
6. If the "New Output" is better than the "Current Output," click "Accept New Prompt" to copy the "New System Prompt" and "New Output" to the "Current System Prompt" and "Current Output," respectively, as a basis for further optimization.
7. Adjust the "Optimize Iterations" and optimize again.

Usually, simple questions (such as arithmetic operations) require around 3 iterations of optimization, while complex problems may require more than 10 iterations.

### Settings

It is recommended to use GPT-4 as the Generating LLM Model for running the meta prompt. GPT-3.5 may not reliably generate the expected results for most questions.

You can use either GPT-4 or GPT-3.5 as the Testing LLM Model, similar to when using GPT/ChatGPT in regular scenarios.

If you have access to ChatGPT and want to save costs on GPT-4 API usage, you can also manually execute the meta-prompt by clicking "Merge Meta System Prompt." This will generate a complete prompt, including the meta-prompt and the current example, that can be used with ChatGPT. However, note that if further iterations are required, you need to manually copy the newly generated system prompt to the Current System Prompt and click "Run Current" to update the Current Output.

## Running Docker Image

To perform the demo using Docker, make sure you have Docker installed on your system, and then follow these steps:

1. Pull the Meta Prompt Docker image by running the following command:

```
docker pull yaleh/meta-prompt
```

2. Run the Docker container with the following command:

```
docker run -d --name meta-prompt-container -p 7860:7860 -e API_KEY=YOUR_API_KEY -e OTHER_ARGS="--advanced_mode" -e OPENAI_API_BASE=https://openai.lrfz.com/v1 yaleh/meta-prompt
```

Replace `YOUR_API_KEY` with your OpenAI API key. You can modify other environment variables if needed.
3. You can now access the Meta Prompt demo by opening your web browser and visiting `http://localhost:7860`.

To stop and remove the Meta Prompt container, run the following commands:

```
docker stop meta-prompt-container
docker rm meta-prompt-container
```

Usually, simple questions (such as arithmetic operations) require around 3 iterations of optimization, while complex problems may require more than 10 iterations.

## Examples

### Arithmetic

#### Testing User Prompt

```
(2+8)*3
```

#### Expected Output

```
(2+8)*3
= 10*3
= 30
```

#### Prompt After 4 Iterations

```
ROLE

You are a math tutor.

TASK

Your task is to solve the mathematical expression provided by the user and provide a concise, step-by-step solution. Each step should only include the calculation and the result, without any additional explanations or step labels.

REQUIREMENTS AND RESTRICTIONS

* The solution should be provided in standard mathematical notation.
* The format of the mathematical expressions should be consistent with the user's input.
* The symbols used in the mathematical expressions should be consistent with the user's input.
* No spaces should be included around the mathematical operators.
* Avoid unnecessary explanations or verbosity.
* Do not include any additional information or explanations beyond the direct calculation steps.
* Do not include a final solution statement.

{user_message}
```

### GDP

#### Testing User Prompt

```
Here is the GDP data in billions of US dollars (USD) for these years:

Germany:

2015: $3,368.29 billion
2016: $3,467.79 billion
2017: $3,677.83 billion
2018: $3,946.00 billion
2019: $3,845.03 billion
France:

2015: $2,423.47 billion
2016: $2,465.12 billion
2017: $2,582.49 billion
2018: $2,787.86 billion
2019: $2,715.52 billion
United Kingdom:

2015: $2,860.58 billion
2016: $2,650.90 billion
2017: $2,622.43 billion
2018: $2,828.87 billion
2019: $2,829.21 billion
Italy:

2015: $1,815.72 billion
2016: $1,852.50 billion
2017: $1,937.80 billion
2018: $2,073.90 billion
2019: $1,988.14 billion
Spain:

2015: $1,199.74 billion
2016: $1,235.95 billion
2017: $1,313.13 billion
2018: $1,426.19 billion
2019: $1,430.38 billion

```

#### Expected Output

```
Year,Germany,France,United Kingdom,Italy,Spain
2016-2015,2.96%,1.71%,-7.35%,2.02%,3.04%
2017-2016,5.08%,4.78%,-1.07%,4.61%,6.23%
2018-2017,7.48%,7.99%,7.89%,7.10%,8.58%
2019-2018,-2.56%,-2.59%,0.01%,-4.11%,0.30%
```

#### Other User Prompts

```
Here is the GDP data in billions of US dollars (USD) for these years:

1. China:
   - 2010: $6,101.18 billion
   - 2011: $7,572.80 billion
   - 2012: $8,560.59 billion
   - 2013: $9,607.23 billion
   - 2014: $10,482.65 billion

2. India:
   - 2010: $1,675.62 billion
   - 2011: $1,823.05 billion
   - 2012: $1,827.64 billion
   - 2013: $1,856.72 billion
   - 2014: $2,046.88 billion

3. Japan:
   - 2010: $5,700.35 billion
   - 2011: $6,157.47 billion
   - 2012: $6,203.21 billion
   - 2013: $5,155.72 billion
   - 2014: $4,616.52 billion

4. South Korea:
   - 2010: $1,464.26 billion
   - 2011: $1,622.03 billion
   - 2012: $1,624.76 billion
   - 2013: $1,305.76 billion
   - 2014: $1,411.25 billion

5. Indonesia:
   - 2010: $706.39 billion
   - 2011: $846.48 billion
   - 2012: $878.47 billion
   - 2013: $868.36 billion
   - 2014: $891.77 billion
```

#### Prompt After 1 Iteration

```
ROLE

You are an economic analyst.

TASK

Your task is to calculate the annual percentage change in GDP for each country based on the provided data.

REQUIREMENTS_AND_RESTRICTIONS

- The data will be provided in the format: "Year: $GDP in billions"
- Calculate the percentage change from year to year for each country.
- Present the results in a table format, with each row representing the change from one year to the next, and each column representing a different country.
- The table should be formatted as "Year-Year,Country1,Country2,..."
- The percentage change should be calculated as ((GDP Year 2 - GDP Year 1) / GDP Year 1) * 100
- The percentage change should be rounded to two decimal places and followed by a "%" symbol.
- If data for a year is missing for a country, leave that cell blank in the table.
```
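
As a quick sanity check of the percentage-change formula used in this example, the 2015-to-2016 figure for Germany can be reproduced by hand (a minimal sketch; the values come from the Testing User Prompt above, and small rounding differences relative to the Expected Output table are acceptable under the acceptance criteria):

```
gdp_2015, gdp_2016 = 3368.29, 3467.79           # Germany, from the Testing User Prompt
change = (gdp_2016 - gdp_2015) / gdp_2015 * 100  # formula from the requirements above
print(f"{change:.2f}%")                          # prints 2.95%, close to the 2.96% in the Expected Output
```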

## License

This project is licensed under the MIT License. Please see the [LICENSE](LICENSE) file for more information.

## Contact

For any questions or feedback regarding this project, please feel free to reach out to Yale Huang at [email protected].

---

**Acknowledgements:**

I would like to express my gratitude to my colleagues at [Wiz.AI](https://www.wiz.ai/) for their support and contributions.
app/config.py
ADDED
@@ -0,0 +1,18 @@
# config.py
from confz import BaseConfig
from pydantic import BaseModel, Extra
from typing import Optional

class LLMConfig(BaseModel):
    type: str

    class Config:
        extra = Extra.allow

class MetaPromptConfig(BaseConfig):
    llms: Optional[dict[str, LLMConfig]]
    examples_path: Optional[str]
    server_name: Optional[str] = None
    server_port: Optional[int] = None
    recursion_limit: Optional[int] = 25
    recursion_limit_max: Optional[int] = 50
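
A minimal sketch (not part of this commit) of loading `MetaPromptConfig` on its own from the `config.yml` added below; the field names come from `app/config.py` above and the `confz` sources mirror those used in `app/gradio_meta_prompt.py`:

```python
# Hypothetical standalone loading of MetaPromptConfig via confz
from confz import FileSource
from app.config import MetaPromptConfig

config = MetaPromptConfig(config_sources=FileSource(file='config.yml'))
print(list(config.llms.keys()))                  # e.g. ['groq/llama3-70b-8192']
print(config.llms['groq/llama3-70b-8192'].type)  # 'ChatOpenAI'; extra fields are kept via Extra.allow
print(config.recursion_limit)                    # 16 with the bundled config.yml
```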
app/examples/log.csv
ADDED
@@ -0,0 +1,236 @@
User Message,Expected Output,Acceptance Criteria
How do I reverse a list in Python?,Use the `[::-1]` slicing technique or the `list.reverse()` method.,"Similar in meaning, text length and style."
(2+8)*3,"(2+8)*3
= 10*3
= 30
","
* Exactly text match.
* Acceptable differences:
  * Extra or missing spaces.
  * Extra or missing line breaks at the beginning or end of the output.
"
"Here is the GDP data in billions of US dollars (USD) for these years:

Germany:

2015: $3,368.29 billion
2016: $3,467.79 billion
2017: $3,677.83 billion
2018: $3,946.00 billion
2019: $3,845.03 billion
France:

2015: $2,423.47 billion
2016: $2,465.12 billion
2017: $2,582.49 billion
2018: $2,787.86 billion
2019: $2,715.52 billion
United Kingdom:

2015: $2,860.58 billion
2016: $2,650.90 billion
2017: $2,622.43 billion
2018: $2,828.87 billion
2019: $2,829.21 billion
Italy:

2015: $1,815.72 billion
2016: $1,852.50 billion
2017: $1,937.80 billion
2018: $2,073.90 billion
2019: $1,988.14 billion
Spain:

2015: $1,199.74 billion
2016: $1,235.95 billion
2017: $1,313.13 billion
2018: $1,426.19 billion
2019: $1,430.38 billion
","Year,Germany,France,United Kingdom,Italy,Spain
2016-2015,2.96%,1.71%,-7.35%,2.02%,3.04%
2017-2016,5.08%,4.78%,-1.07%,4.61%,6.23%
2018-2017,7.48%,7.99%,7.89%,7.10%,8.58%
2019-2018,-2.56%,-2.59%,0.01%,-4.11%,0.30%
","
* Strict text matching of the header row and first column(year).
* Acceptable differences:
  * Differences in digital/percentage values in the table, even significant ones.
  * Extra or missing spaces.
  * Extra or missing line breaks.
"
"Gene sequence: ATGGCCATGGCGCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA
Species: Escherichia coli","{
  ""Gene Sequence Analysis Results"": {
    ""Basic Information"": {
      ""Sequence Length"": 54,
      ""GC Content"": ""51.85%""
    },
    ""Nucleotide Composition"": {
      ""A"": {""Count"": 12, ""Percentage"": ""22.22%""},
      ""T"": {""Count"": 11, ""Percentage"": ""20.37%""},
      ""G"": {""Count"": 16, ""Percentage"": ""29.63%""},
      ""C"": {""Count"": 15, ""Percentage"": ""27.78%""}
    },
    ""Codon Analysis"": {
      ""Start Codon"": ""ATG"",
      ""Stop Codon"": ""TGA"",
      ""Codon Table"": [
        {""Codon"": ""ATG"", ""Amino Acid"": ""Methionine"", ""Position"": 1},
        {""Codon"": ""GCC"", ""Amino Acid"": ""Alanine"", ""Position"": 2},
        {""Codon"": ""ATG"", ""Amino Acid"": ""Methionine"", ""Position"": 3},
        // ... other codons ...
        {""Codon"": ""TGA"", ""Amino Acid"": ""Stop Codon"", ""Position"": 18}
      ]
    },
    ""Potential Function Prediction"": {
      ""Protein Length"": 17,
      ""Possible Functional Domains"": [
        {""Domain Name"": ""ABC Transporter"", ""Start Position"": 5, ""End Position"": 15, ""Confidence"": ""75%""},
        {""Domain Name"": ""Membrane Protein"", ""Start Position"": 1, ""End Position"": 17, ""Confidence"": ""60%""}
      ],
      ""Secondary Structure Prediction"": {
        ""α-helix"": [""2-8"", ""12-16""],
        ""β-sheet"": [""9-11""],
        ""Random Coil"": [""1"", ""17""]
      }
    },
    ""Homology Analysis"": {
      ""Most Similar Sequences"": [
        {
          ""Gene Name"": ""abcT"",
          ""Species"": ""Salmonella enterica"",
          ""Similarity"": ""89%"",
          ""E-value"": ""3e-25""
        },
        {
          ""Gene Name"": ""yojI"",
          ""Species"": ""Escherichia coli"",
          ""Similarity"": ""95%"",
          ""E-value"": ""1e-30""
        }
      ]
    },
    ""Mutation Analysis"": {
      ""SNP Sites"": [
        {""Position"": 27, ""Wild Type"": ""A"", ""Mutant"": ""G"", ""Amino Acid Change"": ""Glutamine->Arginine""},
        {""Position"": 42, ""Wild Type"": ""C"", ""Mutant"": ""T"", ""Amino Acid Change"": ""None (Synonymous Mutation)""}
      ]
    }
  }
}","* Consistent with Expected Output:
  * Formats of all JSON sections
  * Data types of all JSON fields
  * Top layer sections
* Acceptable differences:
  * Extra or missing spaces
  * Extra or missing line breaks at the beginning or end of the output
  * Differences in JSON field values
  * JSON wrapped in backquotes"
今天下午3点,在北京国家会议中心,阿里巴巴集团董事局主席马云宣布将投资100亿元人民币用于农村电商发展。这一决定受到了与会代表的热烈欢迎,大家认为这将为中国农村经济带来新的机遇。,"{
  ""文本分析结果"": {
    ""情感分析"": {
      ""整体情感"": ""积极"",
      ""情感得分"": 0.82,
      ""情感细分"": {
        ""乐观"": 0.75,
        ""兴奋"": 0.60,
        ""期待"": 0.85
      }
    },
    ""实体识别"": [
      {""实体"": ""北京"", ""类型"": ""地点"", ""起始位置"": 7, ""结束位置"": 9},
      {""实体"": ""国家会议中心"", ""类型"": ""地点"", ""起始位置"": 9, ""结束位置"": 15},
      {""实体"": ""阿里巴巴集团"", ""类型"": ""组织"", ""起始位置"": 16, ""结束位置"": 22},
      {""实体"": ""马云"", ""类型"": ""人物"", ""起始位置"": 26, ""结束位置"": 28},
      {""实体"": ""100亿元"", ""类型"": ""金额"", ""起始位置"": 32, ""结束位置"": 37},
      {""实体"": ""人民币"", ""类型"": ""货币"", ""起始位置"": 37, ""结束位置"": 40},
      {""实体"": ""中国"", ""类型"": ""地点"", ""起始位置"": 71, ""结束位置"": 73}
    ],
    ""关键词提取"": [
      {""关键词"": ""农村电商"", ""权重"": 0.95},
      {""关键词"": ""马云"", ""权重"": 0.85},
      {""关键词"": ""投资"", ""权重"": 0.80},
      {""关键词"": ""阿里巴巴"", ""权重"": 0.75},
      {""关键词"": ""经济机遇"", ""权重"": 0.70}
    ]
  }
}","* Consistent with Expected Output:
  * Formats of all JSON sections
  * Data types of all JSON fields
  * Top layer sections
* Acceptable differences:
  * Differences in digital values in the table.
  * Extra or missing spaces.
  * Extra or missing line breaks at the beginning or end of the output.
  * Differences in JSON field values
  * Differences in section/item orders.
  * JSON wrapped in backquotes."
Low-noise amplifier,"A '''low-noise amplifier''' ('''LNA''') is an electronic component that amplifies a very low-power [[signal]] without significantly degrading its [[signal-to-noise ratio]] (SNR). Any [[electronic amplifier]] will increase the power of both the signal and the [[Noise (electronics)|noise]] present at its input, but the amplifier will also introduce some additional noise. LNAs are designed to minimize that additional noise, by choosing special components, operating points, and [[Circuit topology (electrical)|circuit topologies]]. Minimizing additional noise must balance with other design goals such as [[power gain]] and [[impedance matching]].

LNAs are found in [[Radio|radio communications]] systems, [[Amateur Radio]] stations, medical instruments and [[electronic test equipment]]. A typical LNA may supply a power gain of 100 (20 [[decibels]] (dB)) while decreasing the SNR by less than a factor of two (a 3 dB [[noise figure]] (NF)). Although LNAs are primarily concerned with weak signals that are just above the [[noise floor]], they must also consider the presence of larger signals that cause [[intermodulation distortion]].","* Consistent with Expected Output:
  * Language
  * Text length
  * Text style
  * Text structures
* Cover all the major content of Expected Output.
* Acceptable differences:
  * Minor format differences.
  * Expression differences.
  * Numerical differences.
  * Additional content in Actual Output.
  * Missing minor content in Actual Output."
What is the meaning of life?,"[
  {""persona"": ""Philosopher"", ""prompt"": ""Explore the concept of life's meaning through the lens of existentialism and purpose-driven existence.""},
  {""persona"": ""Scientist"", ""prompt"": ""Examine the biological and evolutionary perspectives on the function and significance of life.""},
  {""persona"": ""Child"", ""prompt"": ""Imagine you're explaining to a curious 7-year-old what makes life special and important.""}
]","* Consistent with Expected Output:
  * Formats of all JSON sections
  * Data types and formats of all JSON fields
  * Top layer sections
* Acceptable differences:
  * Different personas or prompts
  * Different numbers of personas
  * Extra or missing spaces
  * Extra or missing line breaks at the beginning or end of the output
* Unacceptable:
  * Showing the personas in Expected Output in System Message"
"<?php
$username = $_POST['username'];
$password = $_POST['password'];

$query = ""SELECT * FROM users WHERE username = '$username' AND password = '$password'"";
$result = mysqli_query($connection, $query);

if (mysqli_num_rows($result) > 0) {
    echo ""Login successful"";
} else {
    echo ""Login failed"";
}
?>","security_analysis:
  vulnerabilities:
    - type: SQL Injection
      severity: Critical
      description: Unsanitized user input directly used in SQL query
      mitigation: Use prepared statements or parameterized queries
    - type: Password Storage
      severity: High
      description: Passwords stored in plain text
      mitigation: Use password hashing (e.g., bcrypt) before storage
  additional_issues:
    - Lack of input validation
    - No CSRF protection
    - Potential for timing attacks in login logic
  overall_risk_score: 9.5/10
  recommended_actions:
    - Implement proper input sanitization
    - Use secure password hashing algorithms
    - Add CSRF tokens to forms
    - Consider using a secure authentication library","* Consistent with Expected Output:
  * Formats of all YAML sections
  * Data types and formats of all YAML fields
  * Top layer sections
* Acceptable differences:
  * Differences in field values
  * Extra or missing spaces
  * Extra or missing line breaks at the beginning or end of the output
  * YAML wrapped in backquotes"
app/gradio_meta_prompt.py
ADDED
@@ -0,0 +1,103 @@
import gradio as gr
from confz import BaseConfig, CLArgSource, EnvSource, FileSource
from meta_prompt import MetaPromptGraph, AgentState
from langchain_openai import ChatOpenAI
from app.config import MetaPromptConfig

class LLMModelFactory:
    def __init__(self):
        pass

    def create(self, model_type: str, **kwargs):
        model_class = globals()[model_type]
        return model_class(**kwargs)

llm_model_factory = LLMModelFactory()

def process_message(user_message, expected_output, acceptance_criteria, initial_system_message,
                    recursion_limit: int, model_name: str):
    # Create the input state
    input_state = AgentState(
        user_message=user_message,
        expected_output=expected_output,
        acceptance_criteria=acceptance_criteria,
        system_message=initial_system_message
    )

    # Get the output state from MetaPromptGraph
    type = config.llms[model_name].type
    args = config.llms[model_name].model_dump(exclude={'type'})
    llm = llm_model_factory.create(type, **args)
    meta_prompt_graph = MetaPromptGraph(llms=llm)
    output_state = meta_prompt_graph(input_state, recursion_limit=recursion_limit)

    # Validate the output state
    system_message = ''
    output = ''
    analysis = ''

    if 'best_system_message' in output_state and output_state['best_system_message'] is not None:
        system_message = output_state['best_system_message']
    else:
        system_message = "Error: The output state does not contain a valid 'best_system_message'"

    if 'best_output' in output_state and output_state['best_output'] is not None:
        output = output_state["best_output"]
    else:
        output = "Error: The output state does not contain a valid 'best_output'"

    if 'analysis' in output_state and output_state['analysis'] is not None:
        analysis = output_state['analysis']
    else:
        analysis = "Error: The output state does not contain a valid 'analysis'"

    return system_message, output, analysis

class FileConfig(BaseConfig):
    config_file: str = 'config.yml'  # default path

pre_config_sources = [
    EnvSource(prefix='METAPROMPT_', allow_all=True),
    CLArgSource()
]
pre_config = FileConfig(config_sources=pre_config_sources)

config_sources = [
    FileSource(file=pre_config.config_file, optional=True),
    EnvSource(prefix='METAPROMPT_', allow_all=True),
    CLArgSource()
]

config = MetaPromptConfig(config_sources=config_sources)

# Create the Gradio interface
iface = gr.Interface(
    fn=process_message,
    inputs=[
        gr.Textbox(label="User Message", show_copy_button=True),
        gr.Textbox(label="Expected Output", show_copy_button=True),
        gr.Textbox(label="Acceptance Criteria", show_copy_button=True),
    ],
    outputs=[
        gr.Textbox(label="System Message", show_copy_button=True),
        gr.Textbox(label="Output", show_copy_button=True),
        gr.Textbox(label="Analysis", show_copy_button=True)
    ],
    additional_inputs=[
        gr.Textbox(label="Initial System Message", show_copy_button=True, value=""),
        gr.Number(label="Recursion Limit", value=config.recursion_limit,
                  precision=0, minimum=1, maximum=config.recursion_limit_max, step=1),
        gr.Dropdown(
            label="Model Name",
            choices=config.llms.keys(),
            value=list(config.llms.keys())[0],
        )
    ],
    # stop_btn = gr.Button("Stop", variant="stop", visible=True),
    title="MetaPromptGraph Chat Interface",
    description="A chat interface for MetaPromptGraph to process user inputs and generate system messages.",
    examples=config.examples_path
)

# Launch the Gradio app
iface.launch(server_name=config.server_name, server_port=config.server_port)
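
For reference, a minimal sketch (hypothetical usage, not part of this commit) of driving the same `MetaPromptGraph` call without the Gradio UI; the model settings mirror `config.yml` below and the state keys mirror those read by `process_message` above:

```python
# Hypothetical direct use of MetaPromptGraph, mirroring process_message above
from langchain_openai import ChatOpenAI
from meta_prompt import MetaPromptGraph, AgentState

# Model settings as in config.yml; the API key is expected to come from the environment
llm = ChatOpenAI(model_name="llama3-70b-8192", temperature=0.1,
                 openai_api_base="https://api.groq.com/openai/v1", max_tokens=8192)
graph = MetaPromptGraph(llms=llm)

state = AgentState(
    user_message="(2+8)*3",
    expected_output="(2+8)*3\n= 10*3\n= 30",
    acceptance_criteria="Exact text match; extra spaces or line breaks are acceptable.",
    system_message="",
)
output_state = graph(state, recursion_limit=16)

# The graph may not populate every key, so guard the lookups the same way the app does
if "best_system_message" in output_state and output_state["best_system_message"] is not None:
    print(output_state["best_system_message"])
if "best_output" in output_state and output_state["best_output"] is not None:
    print(output_state["best_output"])
```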
config.yml
ADDED
@@ -0,0 +1,39 @@
llms:
  groq/llama3-70b-8192:
    type: ChatOpenAI
    temperature: 0.1
    model_name: "llama3-70b-8192"
    # openai_api_key: ""
    openai_api_base: "https://api.groq.com/openai/v1"
    max_tokens: 8192
    verbose: true
  # anthropic/claude-3-haiku:
  #   type: ChatOpenAI
  #   temperature: 0.1
  #   model_name: "anthropic/claude-3-haiku:beta"
  #   openai_api_key: ""
  #   openai_api_base: "https://openrouter.ai/api/v1"
  #   max_tokens: 8192
  #   verbose: true
  # anthropic/claude-3-sonnet:
  #   type: ChatOpenAI
  #   temperature: 0.1
  #   model_name: "anthropic/claude-3-sonnet:beta"
  #   openai_api_key: ""
  #   openai_api_base: "https://openrouter.ai/api/v1"
  #   max_tokens: 8192
  #   verbose: true
  # anthropic/deepseek-chat:
  #   type: ChatOpenAI
  #   temperature: 0.1
  #   model_name: "deepseek/deepseek-chat"
  #   openai_api_key: ""
  #   openai_api_base: "https://openrouter.ai/api/v1"
  #   max_tokens: 8192
  #   verbose: true

examples_path: "app/examples"
# server_name: 0.0.0.0
# server_port: 7860
recursion_limit: 16
recursion_limit_max: 20
demo/cot_meta_prompt.ipynb
ADDED
@@ -0,0 +1,69 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "v98KGZUT17EJ"
      },
      "outputs": [],
      "source": [
        "!wget https://github.com/yaleh/meta-prompt/raw/main/prompt_ui.py\n",
        "!wget https://github.com/yaleh/meta-prompt/raw/main/default_meta_prompts.py"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "MO89Z8-UY5Ht"
      },
      "outputs": [],
      "source": [
        "!pip install gradio langchain scikit-learn openai"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "9BZL20lBYLbj"
      },
      "outputs": [],
      "source": [
        "import openai\n",
        "import os\n",
        "openai.api_key = ''\n",
        "os.environ[\"OPENAI_API_KEY\"] = openai.api_key\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Z8M3eFzXZaOb"
      },
      "outputs": [],
      "source": [
        "from prompt_ui import PromptUI\n",
        "\n",
        "app = PromptUI()\n",
        "app.ui.queue().launch(debug=True)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
demo/cot_meta_prompt.py
ADDED
@@ -0,0 +1,61 @@
"""
MIT License

Copyright (c) 2023 Yale Huang
Email: [email protected]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
"""

import argparse
import os
import openai

import gradio as gr
from demo.prompt_ui import PromptUI

class ChatbotApp:
    def __init__(self, args):
        if args.api_key:
            os.environ["OPENAI_API_KEY"] = args.api_key

        if args.openai_api_base:
            os.environ["OPENAI_API_BASE"] = args.openai_api_base

        if args.proxy:
            os.environ["OPENAI_PROXY"] = args.proxy

        self.prompt_ui = PromptUI(advanced_mode=args.advanced_mode)

        self.ui = gr.TabbedInterface(
            [self.prompt_ui.ui],
            ['Prompt']
        )

    def launch(self, *args, **kwargs):
        self.ui.launch(*args, **kwargs)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--api_key", type=str, required=True, help="OpenAI API key")
    parser.add_argument("--proxy", type=str, default=None, help="Proxy settings")
    parser.add_argument("--share", action='store_true',
                        help="Launch app with sharing option")
    parser.add_argument("--advanced_mode", action='store_true', default=False,
                        help="Enable advanced mode")
    parser.add_argument("--server_name", type=str, default="127.0.0.1", help="Server name or IP address")
    parser.add_argument("--openai_api_base", type=str, default=None, help="OpenAI API base URL")

    return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    app = ChatbotApp(args)
    app.launch(share=args.share, server_name=args.server_name)
demo/default_meta_prompts.py
ADDED
@@ -0,0 +1,219 @@
DEFAULT_META_SYSTEM_PROMPT = \
'''
You are a Prompt Engineer. You review the Prompt template for GTP-3.5 and suggest changes.

# Prompt template format

You require Prompt to be written in the following format:

```
<ROLE>

<TASK>

<REQUIREMENTS_AND_RESTRICTIONS>

```

* ROLE: The role the LLM is required to play. Describe it in one sentence.
* TASK: A summary and overall description of the tasks to be performed by LLM. Describe it in one or more sentences.
* REQUIREMENTS_AND_RESTRICTIONS: Specific requirements for the task. Describe using Markdown List.

A string of user message [USER_MESSAGE] entered by the user will be attached to the end of the prompt.

# Check input

Check the input format as follows:

```
* Prompt Template

[PROMPT_TEMPLATE]

* User Message

[USER_MESSAGE]

* Expected GPT Message

[EXPECTED_GPT_MESSAGE]

* GPT Message

[GPT_MESSAGE]
```

* PROMPT_TEMPLATE: Prompt template that conforms to the above Prompt template format.
* USER_MESSAGE: User input. Used to replace {user_message} in the Prompt template.
* EXPECTED_GPT_MESSAGE: Expect output generated by GPT.
* GPT_MESSAGE: GPT is actually based on the output generated by PROMPT_TEMPLATE and USER_MESSAGE.

# examine

Check and recommend modifying the Prompt template as follows to produce output closer to EXPECTED_GPT_MESSAGE:

* Read and parse PROMPT_TEMPLATE, USER_MESSAGE and EXPECTED_GPT_MESSAGE.
* Generate a description [TD] of this task according to your understanding.
* Analyze the correlation between PROMPT_TEMPLATE and USER_MESSAGE [UMR].
* Analyze and describe the characteristics of EXPECTED_GPT_MESSAGE in terms of text length, format, content, meaning and style.
* Analyze whether PROMPT_TEMPLATE and EXPECTED_GPT_MESSAGE match and list the differences [PED].
* Check whether GPT_MESSAGE conforms to EXPECTED_GPT_MESSAGE. Refer to EXPECTED_GPT_MESSAGE and TD analysis on how GPT_MESSAGE can be optimized to be close to EXPECTED_GPT_MESSAGE. Modification suggestions are listed in detail [MCSL].
  * Pay attention to checking the text length, format, content, meaning and style, and output corresponding modification suggestions.
  * Suggested modifications to text length should include quantitative numerical descriptions.
  * Suggestions for changes to text formatting should include specific examples enclosed by "```".
  * Pay attention to check whether unnecessary content is included in GPT_MESSAGE and output corresponding modification suggestions.
  * Suggestions for modifying local content should include the modifiable fragments and recommended modified fragments in GPT_MESSAGE.
* Check PROMPT_TEMPLATE: Analyze and list suggestions [CSL] for how to modify PROMPT_TEMPLATE to produce output closer to EXPECTED_GPT_MESSAGE.
  * For requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, they should also be emphasized in TASK, and the opposite tendency (such as reverse adjustment of quantitative indicators or style descriptions) should be emphasized punitively to construct the strongest Negative feedback***.
  * For format requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, add an example enclosed with "```".
  * Based on PED recommendations on how to modify PROMPT_TEMPLATE.
  * Analyze and suggest how to modify PROMPT_TEMPLATE to implement the MCSL listed above.
  * Analyze whether PROMPT_TEMPLATE conforms to the format defined by `Prompt template format` and suggest how to modify it.
  * Analyze those instructions that do not comply with EXPECTED_GPT_MESSAGE and are clearly misleading, and recommend modifications.
  * Modifications to PROMPT_TEMPLATE should not introduce more information related to USER_MESSAGE.
  * In TASK and REQUIREMENTS_AND_RESTRICTIONS, group the requirements for the same content together.
  * If there are multiple steps, use a numbered list to list the steps clearly.
  * Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed.
  * Only output [CSL], do not output the modified PROMPT_TEMPLATE.
* Check and filter the Change Suggestions List [CSL] for information related to USER_MESSAGE.
  * Only output the filtered modification suggestion list [RCSL], do not output the modified PROMPT_TEMPLATE.
* Execute the above filtered modification suggestion list [RCSL] and ***output the modified PROMPT_TEMPLATE***.
  * Execute RCSL only, avoid other changes.
  * Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed, except the requirements that have been stated in TASK or REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE.
  * Strictly use the following format for output:
```
<!-- BEGIN OF PROMPT -->

<Updated Prompt>

<!-- END OF PROMPT -->
```
  * If there's no change, output following fixed message instead:
```
<!-- NO CHANGE TO PROMPT -->
```
* Evaluation modified PROMPT_TEMPLATE.
  * Analyze the changes it may cause in the output of LLM [EC].
  * Analyze whether EC would be more consistent with EXPECTED_GPT_MESSAGE.
  * Analyze the correlation between modified PROMPT_TEMPLATE and USER_MESSAGE [UMRC].
  * Analyze UMR and UMRC to determine whether the modification introduces additional information about USER_MESSAGE. If introduced, issue a warning.
* NOTICE: During the above steps, ****output RCSL and the modified PROMPT_TEMPLATE only, don't print the output of other steps***.

----

Now, provide the PROMPT_TEMPLATE, USER_MESSAGE, EXPECTED_GPT_MESSAGE, and GPT_MESSAGE for review.

'''

DEFAULT_META_SYSTEM_PROMPT_WITH_OTHER_PROMPTS = \
'''
You are a Prompt Engineer. You review the Prompt template for GTP-3.5 and suggest changes.

# Prompt template format

You require Prompt to be written in the following format:

```
<ROLE>

<TASK>

<REQUIREMENTS_AND_RESTRICTIONS>

```

* ROLE: The role the LLM is required to play. Describe it in one sentence.
* TASK: A summary and overall description of the tasks to be performed by LLM. Describe it in one or more sentences.
* REQUIREMENTS_AND_RESTRICTIONS: Specific requirements for the task. Describe using Markdown List.

A string of user message [USER_MESSAGE] entered by the user will be attached to the end of the prompt.

# Check input

Check the input format as follows:

```
* Prompt Template

[PROMPT_TEMPLATE]

* User Message

[USER_MESSAGE]

* Other User Messages

[OTHER_USER_MESSAGES]

* Expected GPT Message

[EXPECTED_GPT_MESSAGE]

* GPT Message

[GPT_MESSAGE]
```

* PROMPT_TEMPLATE: Prompt template that conforms to the above Prompt template format.
* USER_MESSAGE: User input. Used to replace {user_message} in the Prompt template.
* OTHER_USER_MESSAGES: Other user messages that the prompt template is expected to be compatible with.
* EXPECTED_GPT_MESSAGE: Expect output generated by GPT.
* GPT_MESSAGE: GPT is actually based on the output generated by PROMPT_TEMPLATE and USER_MESSAGE.

# examine

Check and recommend modifying the Prompt template as follows to produce output closer to EXPECTED_GPT_MESSAGE:

* Read and parse PROMPT_TEMPLATE, USER_MESSAGE, OTHER_USER_MESSAGES and EXPECTED_GPT_MESSAGE.
* Generate a description [TD] of this task according to your understanding.
* Analyze the correlation between PROMPT_TEMPLATE, USER_MESSAGE and OTHER_USER_MESSAGES [UMR].
* Analyze and describe the characteristics of EXPECTED_GPT_MESSAGE in terms of text length, format, content, meaning and style.
* Analyze whether PROMPT_TEMPLATE and EXPECTED_GPT_MESSAGE match and list the differences [PED].
* Check whether GPT_MESSAGE conforms to EXPECTED_GPT_MESSAGE. Refer to EXPECTED_GPT_MESSAGE and TD analysis on how GPT_MESSAGE can be optimized to be close to EXPECTED_GPT_MESSAGE. Modification suggestions are listed in detail [MCSL].
  * Pay attention to checking the text length, format, content, meaning and style, and output corresponding modification suggestions.
  * Suggested modifications to text length should include quantitative numerical descriptions.
  * Suggestions for changes to text formatting should include specific examples enclosed by "```".
  * Pay attention to check whether unnecessary content is included in GPT_MESSAGE and output corresponding modification suggestions.
  * Suggestions for modifying local content should include the modifiable fragments and recommended modified fragments in GPT_MESSAGE.
* Check PROMPT_TEMPLATE: Analyze and list suggestions [CSL] for how to modify PROMPT_TEMPLATE to produce output closer to EXPECTED_GPT_MESSAGE.
  * For requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, they should also be emphasized in TASK, and the opposite tendency (such as reverse adjustment of quantitative indicators or style descriptions) should be emphasized punitively to construct the strongest Negative feedback***.
  * For format requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, add an example enclosed with "```".
  * Based on PED recommendations on how to modify PROMPT_TEMPLATE.
  * Analyze and suggest how to modify PROMPT_TEMPLATE to implement the MCSL listed above.
  * Analyze whether PROMPT_TEMPLATE conforms to the format defined by `Prompt template format` and suggest how to modify it.
  * Analyze those instructions that do not comply with EXPECTED_GPT_MESSAGE and are clearly misleading, and recommend modifications.
  * Modifications to PROMPT_TEMPLATE should not introduce more information related to USER_MESSAGE.
  * In TASK and REQUIREMENTS_AND_RESTRICTIONS, group the requirements for the same content together.
  * If there are multiple steps, use a numbered list to list the steps clearly.
  * Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed.
  * Only output [CSL], do not output the modified PROMPT_TEMPLATE.
* Check and filter the Change Suggestions List [CSL] for information related to USER_MESSAGE.
  * Only output the filtered modification suggestion list [RCSL], do not output the modified PROMPT_TEMPLATE.
  * Keep it compatible with OTHER_USER_MESSAGES.
* Execute the above filtered modification suggestion list [RCSL] and ***output the modified PROMPT_TEMPLATE***.
  * Execute RCSL only, avoid other changes.
  * Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed, except the requirements that have been stated in TASK or REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE.
  * Strictly use the following format for output:
```
<!-- BEGIN OF PROMPT -->

<Updated Prompt>

<!-- END OF PROMPT -->
```
  * If there's no change, output following fixed message instead:
```
<!-- NO CHANGE TO PROMPT -->
```
* Evaluation modified PROMPT_TEMPLATE.
  * Analyze the changes it may cause in the output of LLM [EC].
  * Analyze whether EC would be more consistent with EXPECTED_GPT_MESSAGE.
  * Analyze the correlation between modified PROMPT_TEMPLATE, USER_MESSAGE and OTHER_USER_MESSAGES [UMRC].
  * Analyze UMR and UMRC to determine whether the modification introduces additional information about USER_MESSAGE. If introduced, issue a warning.
* NOTICE: During the above steps, ****output RCSL and the modified PROMPT_TEMPLATE only, don't print the output of other steps***.

----

Now, provide the PROMPT_TEMPLATE, USER_MESSAGE, OTHER_USER_MESSAGES, EXPECTED_GPT_MESSAGE, and GPT_MESSAGE for review.

'''
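
A rough sketch (hypothetical usage, not part of this commit) of sending one of these meta prompts to a chat model for a single review round, using the same `langchain` classes that `demo/prompt_ui.py` below imports; the review payload follows the `# Check input` format defined in the prompt, with placeholder values in angle brackets:

```python
# Hypothetical manual invocation of the meta prompt
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from demo.default_meta_prompts import DEFAULT_META_SYSTEM_PROMPT

review_request = (
    "* Prompt Template\n\n<current prompt template>\n\n"
    "* User Message\n\n(2+8)*3\n\n"
    "* Expected GPT Message\n\n(2+8)*3\n= 10*3\n= 30\n\n"
    "* GPT Message\n\n<output produced by the current template>\n"
)
chat = ChatOpenAI(model_name="gpt-4", temperature=0)
reply = chat([SystemMessage(content=DEFAULT_META_SYSTEM_PROMPT),
              HumanMessage(content=review_request)])
print(reply.content)  # the updated prompt is expected between the BEGIN/END OF PROMPT markers
```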
demo/langgraph_meta_prompt.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
demo/prompt_ui.py
ADDED
@@ -0,0 +1,712 @@
1 |
+
"""
|
2 |
+
MIT License
|
3 |
+
|
4 |
+
Copyright (c) 2023 Yale Huang
|
5 |
+
Email: [email protected]
|
6 |
+
|
7 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
8 |
+
of this software and associated documentation files (the "Software"), to deal
|
9 |
+
in the Software without restriction, including without limitation the rights
|
10 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11 |
+
copies of the Software, and to permit persons to whom the Software is
|
12 |
+
furnished to do so, subject to the following conditions:
|
13 |
+
|
14 |
+
The above copyright notice and this permission notice shall be included in all
|
15 |
+
copies or substantial portions of the Software.
|
16 |
+
"""
|
17 |
+
|
18 |
+
import re
|
19 |
+
import gradio as gr
|
20 |
+
|
21 |
+
from langchain.chat_models import ChatOpenAI
|
22 |
+
from langchain.schema import HumanMessage, SystemMessage
|
23 |
+
|
24 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
25 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
26 |
+
|
27 |
+
from demo.default_meta_prompts import *
|
28 |
+
|
29 |
+
gpt_models_not_legacy = [
|
30 |
+
"gpt-4",
|
31 |
+
"gpt-4-0613",
|
32 |
+
"gpt-4-32k",
|
33 |
+
"gpt-4-32k-0613",
|
34 |
+
"gpt-3.5-turbo",
|
35 |
+
"gpt-3.5-turbo-16k",
|
36 |
+
"gpt-3.5-turbo-instruct",
|
37 |
+
"gpt-3.5-turbo-0613"
|
38 |
+
]
|
39 |
+
|
40 |
+
gpt_models_legacy = [
|
41 |
+
"gpt-3.5-turbo-instruct",
|
42 |
+
"gpt-3.5-turbo-0301",
|
43 |
+
"text-davinci-003",
|
44 |
+
"text-davinci-002",
|
45 |
+
"code-davinci-002"
|
46 |
+
]
|
47 |
+
|
48 |
+
DEFAULT_MODEL_FOR_GENERATING="gpt-4"
|
49 |
+
DEFAULT_MODEL_FOR_TESTING="gpt-3.5-turbo"
|
50 |
+
DEFAULT_MODEL_FOR_OUTPUT_EVALUATING="gpt-3.5-turbo-instruct"
|
51 |
+
DEFAULT_CURRENT_SYSTEM_PROMPT = ''
|
52 |
+
DEFAULT_OUTPUT_EVALUATING_PROMPT = 'Find out which is more similar to string S, A or B? Print nothing if there\'s no significant difference between A and B. Else, print the result (letter A or B) only. Do nothing else.'
|
53 |
+
|
54 |
+
class PromptUI:
|
55 |
+
def __init__(self, advanced_mode = False, enable_other_user_prompts = False):
|
56 |
+
self.advanced_mode = advanced_mode
|
57 |
+
self.enable_other_user_prompts = enable_other_user_prompts
|
58 |
+
self.ui = self.init_ui()
|
59 |
+
|
60 |
+
def init_ui(self):
|
61 |
+
with gr.Blocks() as prompt_ui:
|
62 |
+
with gr.Row():
|
63 |
+
with gr.Column():
|
64 |
+
self.testing_user_prompt_textbox = gr.Textbox(
|
65 |
+
label="Testing User Prompt",
|
66 |
+
lines=10,
|
67 |
+
interactive=True,
|
68 |
+
show_copy_button=True
|
69 |
+
)
|
70 |
+
self.expect_output_textbox = gr.Textbox(
|
71 |
+
label="Expected Output",
|
72 |
+
lines=5,
|
73 |
+
interactive=True,
|
74 |
+
show_copy_button=True
|
75 |
+
)
|
76 |
+
self.other_user_prompts_checkbox = gr.Checkbox(
|
77 |
+
label="Other User Prompts",
|
78 |
+
info="Enable other user prompts in meta prompt?",
|
79 |
+
value=self.enable_other_user_prompts
|
80 |
+
)
|
81 |
+
self.other_user_prompts_textbox = gr.Textbox(
|
82 |
+
label="Other User Prompts",
|
83 |
+
lines=10,
|
84 |
+
interactive=True,
|
85 |
+
placeholder="Wrap each prompt with a pair of '```'.",
|
86 |
+
visible=self.enable_other_user_prompts,
|
87 |
+
show_copy_button=True
|
88 |
+
)
|
89 |
+
# Add gr.Number here for iterations input
|
90 |
+
self.iterations_number = gr.Number(value=1, label="Optimize Iterations", min=1, max=1000, step=1, decimals=0)
|
91 |
+
# Add button to trigger optimization here
|
92 |
+
self.optimize_btn = gr.Button(value="Optimize Prompt", variant='primary')
|
93 |
+
self.similar_candidate_textbox = gr.Textbox(label="Similarity Delta", lines=1, interactive=True)
|
94 |
+
self.compare_outputs_btn = gr.Button(value="Compare Outputs")
|
95 |
+
|
96 |
+
with gr.Column():
|
97 |
+
self.new_system_prompt_textbox = gr.Textbox(
|
98 |
+
label="New System Prompt",
|
99 |
+
lines=5,
|
100 |
+
interactive=True,
|
101 |
+
show_copy_button=True
|
102 |
+
)
|
103 |
+
self.new_output_textbox = gr.Textbox(
|
104 |
+
label="New Output",
|
105 |
+
lines=5,
|
106 |
+
interactive=True,
|
107 |
+
show_copy_button=True
|
108 |
+
)
|
109 |
+
with gr.Row():
|
110 |
+
self.run_meta_btn = gr.Button(value="↑ Single Step Optimize")
|
111 |
+
self.run_new_btn = gr.Button(value="⟳ Run New")
|
112 |
+
self.new_system_prompt_changed = gr.Checkbox(
|
113 |
+
label="New System Prompt Changed",
|
114 |
+
value=False,
|
115 |
+
interactive=False
|
116 |
+
)
|
117 |
+
|
118 |
+
with gr.Column():
|
119 |
+
self.current_system_prompt_textbox = gr.Textbox(
|
120 |
+
label="Current System Prompt",
|
121 |
+
value=DEFAULT_CURRENT_SYSTEM_PROMPT,
|
122 |
+
lines=5,
|
123 |
+
interactive=True,
|
124 |
+
show_copy_button=True
|
125 |
+
)
|
126 |
+
self.current_output_textbox = gr.Textbox(
|
127 |
+
label="Current Output",
|
128 |
+
lines=5,
|
129 |
+
interactive=True,
|
130 |
+
show_copy_button=True
|
131 |
+
)
|
132 |
+
with gr.Row():
|
133 |
+
self.accept_new_btn = gr.Button(value="→ Accept New Prompt")
|
134 |
+
self.run_current_btn = gr.Button(value="⟳ Run Current")
|
135 |
+
|
136 |
+
with gr.Row(visible=self.advanced_mode):
|
137 |
+
with gr.Column():
|
138 |
+
self.meta_system_prompt_textbox = gr.Textbox(label="Meta System Prompt",
|
139 |
+
value=DEFAULT_META_SYSTEM_PROMPT,
|
140 |
+
lines=10,
|
141 |
+
interactive=True
|
142 |
+
)
|
143 |
+
with gr.Column():
|
144 |
+
self.merged_meta_prompt_textbox = gr.Textbox(label="Merged Meta System Prompt",
|
145 |
+
lines=10,
|
146 |
+
interactive=False,
|
147 |
+
show_copy_button=True
|
148 |
+
)
|
149 |
+
self.merge_prompt_btn = gr.Button(value="Merge Meta System Prompt")
|
150 |
+
# self.chatgpt_output_textbox = gr.Textbox(label="Paste ChatGPT Output",
|
151 |
+
# lines=10,
|
152 |
+
# interactive=True
|
153 |
+
# )
|
154 |
+
# self.parse_chatgpt_output_btn = gr.Button(value="Parse ChatGPT Output")
|
155 |
+
|
156 |
+
with gr.Row(visible=self.advanced_mode):
|
157 |
+
with gr.Column():
|
158 |
+
self.llm_model_meta_dropdown = gr.Dropdown(
|
159 |
+
label="Generating LLM Model",
|
160 |
+
choices=gpt_models_not_legacy,
|
161 |
+
value=DEFAULT_MODEL_FOR_GENERATING,
|
162 |
+
interactive=True,
|
163 |
+
allow_custom_value=False
|
164 |
+
)
|
165 |
+
|
166 |
+
self.llm_model_meta_temperature_slider = gr.Slider(
|
167 |
+
minimum=0.0,
|
168 |
+
maximum=1.0,
|
169 |
+
step=0.01,
|
170 |
+
value=0.0,
|
171 |
+
interactive=True,
|
172 |
+
label="Generating LLM Model Temperature"
|
173 |
+
)
|
174 |
+
|
175 |
+
self.llm_model_meta_max_tokens_slider = gr.Slider(
|
176 |
+
minimum=256,
|
177 |
+
maximum=32000,
|
178 |
+
step=256,
|
179 |
+
value=0,
|
180 |
+
interactive=True,
|
181 |
+
label="Generating LLM Model Token Limit (0 for auto)"
|
182 |
+
)
|
183 |
+
|
184 |
+
self.llm_model_meta_request_timeout_slider = gr.Slider(
|
185 |
+
minimum=0,
|
186 |
+
maximum=600,
|
187 |
+
step=5,
|
188 |
+
value=600,
|
189 |
+
interactive=True,
|
190 |
+
label="Generating LLM Model Timeout"
|
191 |
+
)
|
192 |
+
|
193 |
+
self.llm_model_meta_max_retries_slider = gr.Slider(
|
194 |
+
minimum=0,
|
195 |
+
maximum=30,
|
196 |
+
step=1,
|
197 |
+
value=6,
|
198 |
+
interactive=True,
|
199 |
+
label="Generating LLM Model Max Retries"
|
200 |
+
)
|
201 |
+
|
202 |
+
with gr.Column():
|
203 |
+
self.llm_model_test_dropdown = gr.Dropdown(
|
204 |
+
label="Testing LLM Model",
|
205 |
+
choices=gpt_models_not_legacy,
|
206 |
+
value=DEFAULT_MODEL_FOR_TESTING,
|
207 |
+
interactive=True,
|
208 |
+
allow_custom_value=False
|
209 |
+
)
|
210 |
+
|
211 |
+
self.llm_model_test_temperature_slider = gr.Slider(
|
212 |
+
minimum=0.0,
|
213 |
+
maximum=1.0,
|
214 |
+
step=0.01,
|
215 |
+
value=0.0,
|
216 |
+
interactive=True,
|
217 |
+
label="Testing LLM Model Temperature"
|
218 |
+
)
|
219 |
+
|
220 |
+
self.llm_model_test_max_tokens_slider = gr.Slider(
|
221 |
+
minimum=256,
|
222 |
+
maximum=32000,
|
223 |
+
step=256,
|
224 |
+
value=0,
|
225 |
+
interactive=True,
|
226 |
+
label="Testing LLM Model Token Limit (0 for auto)"
|
227 |
+
)
|
228 |
+
|
229 |
+
self.llm_model_test_request_timeout_slider = gr.Slider(
|
230 |
+
minimum=0,
|
231 |
+
maximum=600,
|
232 |
+
step=5,
|
233 |
+
value=600,
|
234 |
+
interactive=True,
|
235 |
+
label="Testing LLM Model Timeout"
|
236 |
+
)
|
237 |
+
|
238 |
+
self.llm_model_test_max_retries_slider = gr.Slider(
|
239 |
+
minimum=0,
|
240 |
+
maximum=30,
|
241 |
+
step=1,
|
242 |
+
value=6,
|
243 |
+
interactive=True,
|
244 |
+
label="Testing LLM Model Max Retries"
|
245 |
+
)
|
246 |
+
# with gr.Column():
|
247 |
+
# self.llm_model_output_eval_dropdown = gr.Dropdown(label="Output Evaluating LLM Model",
|
248 |
+
# choices=gpt_models_legacy,
|
249 |
+
# value=DEFAULT_MODEL_FOR_OUTPUT_EVALUATING,
|
250 |
+
# interactive=True,
|
251 |
+
# allow_custom_value=False)
|
252 |
+
# self.llm_model_output_eval_slider = gr.Slider(minimum=0.0,
|
253 |
+
# maximum=1.0,
|
254 |
+
# step=0.01,
|
255 |
+
# default=0.0,
|
256 |
+
# label="Output Evaluating LLM Model of Temperature")
|
257 |
+
|
258 |
+
|
259 |
+
self.run_new_btn.click(
|
260 |
+
self.test_prompt,
|
261 |
+
[
|
262 |
+
self.new_system_prompt_textbox,
|
263 |
+
self.testing_user_prompt_textbox,
|
264 |
+
self.llm_model_test_dropdown,
|
265 |
+
self.llm_model_test_max_retries_slider,
|
266 |
+
self.llm_model_test_max_tokens_slider,
|
267 |
+
self.llm_model_test_request_timeout_slider,
|
268 |
+
self.llm_model_test_temperature_slider
|
269 |
+
],
|
270 |
+
[self.new_output_textbox]
|
271 |
+
)
|
272 |
+
self.run_current_btn.click(
|
273 |
+
self.test_prompt,
|
274 |
+
[
|
275 |
+
self.current_system_prompt_textbox,
|
276 |
+
self.testing_user_prompt_textbox,
|
277 |
+
self.llm_model_test_dropdown,
|
278 |
+
self.llm_model_test_max_retries_slider,
|
279 |
+
self.llm_model_test_max_tokens_slider,
|
280 |
+
self.llm_model_test_request_timeout_slider,
|
281 |
+
self.llm_model_test_temperature_slider
|
282 |
+
],
|
283 |
+
[self.current_output_textbox]
|
284 |
+
)
|
285 |
+
self.run_meta_btn.click(
|
286 |
+
self.meta_prompt,
|
287 |
+
[
|
288 |
+
self.meta_system_prompt_textbox,
|
289 |
+
self.current_system_prompt_textbox,
|
290 |
+
self.testing_user_prompt_textbox,
|
291 |
+
self.other_user_prompts_textbox,
|
292 |
+
self.expect_output_textbox,
|
293 |
+
self.current_output_textbox,
|
294 |
+
self.other_user_prompts_checkbox,
|
295 |
+
self.llm_model_meta_dropdown,
|
296 |
+
self.llm_model_meta_max_retries_slider,
|
297 |
+
self.llm_model_meta_max_tokens_slider,
|
298 |
+
self.llm_model_meta_request_timeout_slider,
|
299 |
+
self.llm_model_meta_temperature_slider
|
300 |
+
],
|
301 |
+
[self.new_system_prompt_textbox, self.new_system_prompt_changed]
|
302 |
+
)
|
303 |
+
self.accept_new_btn.click(self.copy_new_prompts,
|
304 |
+
[self.new_system_prompt_textbox, self.new_output_textbox],
|
305 |
+
[self.current_system_prompt_textbox, self.current_output_textbox])
|
306 |
+
self.compare_outputs_btn.click(self.compare_outputs,
|
307 |
+
[self.new_output_textbox, self.current_output_textbox, self.expect_output_textbox],
|
308 |
+
[self.similar_candidate_textbox])
|
309 |
+
# Attach the optimize_prompt function to the button click event.
|
310 |
+
# You should implement this function according to your optimization logic.
|
311 |
+
self.optimize_btn.click(
|
312 |
+
self.optimize_prompt,
|
313 |
+
[
|
314 |
+
self.meta_system_prompt_textbox,
|
315 |
+
self.current_system_prompt_textbox,
|
316 |
+
self.testing_user_prompt_textbox,
|
317 |
+
self.other_user_prompts_textbox,
|
318 |
+
self.expect_output_textbox,
|
319 |
+
self.current_output_textbox,
|
320 |
+
self.iterations_number,
|
321 |
+
self.other_user_prompts_checkbox,
|
322 |
+
self.llm_model_meta_dropdown,
|
323 |
+
self.llm_model_meta_max_retries_slider,
|
324 |
+
self.llm_model_meta_max_tokens_slider,
|
325 |
+
self.llm_model_meta_request_timeout_slider,
|
326 |
+
self.llm_model_meta_temperature_slider,
|
327 |
+
self.llm_model_test_dropdown,
|
328 |
+
self.llm_model_test_max_retries_slider,
|
329 |
+
self.llm_model_test_max_tokens_slider,
|
330 |
+
self.llm_model_test_request_timeout_slider,
|
331 |
+
self.llm_model_test_temperature_slider
|
332 |
+
],
|
333 |
+
[self.new_system_prompt_textbox, self.new_system_prompt_changed])
|
334 |
+
|
335 |
+
self.merge_prompt_btn.click(self.merge_meta_system_prompt,
|
336 |
+
[
|
337 |
+
self.meta_system_prompt_textbox,
|
338 |
+
self.current_system_prompt_textbox,
|
339 |
+
self.other_user_prompts_textbox,
|
340 |
+
self.testing_user_prompt_textbox,
|
341 |
+
self.expect_output_textbox,
|
342 |
+
self.current_output_textbox,
|
343 |
+
self.other_user_prompts_checkbox
|
344 |
+
],
|
345 |
+
[self.merged_meta_prompt_textbox])
|
346 |
+
|
347 |
+
self.other_user_prompts_checkbox.change(self.update_enable_other_user_prompts,
|
348 |
+
[self.other_user_prompts_checkbox],
|
349 |
+
[
|
350 |
+
self.other_user_prompts_textbox,
|
351 |
+
self.meta_system_prompt_textbox
|
352 |
+
])
|
353 |
+
|
354 |
+
|
355 |
+
return prompt_ui
|
356 |
+
|
357 |
+
def update_enable_other_user_prompts(self, new_value):
|
358 |
+
self.enable_other_user_prompts = new_value
|
359 |
+
return \
|
360 |
+
gr.Textbox.update(visible=new_value), \
|
361 |
+
gr.Textbox.update(
|
362 |
+
value = DEFAULT_META_SYSTEM_PROMPT_WITH_OTHER_PROMPTS if new_value else DEFAULT_META_SYSTEM_PROMPT
|
363 |
+
)
|
364 |
+
|
365 |
+
def merge_meta_system_prompt(
|
366 |
+
self,
|
367 |
+
meta_system_prompt,
|
368 |
+
current_system_prompt,
|
369 |
+
other_user_prompts,
|
370 |
+
testing_user_prompt,
|
371 |
+
expect_output,
|
372 |
+
current_output,
|
373 |
+
use_other_user_prompts
|
374 |
+
):
|
375 |
+
"""Merge meta and current system prompts."""
|
376 |
+
|
377 |
+
# converted_prompts = [prompt[0] for prompt in other_user_prompts.values]
|
378 |
+
|
379 |
+
user_prompt = self.generate_user_message(
|
380 |
+
current_system_prompt,
|
381 |
+
testing_user_prompt,
|
382 |
+
other_user_prompts if use_other_user_prompts else None,
|
383 |
+
expect_output,
|
384 |
+
current_output
|
385 |
+
)
|
386 |
+
|
387 |
+
merged_prompt = f"{meta_system_prompt}\n\n{user_prompt}"
|
388 |
+
|
389 |
+
return merged_prompt
|
390 |
+
|
391 |
+
def copy_new_prompts(self, system_prompt, output):
|
392 |
+
"""Copy prompts and output from new to current textboxes."""
|
393 |
+
|
394 |
+
return system_prompt, output
|
395 |
+
|
396 |
+
def test_prompt(
|
397 |
+
self,
|
398 |
+
system_prompt,
|
399 |
+
user_prompt,
|
400 |
+
model,
|
401 |
+
max_retries,
|
402 |
+
max_tokens,
|
403 |
+
request_timeout,
|
404 |
+
temperature,
|
405 |
+
):
|
406 |
+
# Create the prompt
|
407 |
+
prompt = [
|
408 |
+
SystemMessage(content=system_prompt),
|
409 |
+
HumanMessage(content=user_prompt)
|
410 |
+
]
|
411 |
+
|
412 |
+
chat_llm = ChatOpenAI(
|
413 |
+
model=model,
|
414 |
+
max_retries=max_retries,
|
415 |
+
max_tokens=None if max_tokens == 0 else max_tokens,
|
416 |
+
request_timeout=request_timeout,
|
417 |
+
temperature=temperature
|
418 |
+
)
|
419 |
+
|
420 |
+
# Get the response from OpenAI
|
421 |
+
gpt_response = chat_llm(prompt)
|
422 |
+
|
423 |
+
# Return the output to be placed in the output textbox
|
424 |
+
return gpt_response.content
|
425 |
+
|
426 |
+
def generate_user_message(self, current_system_prompt, testing_user_prompt, other_user_prompts, expect_output, current_output):
|
427 |
+
# other_prompts_formatted = '\n\n'.join([f"```\n{prompt}\n```" for prompt in other_user_prompts])
|
428 |
+
user_message = f"""
|
429 |
+
* Prompt Template
|
430 |
+
|
431 |
+
```
|
432 |
+
{current_system_prompt}
|
433 |
+
```
|
434 |
+
|
435 |
+
* User Message
|
436 |
+
|
437 |
+
```
|
438 |
+
{testing_user_prompt}
|
439 |
+
```
|
440 |
+
|
441 |
+
* Other User Messages
|
442 |
+
|
443 |
+
{other_user_prompts}
|
444 |
+
|
445 |
+
* Expected GPT Message
|
446 |
+
|
447 |
+
```
|
448 |
+
{expect_output}
|
449 |
+
```
|
450 |
+
|
451 |
+
* GPT Message
|
452 |
+
|
453 |
+
```
|
454 |
+
{current_output}
|
455 |
+
```
|
456 |
+
""" if other_user_prompts is not None else f"""
|
457 |
+
* Prompt Template
|
458 |
+
|
459 |
+
```
|
460 |
+
{current_system_prompt}
|
461 |
+
```
|
462 |
+
|
463 |
+
* User Message
|
464 |
+
|
465 |
+
```
|
466 |
+
{testing_user_prompt}
|
467 |
+
```
|
468 |
+
|
469 |
+
* Expected GPT Message
|
470 |
+
|
471 |
+
```
|
472 |
+
{expect_output}
|
473 |
+
```
|
474 |
+
|
475 |
+
* GPT Message
|
476 |
+
|
477 |
+
```
|
478 |
+
{current_output}
|
479 |
+
```
|
480 |
+
"""
|
481 |
+
return user_message
|
482 |
+
|
483 |
+
def meta_prompt(
|
484 |
+
self,
|
485 |
+
meta_system_prompt,
|
486 |
+
current_system_prompt,
|
487 |
+
testing_user_prompt,
|
488 |
+
other_user_prompts,
|
489 |
+
expect_output,
|
490 |
+
current_output,
|
491 |
+
use_user_prompts,
|
492 |
+
model,
|
493 |
+
max_retries,
|
494 |
+
max_tokens,
|
495 |
+
request_timeout,
|
496 |
+
temperature,
|
497 |
+
):
|
498 |
+
|
499 |
+
# Format the user message
|
500 |
+
user_message = self.generate_user_message(
|
501 |
+
current_system_prompt,
|
502 |
+
testing_user_prompt,
|
503 |
+
other_user_prompts if use_user_prompts else None,
|
504 |
+
expect_output,
|
505 |
+
current_output
|
506 |
+
)
|
507 |
+
|
508 |
+
# Create the prompt
|
509 |
+
prompt = [
|
510 |
+
SystemMessage(content=meta_system_prompt),
|
511 |
+
HumanMessage(content=user_message)
|
512 |
+
]
|
513 |
+
|
514 |
+
chat_llm = ChatOpenAI(
|
515 |
+
model=model,
|
516 |
+
max_retries=max_retries,
|
517 |
+
max_tokens=None if max_tokens == 0 else max_tokens,
|
518 |
+
request_timeout=request_timeout,
|
519 |
+
temperature=temperature
|
520 |
+
)
|
521 |
+
|
522 |
+
# Get the response from OpenAI
|
523 |
+
gpt_response = chat_llm(prompt)
|
524 |
+
|
525 |
+
updated_prompt = self.extract_updated_prompt(gpt_response.content)
|
526 |
+
changed = not self.detect_no_change(gpt_response.content)
|
527 |
+
|
528 |
+
# Return the output to be placed in the new system prompt textbox
|
529 |
+
if updated_prompt:
|
530 |
+
return updated_prompt, changed
|
531 |
+
else:
|
532 |
+
return gpt_response.content, changed
|
533 |
+
|
534 |
+
def extract_updated_prompt(self, gpt_response):
|
535 |
+
# Regular expression pattern to find the text enclosed
|
536 |
+
pattern = "<!-- BEGIN OF PROMPT -->(.*?)<!-- END OF PROMPT -->"
|
537 |
+
|
538 |
+
# Using search method to find the first occurrence of the pattern
|
539 |
+
result = re.search(pattern, gpt_response, re.DOTALL)
|
540 |
+
|
541 |
+
if result:
|
542 |
+
s = result.group(1).strip("\n")
|
543 |
+
if s.startswith("```") and s.endswith("```"):
|
544 |
+
s = s[3:-3]
|
545 |
+
return s # Return the matched string
|
546 |
+
else:
|
547 |
+
return None # If no such pattern is found return None
|
548 |
+
|
549 |
+
def detect_no_change(self, gpt_response):
|
550 |
+
# Regular expression pattern to find the exact string
|
551 |
+
pattern = "<!-- NO CHANGE TO PROMPT -->"
|
552 |
+
|
553 |
+
# Using search method to find the occurrence of the pattern
|
554 |
+
result = re.search(pattern, gpt_response)
|
555 |
+
|
556 |
+
if result:
|
557 |
+
return True # If the pattern is found return True
|
558 |
+
else:
|
559 |
+
return False # If no such pattern is found return False
|
560 |
+
|
561 |
+
# def compare_strings(self, a: str, b: str, s: str) -> str:
|
562 |
+
# # Create an instance of ChatOpenAI with the evaluation model
|
563 |
+
# chat_model = OpenAI(temperature=0, model_name=self.llm_model_output_eval_dropdown.value)
|
564 |
+
|
565 |
+
# # Create a prompt for comparison
|
566 |
+
# prompt = (DEFAULT_OUTPUT_EVALUATING_PROMPT +
|
567 |
+
# '\n\n' + f'# S\n\n```\n{s}\n```\n\n# A\n\n```\n{a}\n```\n\n# B\n\n```\n{b}\n```\n\n')
|
568 |
+
|
569 |
+
# # Get the response from OpenAI
|
570 |
+
# response = chat_model(prompt)
|
571 |
+
|
572 |
+
# # Remove '```' from beginning and end if it exists
|
573 |
+
# if response.startswith("```") and response.endswith("```"):
|
574 |
+
# response = response[3:-3]
|
575 |
+
|
576 |
+
# # Check the first character of the response and return accordingly
|
577 |
+
# if response.startswith('A'):
|
578 |
+
# return 'A'
|
579 |
+
# elif response.startswith('B'):
|
580 |
+
# return 'B'
|
581 |
+
# else:
|
582 |
+
# return None
|
583 |
+
|
584 |
+
def optimize_prompt(
|
585 |
+
self,
|
586 |
+
meta_system_prompt,
|
587 |
+
current_system_prompt,
|
588 |
+
testing_user_prompt,
|
589 |
+
other_user_prompts,
|
590 |
+
expect_output,
|
591 |
+
current_output,
|
592 |
+
iterations,
|
593 |
+
user_other_user_prompts,
|
594 |
+
meta_model,
|
595 |
+
meta_max_retries,
|
596 |
+
meta_max_tokens,
|
597 |
+
meta_request_timeout,
|
598 |
+
meta_temperature,
|
599 |
+
test_model,
|
600 |
+
test_max_retries,
|
601 |
+
test_max_tokens,
|
602 |
+
test_request_timeout,
|
603 |
+
test_temperature,
|
604 |
+
):
|
605 |
+
|
606 |
+
changed = False
|
607 |
+
|
608 |
+
# Iterate the specified number of times
|
609 |
+
for i in range(int(iterations)):
|
610 |
+
# If current_output is None or not provided, get it from test_prompt
|
611 |
+
if current_output is None:
|
612 |
+
current_output = self.test_prompt(
|
613 |
+
current_system_prompt,
|
614 |
+
testing_user_prompt,
|
615 |
+
test_model,
|
616 |
+
test_max_retries,
|
617 |
+
test_max_tokens,
|
618 |
+
test_request_timeout,
|
619 |
+
test_temperature,
|
620 |
+
)
|
621 |
+
|
622 |
+
# Call meta_prompt to get an optimized prompt
|
623 |
+
new_prompt, changed = self.meta_prompt(
|
624 |
+
meta_system_prompt,
|
625 |
+
current_system_prompt,
|
626 |
+
testing_user_prompt,
|
627 |
+
other_user_prompts,
|
628 |
+
expect_output,
|
629 |
+
current_output,
|
630 |
+
user_other_user_prompts,
|
631 |
+
meta_model,
|
632 |
+
meta_max_retries,
|
633 |
+
meta_max_tokens,
|
634 |
+
meta_request_timeout,
|
635 |
+
meta_temperature,
|
636 |
+
)
|
637 |
+
|
638 |
+
# If changed is False, break the loop
|
639 |
+
if not changed:
|
640 |
+
break
|
641 |
+
|
642 |
+
# If there is an updated prompt and it's different from the current one, update current_system_prompt
|
643 |
+
if new_prompt and new_prompt != current_system_prompt:
|
644 |
+
current_system_prompt = new_prompt
|
645 |
+
# Reset current_output to None so it gets recalculated in the next iteration
|
646 |
+
current_output = None
|
647 |
+
|
648 |
+
return current_system_prompt, changed # Return the optimized system prompt
|
649 |
+
|
650 |
+
def compare_strings(self, alpha: str, beta: str, expected: str) -> str:
|
651 |
+
# If both ALPHA and BETA are empty, return None
|
652 |
+
if not alpha and not beta:
|
653 |
+
return None
|
654 |
+
|
655 |
+
# If either ALPHA or BETA is empty, the non-empty string should be considered more similar to EXPECTED
|
656 |
+
if not alpha:
|
657 |
+
return 'B'
|
658 |
+
if not beta:
|
659 |
+
return 'A'
|
660 |
+
|
661 |
+
# If both ALPHA and BETA are identical, return None
|
662 |
+
if alpha == beta:
|
663 |
+
return None
|
664 |
+
|
665 |
+
# Create the CountVectorizer instance
|
666 |
+
vectorizer = CountVectorizer().fit_transform([alpha, beta, expected])
|
667 |
+
vectors = vectorizer.toarray()
|
668 |
+
|
669 |
+
# Calculate cosine similarities
|
670 |
+
alpha_sim = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))
|
671 |
+
beta_sim = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))
|
672 |
+
|
673 |
+
# Compare similarities and return the string that is more similar to the expected string
|
674 |
+
if alpha_sim > beta_sim:
|
675 |
+
return 'A'
|
676 |
+
elif beta_sim > alpha_sim:
|
677 |
+
return 'B'
|
678 |
+
else:
|
679 |
+
return None
|
680 |
+
|
681 |
+
def delta_similarities(self, alpha: str, beta: str, expected: str) -> float:
|
682 |
+
# If both ALPHA and BETA are empty, return 0
|
683 |
+
if not alpha and not beta:
|
684 |
+
return 0.0
|
685 |
+
|
686 |
+
# If either ALPHA or BETA is empty, the non-empty string should be considered more similar to EXPECTED
|
687 |
+
if not alpha:
|
688 |
+
return -1.0
|
689 |
+
if not beta:
|
690 |
+
return 1.0
|
691 |
+
|
692 |
+
# If both ALPHA and BETA are identical, return 0
|
693 |
+
if alpha == beta:
|
694 |
+
return 0.0
|
695 |
+
|
696 |
+
# Create the CountVectorizer instance
|
697 |
+
vectorizer = CountVectorizer().fit_transform([alpha, beta, expected])
|
698 |
+
vectors = vectorizer.toarray()
|
699 |
+
|
700 |
+
# Calculate cosine similarities
|
701 |
+
alpha_sim = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))
|
702 |
+
beta_sim = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))
|
703 |
+
|
704 |
+
# Return the difference in similarities
|
705 |
+
return alpha_sim[0][0] - beta_sim[0][0]
|
706 |
+
|
707 |
+
def compare_outputs(self, new_output, current_output, expected_output):
|
708 |
+
# Compare new output and current output against expected output
|
709 |
+
# result = self.compare_strings(new_output, current_output, expected_output)
|
710 |
+
result = self.delta_similarities(new_output, current_output, expected_output)
|
711 |
+
|
712 |
+
return result
|
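A small, self-contained sketch of the bag-of-words comparison that `delta_similarities` performs above; the strings are illustrative only, and a positive result means the first candidate is closer to the expected text:

```
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def delta_similarity(candidate_a: str, candidate_b: str, expected: str) -> float:
    # Build a shared bag-of-words vocabulary over the three texts.
    vectors = CountVectorizer().fit_transform(
        [candidate_a, candidate_b, expected]).toarray()
    sim_a = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))[0][0]
    sim_b = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))[0][0]
    return sim_a - sim_b

# Positive delta: the first output is more similar to the expected output.
print(delta_similarity("Reverse the list with slicing.",
                       "Sort the list in place.",
                       "Reverse a list using slicing or reverse()."))
```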
meta_prompt/__init__.py
ADDED
@@ -0,0 +1,4 @@
__version__ = '0.1.0'

from .meta_prompt import AgentState, MetaPromptGraph
meta_prompt/meta_prompt.py
ADDED
@@ -0,0 +1,442 @@
1 |
+
import typing
|
2 |
+
import pprint
|
3 |
+
import logging
|
4 |
+
from typing import Dict, Any, Callable, List, Union, Optional
|
5 |
+
from langchain_core.language_models import BaseLanguageModel
|
6 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
8 |
+
from langgraph.graph import StateGraph, END
|
9 |
+
from langgraph.checkpoint.memory import MemorySaver
|
10 |
+
from langgraph.errors import GraphRecursionError
|
11 |
+
from pydantic import BaseModel
|
12 |
+
|
13 |
+
class AgentState(BaseModel):
|
14 |
+
max_output_age: int = 0
|
15 |
+
user_message: Optional[str] = None
|
16 |
+
expected_output: Optional[str] = None
|
17 |
+
acceptance_criteria: Optional[str] = None
|
18 |
+
system_message: Optional[str] = None
|
19 |
+
output: Optional[str] = None
|
20 |
+
suggestions: Optional[str] = None
|
21 |
+
accepted: bool = False
|
22 |
+
analysis: Optional[str] = None
|
23 |
+
best_output: Optional[str] = None
|
24 |
+
best_system_message: Optional[str] = None
|
25 |
+
best_output_age: int = 0
|
26 |
+
|
27 |
+
class MetaPromptGraph:
|
28 |
+
NODE_PROMPT_INITIAL_DEVELOPER = "prompt_initial_developer"
|
29 |
+
NODE_PROMPT_DEVELOPER = "prompt_developer"
|
30 |
+
NODE_PROMPT_EXECUTOR = "prompt_executor"
|
31 |
+
NODE_OUTPUT_HISTORY_ANALYZER = "output_history_analyzer"
|
32 |
+
NODE_PROMPT_ANALYZER = "prompt_analyzer"
|
33 |
+
NODE_PROMPT_SUGGESTER = "prompt_suggester"
|
34 |
+
|
35 |
+
DEFAULT_PROMPT_TEMPLATES = {
|
36 |
+
NODE_PROMPT_INITIAL_DEVELOPER: ChatPromptTemplate.from_messages([
|
37 |
+
("system", """# Expert Prompt Engineer
|
38 |
+
|
39 |
+
You are an expert prompt engineer tasked with creating system messages for AI
|
40 |
+
assistants.
|
41 |
+
|
42 |
+
## Instructions
|
43 |
+
|
44 |
+
1. Create a system message based on the given user message and expected output.
|
45 |
+
2. Ensure the system message can handle similar user messages.
|
46 |
+
3. Output only the system message, without any additional content.
|
47 |
+
4. Expected Output text should not appear in System Message as an example. But
|
48 |
+
it's OK to use some similar text as an example instead.
|
49 |
+
5. Format the system message well, with no more than 80 characters per line
|
50 |
+
(except for raw text).
|
51 |
+
|
52 |
+
## Output
|
53 |
+
|
54 |
+
Provide only the system message, adhering to the above guidelines.
|
55 |
+
"""),
|
56 |
+
("human", """# User Message
|
57 |
+
|
58 |
+
{user_message}
|
59 |
+
|
60 |
+
# Expected Output
|
61 |
+
|
62 |
+
{expected_output}
|
63 |
+
""")
|
64 |
+
]),
|
65 |
+
NODE_PROMPT_DEVELOPER: ChatPromptTemplate.from_messages([
|
66 |
+
("system", """# Expert Prompt Engineer
|
67 |
+
|
68 |
+
You are an expert prompt engineer tasked with updating system messages for AI
|
69 |
+
assistants. You update the System Message according to the Suggestions, to improve
|
70 |
+
Output and match Expected Output more closely.
|
71 |
+
|
72 |
+
## Instructions
|
73 |
+
|
74 |
+
1. Update the system message based on the given Suggestion, User Message, and
|
75 |
+
Expected Output.
|
76 |
+
2. Ensure the updated system message can handle similar user messages.
|
77 |
+
3. Modify only the content mentioned in the Suggestion. Do not change the
|
78 |
+
parts that are not related to the Suggestion.
|
79 |
+
4. Output only the updated system message, without any additional content.
|
80 |
+
5. Expected Output text should not appear in System Message as an example. But
|
81 |
+
it's OK to use some similar text as an example instead.
|
82 |
+
* Remove the Expected Output text or text highly similar to Expected Output
|
83 |
+
from System Message, if it's present.
|
84 |
+
6. Format the system message well, with no more than 80 characters per line
|
85 |
+
(except for raw text).
|
86 |
+
|
87 |
+
## Output
|
88 |
+
|
89 |
+
Provide only the updated System Message, adhering to the above guidelines.
|
90 |
+
"""),
|
91 |
+
("human", """# Current system message
|
92 |
+
|
93 |
+
{system_message}
|
94 |
+
|
95 |
+
# User Message
|
96 |
+
|
97 |
+
{user_message}
|
98 |
+
|
99 |
+
# Expected Output
|
100 |
+
|
101 |
+
{expected_output}
|
102 |
+
|
103 |
+
# Suggestions
|
104 |
+
|
105 |
+
{suggestions}
|
106 |
+
""")
|
107 |
+
]),
|
108 |
+
NODE_PROMPT_EXECUTOR: ChatPromptTemplate.from_messages([
|
109 |
+
("system", "{system_message}"),
|
110 |
+
("human", "{user_message}")
|
111 |
+
]),
|
112 |
+
NODE_OUTPUT_HISTORY_ANALYZER: ChatPromptTemplate.from_messages([
|
113 |
+
("system", """You are a text comparing program. You read the Acceptance Criteria, compare the
|
114 |
+
expected output with two different outputs, and decide which one is
|
115 |
+
more similar to the expected output.
|
116 |
+
|
117 |
+
You output the following analysis according to the Acceptance Criteria:
|
118 |
+
|
119 |
+
* Your analysis in a Markdown list.
|
120 |
+
* The ID of the output that is more similar to the Expected Output as Preferred
|
121 |
+
Output ID, with the following format:
|
122 |
+
|
123 |
+
```
|
124 |
+
# Analysis
|
125 |
+
|
126 |
+
...
|
127 |
+
|
128 |
+
# Preferred Output ID: [ID]
|
129 |
+
```
|
130 |
+
|
131 |
+
If both outputs are equally similar to the expected output, output the following:
|
132 |
+
|
133 |
+
```
|
134 |
+
# Analysis
|
135 |
+
|
136 |
+
...
|
137 |
+
|
138 |
+
# Draw
|
139 |
+
```
|
140 |
+
"""),
|
141 |
+
("human", """
|
142 |
+
# Output ID: A
|
143 |
+
|
144 |
+
```
|
145 |
+
{best_output}
|
146 |
+
```
|
147 |
+
|
148 |
+
# Output ID: B
|
149 |
+
|
150 |
+
```
|
151 |
+
{output}
|
152 |
+
```
|
153 |
+
|
154 |
+
# Acceptance Criteria
|
155 |
+
|
156 |
+
{acceptance_criteria}
|
157 |
+
|
158 |
+
# Expected Output
|
159 |
+
|
160 |
+
```
|
161 |
+
{expected_output}
|
162 |
+
```
|
163 |
+
""")
|
164 |
+
]),
|
165 |
+
NODE_PROMPT_ANALYZER: ChatPromptTemplate.from_messages([
|
166 |
+
("system", """
|
167 |
+
You are a text comparing program. You compare the following output texts,
|
168 |
+
analyze the System Message and provide a detailed analysis according to
|
169 |
+
`Acceptance Criteria`. Then you decide whether `Actual Output` is acceptable.
|
170 |
+
|
171 |
+
Provide your analysis in the following format:
|
172 |
+
|
173 |
+
```
|
174 |
+
- Acceptable Differences: [List acceptable differences succinctly]
|
175 |
+
- Unacceptable Differences: [List unacceptable differences succinctly]
|
176 |
+
- Accept: [Yes/No]
|
177 |
+
```
|
178 |
+
|
179 |
+
* Compare Expected Output and Actual Output with the guidance of Accept Criteria.
|
180 |
+
* Only set 'Accept' to 'Yes', if Accept Criteria are all met. Otherwise, set 'Accept' to 'No'.
|
181 |
+
* List only the acceptable differences according to Accept Criteria in the 'Acceptable Differences' section.
|
182 |
+
* List only the unacceptable differences according to Accept Criteria in 'Unacceptable Differences' section.
|
183 |
+
|
184 |
+
# Acceptance Criteria
|
185 |
+
|
186 |
+
```
|
187 |
+
{acceptance_criteria}
|
188 |
+
```
|
189 |
+
"""),
|
190 |
+
("human", """
|
191 |
+
# System Message
|
192 |
+
|
193 |
+
```
|
194 |
+
{system_message}
|
195 |
+
```
|
196 |
+
|
197 |
+
# Expected Output
|
198 |
+
|
199 |
+
```
|
200 |
+
{expected_output}
|
201 |
+
```
|
202 |
+
|
203 |
+
# Actual Output
|
204 |
+
|
205 |
+
```
|
206 |
+
{output}
|
207 |
+
```
|
208 |
+
""")
|
209 |
+
]),
|
210 |
+
NODE_PROMPT_SUGGESTER: ChatPromptTemplate.from_messages([
|
211 |
+
("system", """
|
212 |
+
Read the following inputs and outputs of an LLM prompt, and also analysis about them.
|
213 |
+
Then suggest how to improve System Prompt.
|
214 |
+
|
215 |
+
System Prompt:
|
216 |
+
```
|
217 |
+
{system_message}
|
218 |
+
```
|
219 |
+
User Message:
|
220 |
+
```
|
221 |
+
{user_message}
|
222 |
+
```
|
223 |
+
Expected Output:
|
224 |
+
```
|
225 |
+
{expected_output}
|
226 |
+
```
|
227 |
+
Actual Output:
|
228 |
+
```
|
229 |
+
{output}
|
230 |
+
```
|
231 |
+
|
232 |
+
Acceptance Criteria:
|
233 |
+
```
|
234 |
+
{acceptance_criteria}
|
235 |
+
```
|
236 |
+
|
237 |
+
Analysis:
|
238 |
+
```
|
239 |
+
{analysis}
|
240 |
+
```
|
241 |
+
|
242 |
+
* The goal is to improve the System Prompt to match the Expected Output better.
|
243 |
+
* Ignore all Acceptable Differences and focus on Unacceptable Differences.
|
244 |
+
* Suggest formal changes first, then semantic changes.
|
245 |
+
* Provide your suggestions in a Markdown list, nothing else. Output only the
|
246 |
+
suggestions related to Unacceptable Differences.
|
247 |
+
* Use `... should ...` to clearly state the desired output.
|
248 |
+
* Figure out the contexts of the System Message that conflict with the suggestions,
|
249 |
+
and suggest modification or deletion.
|
250 |
+
* Expected Output text should not appear in System Message as an example. But
|
251 |
+
it's OK to use some similar text as an example instead.
|
252 |
+
* Ask to remove the Expected Output text or text highly similar to Expected Output
|
253 |
+
from System Message, if it's present.
|
254 |
+
* Provide format examples or detected format name, if System Message does not.
|
255 |
+
* Specify the detected format name (e.g. XML, JSON, etc.) of Expected Output, if
|
256 |
+
System Message does not mention it.
|
257 |
+
""")
|
258 |
+
])
|
259 |
+
}
|
260 |
+
|
261 |
+
@classmethod
|
262 |
+
def get_node_names(cls):
|
263 |
+
return [
|
264 |
+
cls.NODE_PROMPT_INITIAL_DEVELOPER,
|
265 |
+
cls.NODE_PROMPT_DEVELOPER,
|
266 |
+
cls.NODE_PROMPT_EXECUTOR,
|
267 |
+
cls.NODE_OUTPUT_HISTORY_ANALYZER,
|
268 |
+
cls.NODE_PROMPT_ANALYZER,
|
269 |
+
cls.NODE_PROMPT_SUGGESTER
|
270 |
+
]
|
271 |
+
|
272 |
+
def __init__(self,
|
273 |
+
llms: Union[BaseLanguageModel, Dict[str, BaseLanguageModel]] = {},
|
274 |
+
prompts: Dict[str, ChatPromptTemplate] = {},
|
275 |
+
verbose = False):
|
276 |
+
self.logger = logging.getLogger(__name__)
|
277 |
+
if verbose:
|
278 |
+
self.logger.setLevel(logging.DEBUG)
|
279 |
+
else:
|
280 |
+
self.logger.setLevel(logging.INFO)
|
281 |
+
|
282 |
+
if isinstance(llms, BaseLanguageModel):
|
283 |
+
self.llms: Dict[str, BaseLanguageModel] = {node: llms for node in self.get_node_names()}
|
284 |
+
else:
|
285 |
+
self.llms: Dict[str, BaseLanguageModel] = llms
|
286 |
+
self.prompt_templates: Dict[str, ChatPromptTemplate] = self.DEFAULT_PROMPT_TEMPLATES.copy()
|
287 |
+
self.prompt_templates.update(prompts)
|
288 |
+
|
289 |
+
def _create_workflow(self, including_initial_developer: bool = True) -> StateGraph:
|
290 |
+
workflow = StateGraph(AgentState)
|
291 |
+
|
292 |
+
workflow.add_node(self.NODE_PROMPT_DEVELOPER,
|
293 |
+
lambda x: self._prompt_node(
|
294 |
+
self.NODE_PROMPT_DEVELOPER,
|
295 |
+
"system_message",
|
296 |
+
x))
|
297 |
+
workflow.add_node(self.NODE_PROMPT_EXECUTOR,
|
298 |
+
lambda x: self._prompt_node(
|
299 |
+
self.NODE_PROMPT_EXECUTOR,
|
300 |
+
"output",
|
301 |
+
x))
|
302 |
+
workflow.add_node(self.NODE_OUTPUT_HISTORY_ANALYZER,
|
303 |
+
lambda x: self._output_history_analyzer(x))
|
304 |
+
workflow.add_node(self.NODE_PROMPT_ANALYZER,
|
305 |
+
lambda x: self._prompt_analyzer(x))
|
306 |
+
workflow.add_node(self.NODE_PROMPT_SUGGESTER,
|
307 |
+
lambda x: self._prompt_node(
|
308 |
+
self.NODE_PROMPT_SUGGESTER,
|
309 |
+
"suggestions",
|
310 |
+
x))
|
311 |
+
|
312 |
+
workflow.add_edge(self.NODE_PROMPT_DEVELOPER, self.NODE_PROMPT_EXECUTOR)
|
313 |
+
workflow.add_edge(self.NODE_PROMPT_EXECUTOR, self.NODE_OUTPUT_HISTORY_ANALYZER)
|
314 |
+
workflow.add_edge(self.NODE_PROMPT_SUGGESTER, self.NODE_PROMPT_DEVELOPER)
|
315 |
+
|
316 |
+
workflow.add_conditional_edges(
|
317 |
+
self.NODE_OUTPUT_HISTORY_ANALYZER,
|
318 |
+
lambda x: self._should_exit_on_max_age(x),
|
319 |
+
{
|
320 |
+
"continue": self.NODE_PROMPT_ANALYZER,
|
321 |
+
"rerun": self.NODE_PROMPT_SUGGESTER,
|
322 |
+
END: END
|
323 |
+
}
|
324 |
+
)
|
325 |
+
|
326 |
+
workflow.add_conditional_edges(
|
327 |
+
self.NODE_PROMPT_ANALYZER,
|
328 |
+
lambda x: self._should_exit_on_acceptable_output(x),
|
329 |
+
{
|
330 |
+
"continue": self.NODE_PROMPT_SUGGESTER,
|
331 |
+
END: END
|
332 |
+
}
|
333 |
+
)
|
334 |
+
|
335 |
+
if including_initial_developer:
|
336 |
+
workflow.add_node(self.NODE_PROMPT_INITIAL_DEVELOPER,
|
337 |
+
lambda x: self._prompt_node(
|
338 |
+
self.NODE_PROMPT_INITIAL_DEVELOPER,
|
339 |
+
"system_message",
|
340 |
+
x))
|
341 |
+
workflow.add_edge(self.NODE_PROMPT_INITIAL_DEVELOPER, self.NODE_PROMPT_EXECUTOR)
|
342 |
+
workflow.set_entry_point(self.NODE_PROMPT_INITIAL_DEVELOPER)
|
343 |
+
else:
|
344 |
+
workflow.set_entry_point(self.NODE_PROMPT_EXECUTOR)
|
345 |
+
|
346 |
+
return workflow
|
347 |
+
|
348 |
+
def __call__(self, state: AgentState, recursion_limit: int = 25) -> AgentState:
|
349 |
+
workflow = self._create_workflow(including_initial_developer=(state.system_message is None or state.system_message == ""))
|
350 |
+
|
351 |
+
memory = MemorySaver()
|
352 |
+
graph = workflow.compile(checkpointer=memory)
|
353 |
+
config = {"configurable": {"thread_id": "1"}, "recursion_limit": recursion_limit}
|
354 |
+
|
355 |
+
try:
|
356 |
+
self.logger.debug("Invoking graph with state: %s", pprint.pformat(state))
|
357 |
+
|
358 |
+
output_state = graph.invoke(state, config)
|
359 |
+
|
360 |
+
self.logger.debug("Output state: %s", pprint.pformat(output_state))
|
361 |
+
|
362 |
+
return output_state
|
363 |
+
except GraphRecursionError as e:
|
364 |
+
self.logger.info("Recursion limit reached. Returning the best state found so far.")
|
365 |
+
checkpoint_states = graph.get_state(config)
|
366 |
+
|
367 |
+
# if the length of states is bigger than 0, print the best system message and output
|
368 |
+
if len(checkpoint_states) > 0:
|
369 |
+
output_state = checkpoint_states[0]
|
370 |
+
return output_state
|
371 |
+
else:
|
372 |
+
self.logger.info("No checkpoint states found. Returning the input state.")
|
373 |
+
|
374 |
+
return state
|
375 |
+
|
376 |
+
def _prompt_node(self, node, target_attribute: str, state: AgentState) -> AgentState:
|
377 |
+
prompt = self.prompt_templates[node].format_messages(**state.model_dump())
|
378 |
+
|
379 |
+
self.logger.debug("Invoking %s with prompt: %s", node, pprint.pformat(prompt))
|
380 |
+
response = self.llms[node].invoke(self.prompt_templates[node].format_messages(**state.model_dump()))
|
381 |
+
self.logger.debug("Response: %s", response.content)
|
382 |
+
|
383 |
+
setattr(state, target_attribute, response.content)
|
384 |
+
return state
|
385 |
+
|
386 |
+
def _output_history_analyzer(self, state: AgentState) -> AgentState:
|
387 |
+
if state.best_output is None:
|
388 |
+
state.best_output = state.output
|
389 |
+
state.best_system_message = state.system_message
|
390 |
+
state.best_output_age = 0
|
391 |
+
|
392 |
+
self.logger.debug("Best output initialized to the current output: \n %s", state.output)
|
393 |
+
|
394 |
+
return state
|
395 |
+
|
396 |
+
prompt = self.prompt_templates[self.NODE_OUTPUT_HISTORY_ANALYZER].format_messages(**state.model_dump())
|
397 |
+
|
398 |
+
self.logger.debug("Invoking %s with prompt: %s",
|
399 |
+
self.NODE_OUTPUT_HISTORY_ANALYZER,
|
400 |
+
pprint.pformat(prompt))
|
401 |
+
response = self.llms[self.NODE_OUTPUT_HISTORY_ANALYZER].invoke(prompt)
|
402 |
+
self.logger.debug("Response: %s", response.content)
|
403 |
+
|
404 |
+
analysis = response.content
|
405 |
+
|
406 |
+
if state.best_output is None or "# Preferred Output ID: B" in analysis:
|
407 |
+
state.best_output = state.output
|
408 |
+
state.best_system_message = state.system_message
|
409 |
+
state.best_output_age = 0
|
410 |
+
|
411 |
+
self.logger.debug("Best output updated to the current output: \n %s", state.output)
|
412 |
+
else:
|
413 |
+
state.best_output_age += 1
|
414 |
+
|
415 |
+
self.logger.debug("Best output age incremented to %s", state.best_output_age)
|
416 |
+
|
417 |
+
return state
|
418 |
+
|
419 |
+
def _prompt_analyzer(self, state: AgentState) -> AgentState:
|
420 |
+
prompt = self.prompt_templates[self.NODE_PROMPT_ANALYZER].format_messages(**state.model_dump())
|
421 |
+
|
422 |
+
self.logger.debug("Invoking %s with prompt: %s",
|
423 |
+
self.NODE_PROMPT_ANALYZER,
|
424 |
+
pprint.pformat(prompt))
|
425 |
+
response = self.llms[self.NODE_PROMPT_ANALYZER].invoke(prompt)
|
426 |
+
self.logger.debug("Response: %s", response.content)
|
427 |
+
|
428 |
+
state.analysis = response.content
|
429 |
+
state.accepted = "Accept: Yes" in response.content
|
430 |
+
|
431 |
+
self.logger.debug("Accepted: %s", state.accepted)
|
432 |
+
|
433 |
+
return state
|
434 |
+
|
435 |
+
def _should_exit_on_max_age(self, state: AgentState) -> str:
|
436 |
+
if state.max_output_age <= 0 or state.best_output_age < state.max_output_age:
|
437 |
+
return "continue"
|
438 |
+
else:
|
439 |
+
return "rerun"
|
440 |
+
|
441 |
+
def _should_exit_on_acceptable_output(self, state: AgentState) -> str:
|
442 |
+
return "continue" if not state.accepted else END
|
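A minimal usage sketch of `MetaPromptGraph`, mirroring the tests in `tests/meta_prompt_graph_test.py` below; the model name and temperature are assumptions, and any model reachable through `ChatOpenAI` should work the same way:

```
from langchain_openai import ChatOpenAI
from meta_prompt import AgentState, MetaPromptGraph

# Assumed model/settings; requires a valid OpenAI-compatible API key.
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.1)
graph = MetaPromptGraph(llms=llm, verbose=True)

input_state = AgentState(
    user_message="How do I reverse a list in Python?",
    expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",
    acceptance_criteria="Similar in meaning, text length and style.",
)
output_state = graph(input_state, recursion_limit=25)
print(output_state["best_system_message"])
```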
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,24 @@
[tool.poetry]
name = "meta-prompt"
version = "0.1.0"
description = "This package generates high quality prompts with input and expected output examples."
authors = ["Yale Huang <[email protected]>"]
license = "MIT"

[tool.poetry.dependencies]
python = "^3.10"
langgraph = "^0.1.5"
langchain = "^0.2.6"
langchain-openai = "^0.1.14"
pydantic = "^2.8.2"

[tool.poetry.dev-dependencies]
gradio = "^4.37.2"
confz = "^2.0.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.2.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requirements.txt
ADDED
@@ -0,0 +1,122 @@
1 |
+
aiofiles==23.2.1
|
2 |
+
aiohttp==3.9.5
|
3 |
+
aiosignal==1.3.1
|
4 |
+
altair==5.1.1
|
5 |
+
annotated-types==0.5.0
|
6 |
+
anyio==3.7.1
|
7 |
+
asttokens==2.4.1
|
8 |
+
async-timeout==4.0.3
|
9 |
+
attrs==23.1.0
|
10 |
+
certifi==2023.7.22
|
11 |
+
charset-normalizer==3.2.0
|
12 |
+
click==8.1.7
|
13 |
+
comm==0.2.2
|
14 |
+
confz==2.0.1
|
15 |
+
contourpy==1.1.1
|
16 |
+
cycler==0.11.0
|
17 |
+
dataclasses-json==0.6.0
|
18 |
+
debugpy==1.8.2
|
19 |
+
decorator==5.1.1
|
20 |
+
distro==1.9.0
|
21 |
+
exceptiongroup==1.2.1
|
22 |
+
executing==2.0.1
|
23 |
+
fastapi==0.103.1
|
24 |
+
ffmpy==0.3.1
|
25 |
+
filelock==3.12.4
|
26 |
+
fonttools==4.42.1
|
27 |
+
frozenlist==1.4.0
|
28 |
+
fsspec==2023.9.2
|
29 |
+
gradio==4.37.2
|
30 |
+
gradio_client==1.0.2
|
31 |
+
greenlet==2.0.2
|
32 |
+
h11==0.14.0
|
33 |
+
httpcore==0.18.0
|
34 |
+
httpx==0.25.0
|
35 |
+
huggingface-hub==0.23.4
|
36 |
+
idna==3.4
|
37 |
+
importlib-resources==6.1.0
|
38 |
+
ipykernel==6.29.4
|
39 |
+
ipython==8.26.0
|
40 |
+
jedi==0.19.1
|
41 |
+
Jinja2==3.1.2
|
42 |
+
joblib==1.3.2
|
43 |
+
jsonpatch==1.33
|
44 |
+
jsonpointer==2.4
|
45 |
+
jsonschema==4.19.1
|
46 |
+
jsonschema-specifications==2023.7.1
|
47 |
+
jupyter_client==8.6.2
|
48 |
+
jupyter_core==5.7.2
|
49 |
+
kiwisolver==1.4.5
|
50 |
+
langchain==0.2.6
|
51 |
+
langchain-core==0.2.10
|
52 |
+
langchain-openai==0.1.13
|
53 |
+
langchain-text-splitters==0.2.2
|
54 |
+
langgraph==0.1.4
|
55 |
+
langsmith==0.1.82
|
56 |
+
markdown-it-py==3.0.0
|
57 |
+
MarkupSafe==2.1.3
|
58 |
+
marshmallow==3.20.1
|
59 |
+
matplotlib==3.8.0
|
60 |
+
matplotlib-inline==0.1.7
|
61 |
+
mdurl==0.1.2
|
62 |
+
multidict==6.0.4
|
63 |
+
mypy-extensions==1.0.0
|
64 |
+
nest-asyncio==1.6.0
|
65 |
+
numexpr==2.8.6
|
66 |
+
numpy==1.26.0
|
67 |
+
openai==1.35.7
|
68 |
+
orjson==3.10.5
|
69 |
+
packaging==24.1
|
70 |
+
pandas==2.1.1
|
71 |
+
parso==0.8.4
|
72 |
+
pexpect==4.9.0
|
73 |
+
Pillow==10.0.1
|
74 |
+
platformdirs==4.2.2
|
75 |
+
prompt_toolkit==3.0.47
|
76 |
+
psutil==6.0.0
|
77 |
+
ptyprocess==0.7.0
|
78 |
+
pure-eval==0.2.2
|
79 |
+
pydantic==2.3.0
|
80 |
+
pydantic_core==2.6.3
|
81 |
+
pydub==0.25.1
|
82 |
+
Pygments==2.18.0
|
83 |
+
pyparsing==3.1.1
|
84 |
+
python-dateutil==2.9.0.post0
|
85 |
+
python-dotenv==1.0.1
|
86 |
+
python-multipart==0.0.9
|
87 |
+
pytz==2023.3.post1
|
88 |
+
PyYAML==6.0.1
|
89 |
+
pyzmq==26.0.3
|
90 |
+
referencing==0.30.2
|
91 |
+
regex==2024.5.15
|
92 |
+
requests==2.31.0
|
93 |
+
rich==13.7.1
|
94 |
+
rpds-py==0.10.3
|
95 |
+
ruff==0.5.0
|
96 |
+
scikit-learn==1.3.1
|
97 |
+
scipy==1.11.3
|
98 |
+
semantic-version==2.10.0
|
99 |
+
shellingham==1.5.4
|
100 |
+
six==1.16.0
|
101 |
+
sniffio==1.3.0
|
102 |
+
SQLAlchemy==2.0.21
|
103 |
+
stack-data==0.6.3
|
104 |
+
starlette==0.27.0
|
105 |
+
tenacity==8.2.3
|
106 |
+
threadpoolctl==3.2.0
|
107 |
+
tiktoken==0.7.0
|
108 |
+
toml==0.10.2
|
109 |
+
tomlkit==0.12.0
|
110 |
+
toolz==0.12.0
|
111 |
+
tornado==6.4.1
|
112 |
+
tqdm==4.66.1
|
113 |
+
traitlets==5.14.3
|
114 |
+
typer==0.12.3
|
115 |
+
typing-inspect==0.9.0
|
116 |
+
typing_extensions==4.12.2
|
117 |
+
tzdata==2023.3
|
118 |
+
urllib3==2.0.5
|
119 |
+
uvicorn==0.23.2
|
120 |
+
wcwidth==0.2.13
|
121 |
+
websockets==11.0.3
|
122 |
+
yarl==1.9.2
|
tests/meta_prompt_graph_test.py
ADDED
@@ -0,0 +1,160 @@
1 |
+
import unittest
|
2 |
+
import pprint
|
3 |
+
import logging
|
4 |
+
from unittest.mock import MagicMock
|
5 |
+
from unittest.mock import patch
|
6 |
+
|
7 |
+
# Assuming the necessary imports are made for the classes and functions used in meta_prompt_graph.py
|
8 |
+
from meta_prompt import AgentState, MetaPromptGraph
|
9 |
+
|
10 |
+
from langchain_openai import ChatOpenAI
|
11 |
+
|
12 |
+
class TestMetaPromptGraph(unittest.TestCase):
|
13 |
+
def setUp(self):
|
14 |
+
# logging.basicConfig(level=logging.DEBUG)
|
15 |
+
pass
|
16 |
+
|
17 |
+
def test_prompt_node(self):
|
18 |
+
llms = {
|
19 |
+
MetaPromptGraph.NODE_PROMPT_INITIAL_DEVELOPER: MagicMock(
|
20 |
+
invoke=MagicMock(return_value=MagicMock(content="Mocked response content"))
|
21 |
+
)
|
22 |
+
}
|
23 |
+
|
24 |
+
# Create an instance of MetaPromptGraph with the mocked language model and template
|
25 |
+
graph = MetaPromptGraph(llms=llms)
|
26 |
+
|
27 |
+
# Create a mock AgentState
|
28 |
+
state = AgentState(user_message="Test message", expected_output="Expected output")
|
29 |
+
|
30 |
+
# Invoke the _prompt_node method with the mock node, target attribute, and state
|
31 |
+
updated_state = graph._prompt_node(
|
32 |
+
MetaPromptGraph.NODE_PROMPT_INITIAL_DEVELOPER, "output", state
|
33 |
+
)
|
34 |
+
|
35 |
+
# Assertions
|
36 |
+
assert updated_state.output == "Mocked response content", \
|
37 |
+
"The output attribute should be updated with the mocked response content"
|
38 |
+
|
39 |
+
def test_output_history_analyzer(self):
|
40 |
+
# Setup
|
41 |
+
llms = {
|
42 |
+
"output_history_analyzer": MagicMock(invoke=lambda prompt: MagicMock(content="""# Analysis
|
43 |
+
|
44 |
+
This analysis compares two outputs to the expected output based on specific criteria.
|
45 |
+
|
46 |
+
# Preferred Output ID: B"""))
|
47 |
+
}
|
48 |
+
prompts = {}
|
49 |
+
meta_prompt_graph = MetaPromptGraph(llms=llms, prompts=prompts)
|
50 |
+
state = AgentState(
|
51 |
+
user_message="How do I reverse a list in Python?",
|
52 |
+
expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",
|
53 |
+
output="To reverse a list in Python, you can use the `[::-1]` slicing.",
|
54 |
+
system_message="To reverse a list, use slicing or the reverse method.",
|
55 |
+
best_output="To reverse a list in Python, use the `reverse()` method.",
|
56 |
+
best_system_message="To reverse a list, use the `reverse()` method.",
|
57 |
+
acceptance_criteria="The output should correctly describe how to reverse a list in Python."
|
58 |
+
)
|
59 |
+
|
60 |
+
# Invoke the output history analyzer node
|
61 |
+
updated_state = meta_prompt_graph._output_history_analyzer(state)
|
62 |
+
|
63 |
+
# Assertions
|
64 |
+
assert updated_state.best_output == state.output, \
|
65 |
+
"Best output should be updated to the current output."
|
66 |
+
assert updated_state.best_system_message == state.system_message, \
|
67 |
+
"Best system message should be updated to the current system message."
|
68 |
+
assert updated_state.best_output_age == 0, \
|
69 |
+
"Best output age should be reset to 0."
|
70 |
+
|
71 |
+
def test_prompt_analyzer_accept(self):
|
72 |
+
llms = {
|
73 |
+
MetaPromptGraph.NODE_PROMPT_ANALYZER: MagicMock(
|
74 |
+
invoke=lambda prompt: MagicMock(content="Accept: Yes"))
|
75 |
+
}
|
76 |
+
meta_prompt_graph = MetaPromptGraph(llms)
|
77 |
+
state = AgentState(output="Test output", expected_output="Expected output")
|
78 |
+
updated_state = meta_prompt_graph._prompt_analyzer(state)
|
79 |
+
assert updated_state.accepted == True
|
80 |
+
|
81 |
+
def test_workflow_execution(self):
|
82 |
+
# MODEL_NAME = "anthropic/claude-3.5-sonnet:beta"
|
83 |
+
# MODEL_NAME = "meta-llama/llama-3-70b-instruct"
|
84 |
+
MODEL_NAME = "deepseek/deepseek-chat"
|
85 |
+
# MODEL_NAME = "google/gemma-2-9b-it"
|
86 |
+
# MODEL_NAME = "recursal/eagle-7b"
|
87 |
+
# MODEL_NAME = "meta-llama/llama-3-8b-instruct"
|
88 |
+
llm = ChatOpenAI(model_name=MODEL_NAME)
|
89 |
+
|
90 |
+
meta_prompt_graph = MetaPromptGraph(llms=llm)
|
91 |
+
input_state = AgentState(
|
92 |
+
user_message="How do I reverse a list in Python?",
|
93 |
+
expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",
|
94 |
+
acceptance_criteria="Similar in meaning, text length and style."
|
95 |
+
)
|
96 |
+
output_state = meta_prompt_graph(input_state, recursion_limit=25)
|
97 |
+
|
98 |
+
pprint.pp(output_state)
|
99 |
+
# if output_state has key 'best_system_message', print it
|
100 |
+
assert 'best_system_message' in output_state, \
|
101 |
+
"The output state should contain the key 'best_system_message'"
|
102 |
+
assert output_state['best_system_message'] is not None, \
|
103 |
+
"The best system message should not be None"
|
104 |
+
if 'best_system_message' in output_state and output_state['best_system_message'] is not None:
|
105 |
+
print(output_state['best_system_message'])
|
106 |
+
|
107 |
+
# try another similar user message with the generated system message
|
108 |
+
user_message = "How can I create a list of numbers in Python?"
|
109 |
+
messages = [("system", output_state['best_system_message']),
|
110 |
+
("human", user_message)]
|
111 |
+
result = llm.invoke(messages)
|
112 |
+
|
113 |
+
# assert attr 'content' in result
|
114 |
+
assert hasattr(result, 'content'), \
|
115 |
+
"The result should have the attribute 'content'"
|
116 |
+
print(result.content)
|
117 |
+
|
118 |
+
def test_workflow_execution_with_llms(self):
|
119 |
+
optimizer_llm = ChatOpenAI(model_name="deepseek/deepseek-chat", temperature=0.5)
|
120 |
+
executor_llm = ChatOpenAI(model_name="meta-llama/llama-3-8b-instruct", temperature=0.01)
|
121 |
+
|
122 |
+
llms = {
|
123 |
+
MetaPromptGraph.NODE_PROMPT_INITIAL_DEVELOPER: optimizer_llm,
|
124 |
+
MetaPromptGraph.NODE_PROMPT_DEVELOPER: optimizer_llm,
|
125 |
+
MetaPromptGraph.NODE_PROMPT_EXECUTOR: executor_llm,
|
126 |
+
MetaPromptGraph.NODE_OUTPUT_HISTORY_ANALYZER: optimizer_llm,
|
127 |
+
MetaPromptGraph.NODE_PROMPT_ANALYZER: optimizer_llm,
|
128 |
+
MetaPromptGraph.NODE_PROMPT_SUGGESTER: optimizer_llm
|
129 |
+
}
|
130 |
+
|
131 |
+
meta_prompt_graph = MetaPromptGraph(llms=llms)
|
132 |
+
input_state = AgentState(
|
133 |
+
user_message="How do I reverse a list in Python?",
|
134 |
+
expected_output="Use the `[::-1]` slicing technique or the `list.reverse()` method.",
|
135 |
+
acceptance_criteria="Similar in meaning, text length and style."
|
136 |
+
)
|
137 |
+
output_state = meta_prompt_graph(input_state, recursion_limit=25)
|
138 |
+
|
139 |
+
pprint.pp(output_state)
|
140 |
+
# if output_state has key 'best_system_message', print it
|
141 |
+
assert 'best_system_message' in output_state, \
|
142 |
+
"The output state should contain the key 'best_system_message'"
|
143 |
+
assert output_state['best_system_message'] is not None, \
|
144 |
+
"The best system message should not be None"
|
145 |
+
if 'best_system_message' in output_state and output_state['best_system_message'] is not None:
|
146 |
+
print(output_state['best_system_message'])
|
147 |
+
|
148 |
+
# try another similar user message with the generated system message
|
149 |
+
user_message = "How can I create a list of numbers in Python?"
|
150 |
+
messages = [("system", output_state['best_system_message']),
|
151 |
+
("human", user_message)]
|
152 |
+
result = executor_llm.invoke(messages)
|
153 |
+
|
154 |
+
# assert attr 'content' in result
|
155 |
+
assert hasattr(result, 'content'), \
|
156 |
+
"The result should have the attribute 'content'"
|
157 |
+
print(result.content)
|
158 |
+
|
159 |
+
if __name__ == '__main__':
|
160 |
+
unittest.main()
|