Commit bd26b8c · Prepare for GitHub.
Files changed:
- .gitignore +3 -0
- README +40 -0
- meta_prompt.py +54 -0
- prompt_ui.py +736 -0
- requirements.txt +80 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
.venv
.vscode
__pycache__
README
ADDED
@@ -0,0 +1,40 @@
# Meta Prompt Demo

This project demonstrates the concept of Meta Prompt: using one language model to generate and refine a high-quality prompt for another language model.

## Overview

The demo uses OpenAI's language models and provides a user interface for interacting with the chatbot. It lets users enter prompts, run model calls, compare outputs, and iteratively optimize a prompt toward an expected output.
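
The core loop can be pictured roughly as follows. This is an illustrative sketch only, assuming the `langchain`/`openai` packages pinned in `requirements.txt` and an `OPENAI_API_KEY` in the environment; the actual implementation lives in `prompt_ui.py`:

```python
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

def revise_prompt(meta_system_prompt, current_prompt, user_message, expected, actual):
    """One meta step: ask a stronger model to revise the prompt used by a weaker model."""
    meta_llm = ChatOpenAI(model="gpt-4", temperature=0)
    # Same review-request layout that prompt_ui.PromptUI.generate_user_message builds.
    review_request = (
        f"* Prompt Template\n\n```\n{current_prompt}\n```\n\n"
        f"* User Message\n\n```\n{user_message}\n```\n\n"
        f"* Expected GPT Message\n\n```\n{expected}\n```\n\n"
        f"* GPT Message\n\n```\n{actual}\n```\n"
    )
    reply = meta_llm([SystemMessage(content=meta_system_prompt),
                      HumanMessage(content=review_request)])
    return reply.content  # the revised prompt is wrapped in BEGIN/END OF PROMPT markers
```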

## Installation

To use this demo, follow these steps:

1. Clone the repository: `git clone https://github.com/your-username/meta-prompt-demo.git`
2. Change into the project directory: `cd meta-prompt-demo`
3. Install the required dependencies: `pip install -r requirements.txt`

Note that you need Python and pip installed on your system.

## Usage

To run the demo, execute the following command:

```
python meta_prompt.py --api_key YOUR_API_KEY
```

Replace `YOUR_API_KEY` with your OpenAI API key. Optional flags include `--proxy` for proxy settings, `--share` to create a public Gradio share link, and `--advanced_mode` to expose the meta system prompt and model settings; see `parse_args()` in `meta_prompt.py` for details.

Once the demo is running, you can interact with the chatbot through the provided user interface: enter prompts, run model calls, compare outputs, and explore the Meta Prompt workflow.

## License

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more information.

## Contact

For questions or feedback regarding this project, feel free to reach out to Yale Huang at [email protected].

---
*Note: This README serves as a general guideline for setting up and using the Meta Prompt demo. Update it with more specific information as needed.*
meta_prompt.py
ADDED
@@ -0,0 +1,54 @@
"""
MIT License

Copyright (c) 2023 Yale Huang
Email: [email protected]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
"""

import argparse
import os
import openai

import gradio as gr
from prompt_ui import PromptUI

class ChatbotApp:
    def __init__(self, args):
        os.environ["OPENAI_API_KEY"] = args.api_key
        if args.proxy:
            # --proxy is eval()'d, so it should be a Python literal
            # (for example a dict such as "{'https': 'http://127.0.0.1:7890'}").
            openai.proxy = eval(args.proxy)

        self.prompt_ui = PromptUI(advanced_mode=args.advanced_mode)

        self.ui = gr.TabbedInterface(
            [self.prompt_ui.ui],
            ['Prompt']
        )

    def launch(self, *args, **kwargs):
        self.ui.launch(*args, **kwargs)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--api_key", type=str, required=True, help="OpenAI API key")
    parser.add_argument("--proxy", type=str, default=None, help="Proxy settings")
    parser.add_argument("--share", action='store_true',
                        help="Launch app with sharing option")
    parser.add_argument("--advanced_mode", action='store_true', default=False,
                        help="Enable advanced mode")

    return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    app = ChatbotApp(args)
    app.ui.queue().launch(share=args.share)
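
As a quick reference (not part of this commit), the CLI above can also be driven programmatically; a minimal sketch, assuming the repository root is the working directory and a valid OpenAI key, with `SimpleNamespace` standing in for the argparse result:

```python
from types import SimpleNamespace

from meta_prompt import ChatbotApp

# Equivalent of: python meta_prompt.py --api_key sk-... --advanced_mode
args = SimpleNamespace(api_key="sk-...", proxy=None, advanced_mode=True, share=False)
app = ChatbotApp(args)
app.ui.queue().launch(share=args.share)
```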
prompt_ui.py
ADDED
@@ -0,0 +1,736 @@
"""
MIT License

Copyright (c) 2023 Yale Huang
Email: [email protected]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
"""

import re
import gradio as gr

from langchain.chat_models import ChatOpenAI
from langchain.llms.openai import OpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts.chat import ChatPromptTemplate

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

gpt_models_not_legacy = [
    "gpt-4",
    "gpt-4-0613",
    "gpt-4-32k",
    "gpt-4-32k-0613",
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-instruct",
    "gpt-3.5-turbo-0613"
]

gpt_models_legacy = [
    "gpt-3.5-turbo-instruct",
    "gpt-3.5-turbo-0301",
    "text-davinci-003",
    "text-davinci-002",
    "code-davinci-002"
]

DEFAULT_MODEL_FOR_GENERATING="gpt-4"
DEFAULT_MODEL_FOR_TESTING="gpt-3.5-turbo"
DEFAULT_MODEL_FOR_OUTPUT_EVALUATING="gpt-3.5-turbo-instruct"

DEFAULT_META_SYSTEM_PROMPT = \
'''
You are a Prompt Engineer. You review the Prompt template for GPT-3.5 and suggest changes.

# Prompt template format

You require the Prompt to be written in the following format:

```
<ROLE>

<TASK>

<REQUIREMENTS_AND_RESTRICTIONS>

```

* ROLE: The role the LLM is required to play. Describe it in one sentence.
* TASK: A summary and overall description of the tasks to be performed by the LLM. Describe it in one or more sentences.
* REQUIREMENTS_AND_RESTRICTIONS: Specific requirements for the task. Describe using a Markdown list.

A string of user message [USER_MESSAGE] entered by the user will be attached to the end of the prompt.

# Check input

Check the input format as follows:

```
* Prompt Template

[PROMPT_TEMPLATE]

* User Message

[USER_MESSAGE]

* Expected GPT Message

[EXPECTED_GPT_MESSAGE]

* GPT Message

[GPT_MESSAGE]
```

* PROMPT_TEMPLATE: Prompt template that conforms to the above Prompt template format.
* USER_MESSAGE: User input. Used to replace {user_message} in the Prompt template.
* EXPECTED_GPT_MESSAGE: Expected output to be generated by GPT.
* GPT_MESSAGE: The output actually generated by GPT from PROMPT_TEMPLATE and USER_MESSAGE.

# Examine

Check and recommend modifying the Prompt template as follows to produce output closer to EXPECTED_GPT_MESSAGE:

* Read and parse PROMPT_TEMPLATE, USER_MESSAGE and EXPECTED_GPT_MESSAGE.
* Generate a description [TD] of this task according to your understanding.
* Analyze the correlation between PROMPT_TEMPLATE and USER_MESSAGE [UMR].
* Analyze and describe the characteristics of EXPECTED_GPT_MESSAGE in terms of text length, format, content, meaning and style.
* Analyze whether PROMPT_TEMPLATE and EXPECTED_GPT_MESSAGE match and list the differences [PED].
* Check whether GPT_MESSAGE conforms to EXPECTED_GPT_MESSAGE. Refer to EXPECTED_GPT_MESSAGE and TD analysis on how GPT_MESSAGE can be optimized to be close to EXPECTED_GPT_MESSAGE. Modification suggestions are listed in detail [MCSL].
* Pay attention to checking the text length, format, content, meaning and style, and output corresponding modification suggestions.
* Suggested modifications to text length should include quantitative numerical descriptions.
* Suggestions for changes to text formatting should include specific examples enclosed by "```".
* Pay attention to checking whether unnecessary content is included in GPT_MESSAGE and output corresponding modification suggestions.
* Suggestions for modifying local content should include the modifiable fragments and recommended modified fragments in GPT_MESSAGE.
* Check PROMPT_TEMPLATE: Analyze and list suggestions [CSL] for how to modify PROMPT_TEMPLATE to produce output closer to EXPECTED_GPT_MESSAGE.
* For requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, they should also be emphasized in TASK, and the opposite tendency (such as reverse adjustment of quantitative indicators or style descriptions) should be emphasized punitively to construct the strongest negative feedback***.
* For format requirements that have been stated in REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE, add an example enclosed with "```".
* Based on PED, recommend how to modify PROMPT_TEMPLATE.
* Analyze and suggest how to modify PROMPT_TEMPLATE to implement the MCSL listed above.
* Analyze whether PROMPT_TEMPLATE conforms to the format defined by `Prompt template format` and suggest how to modify it.
* Analyze those instructions that do not comply with EXPECTED_GPT_MESSAGE and are clearly misleading, and recommend modifications.
* Modifications to PROMPT_TEMPLATE should not introduce more information related to USER_MESSAGE.
* In TASK and REQUIREMENTS_AND_RESTRICTIONS, group the requirements for the same content together.
* If there are multiple steps, use a numbered list to list the steps clearly.
* Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed.
* Only output [CSL], do not output the modified PROMPT_TEMPLATE.
* Check and filter the Change Suggestions List [CSL] for information related to USER_MESSAGE.
* Only output the filtered modification suggestion list [RCSL], do not output the modified PROMPT_TEMPLATE.
* Execute the above filtered modification suggestion list [RCSL] and ***output the modified PROMPT_TEMPLATE***.
* Execute RCSL only, avoid other changes.
* Care should be taken to avoid unnecessary changes, and the original text should be retained as much as possible for parts that do not need to be changed, except the requirements that have been stated in TASK or REQUIREMENTS_AND_RESTRICTIONS but are not met by GPT_MESSAGE.
* Strictly use the following format for output:
```
<!-- BEGIN OF PROMPT -->

<Updated Prompt>

<!-- END OF PROMPT -->
```
* If there's no change, output the following fixed message instead:
```
<!-- NO CHANGE TO PROMPT -->
```
* Evaluate the modified PROMPT_TEMPLATE.
* Analyze the changes it may cause in the output of the LLM [EC].
* Analyze whether EC would be more consistent with EXPECTED_GPT_MESSAGE.
* Analyze the correlation between the modified PROMPT_TEMPLATE and USER_MESSAGE [UMRC].
* Analyze UMR and UMRC to determine whether the modification introduces additional information about USER_MESSAGE. If introduced, issue a warning.
* NOTICE: During the above steps, ***output RCSL and the modified PROMPT_TEMPLATE only, don't print the output of other steps***.

----

Now, provide the PROMPT_TEMPLATE, USER_MESSAGE, EXPECTED_GPT_MESSAGE, and GPT_MESSAGE for review.

'''
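
# Illustration (added note, not part of the original file): the meta system prompt above
# asks the generating model to return its revision between HTML-comment markers, which
# extract_updated_prompt() and detect_no_change() below parse. A typical reply looks like:
#
#   <!-- BEGIN OF PROMPT -->
#   <revised ROLE / TASK / REQUIREMENTS_AND_RESTRICTIONS text>
#   <!-- END OF PROMPT -->
#
# or, when no revision is needed:
#
#   <!-- NO CHANGE TO PROMPT -->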

DEFAULT_CURRENT_SYSTEM_PROMPT = ''
DEFAULT_OUTPUT_EVALUATING_PROMPT = 'Find out which is more similar to string S, A or B? Print nothing if there\'s no significant difference between A and B. Else, print the result (letter A or B) only. Do nothing else.'

class PromptUI:
    def __init__(self, advanced_mode = False):
        self.advanced_mode = advanced_mode
        self.ui = self.init_ui()

    def init_ui(self):
        with gr.Blocks() as prompt_ui:
            with gr.Row():
                with gr.Column():
                    self.testing_user_prompt_textbox = gr.Textbox(label="Testing User Prompt", lines=10, interactive=True)
                    self.expect_output_textbox = gr.Textbox(label="Expected Output", lines=5, interactive=True)
                    # Add gr.Number here for iterations input
                    self.iterations_number = gr.Number(value=1, label="Optimize Iterations", min=1, max=1000, step=1, decimals=0)
                    # Add button to trigger optimization here
                    self.optimize_btn = gr.Button(value="Optimize Prompt", variant='primary')
                    self.similar_candidate_textbox = gr.Textbox(label="Similarity Delta", lines=1, interactive=True)
                    self.compare_outputs_btn = gr.Button(value="Compare Outputs")

                with gr.Column():
                    self.new_system_prompt_textbox = gr.Textbox(label="New System Prompt", lines=5, interactive=True)
                    self.new_output_textbox = gr.Textbox(label="New Output", lines=5, interactive=True)
                    with gr.Row():
                        self.run_meta_btn = gr.Button(value="↑ Single Step Optimize")
                        self.run_new_btn = gr.Button(value="⟳ Run New")
                    self.new_system_prompt_changed = gr.Checkbox(
                        label="New System Prompt Changed",
                        value=False,
                        interactive=False
                    )

                with gr.Column():
                    self.current_system_prompt_textbox = gr.Textbox(label="Current System Prompt",
                                                                    value=DEFAULT_CURRENT_SYSTEM_PROMPT,
                                                                    lines=5,
                                                                    interactive=True)
                    self.current_output_textbox = gr.Textbox(label="Current Output", lines=5, interactive=True)
                    with gr.Row():
                        self.accept_new_btn = gr.Button(value="→ Accept New Prompt")
                        self.run_current_btn = gr.Button(value="⟳ Run Current")

            with gr.Row(visible=self.advanced_mode):
                with gr.Column():
                    self.meta_system_prompt_textbox = gr.Textbox(label="Meta System Prompt",
                                                                 value=DEFAULT_META_SYSTEM_PROMPT,
                                                                 lines=10,
                                                                 interactive=True
                                                                 )
                with gr.Column():
                    self.merged_meta_prompt_textbox = gr.Textbox(label="Merged Meta System Prompt",
                                                                 lines=10,
                                                                 interactive=False,
                                                                 show_copy_button=True
                                                                 )
                    self.merge_prompt_btn = gr.Button(value="Merge Meta System Prompt")
                    # self.chatgpt_output_textbox = gr.Textbox(label="Paste ChatGPT Output",
                    #                                          lines=10,
                    #                                          interactive=True
                    #                                          )
                    # self.parse_chatgpt_output_btn = gr.Button(value="Parse ChatGPT Output")

            with gr.Row(visible=self.advanced_mode):
                with gr.Column():
                    self.llm_model_meta_dropdown = gr.Dropdown(
                        label="Generating LLM Model",
                        choices=gpt_models_not_legacy,
                        value=DEFAULT_MODEL_FOR_GENERATING,
                        interactive=True,
                        allow_custom_value=False
                    )

                    self.llm_model_meta_temperature_slider = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.01,
                        value=0.0,
                        interactive=True,
                        label="Generating LLM Model Temperature"
                    )

                    self.llm_model_meta_max_tokens_slider = gr.Slider(
                        minimum=256,
                        maximum=32000,
                        step=256,
                        value=0,
                        interactive=True,
                        label="Generating LLM Model Token Limit (0 for auto)"
                    )

                    self.llm_model_meta_request_timeout_slider = gr.Slider(
                        minimum=0,
                        maximum=600,
                        step=5,
                        value=600,
                        interactive=True,
                        label="Generating LLM Model Timeout"
                    )

                    self.llm_model_meta_max_retries_slider = gr.Slider(
                        minimum=0,
                        maximum=30,
                        step=1,
                        value=6,
                        interactive=True,
                        label="Generating LLM Model Max Retries"
                    )

                with gr.Column():
                    self.llm_model_test_dropdown = gr.Dropdown(
                        label="Testing LLM Model",
                        choices=gpt_models_not_legacy,
                        value=DEFAULT_MODEL_FOR_TESTING,
                        interactive=True,
                        allow_custom_value=False
                    )

                    self.llm_model_test_temperature_slider = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.01,
                        value=0.0,
                        interactive=True,
                        label="Testing LLM Model Temperature"
                    )

                    self.llm_model_test_max_tokens_slider = gr.Slider(
                        minimum=256,
                        maximum=32000,
                        step=256,
                        value=0,
                        interactive=True,
                        label="Testing LLM Model Token Limit (0 for auto)"
                    )

                    self.llm_model_test_request_timeout_slider = gr.Slider(
                        minimum=0,
                        maximum=600,
                        step=5,
                        value=600,
                        interactive=True,
                        label="Testing LLM Model Timeout"
                    )

                    self.llm_model_test_max_retries_slider = gr.Slider(
                        minimum=0,
                        maximum=30,
                        step=1,
                        value=6,
                        interactive=True,
                        label="Testing LLM Model Max Retries"
                    )
            # with gr.Column():
            #     self.llm_model_output_eval_dropdown = gr.Dropdown(label="Output Evaluating LLM Model",
            #                                                       choices=gpt_models_legacy,
            #                                                       value=DEFAULT_MODEL_FOR_OUTPUT_EVALUATING,
            #                                                       interactive=True,
            #                                                       allow_custom_value=False)
            #     self.llm_model_output_eval_slider = gr.Slider(minimum=0.0,
            #                                                   maximum=1.0,
            #                                                   step=0.01,
            #                                                   default=0.0,
            #                                                   label="Output Evaluating LLM Model of Temperature")


            self.run_new_btn.click(
                self.test_prompt,
                [
                    self.new_system_prompt_textbox,
                    self.testing_user_prompt_textbox,
                    self.llm_model_test_dropdown,
                    self.llm_model_test_max_retries_slider,
                    self.llm_model_test_max_tokens_slider,
                    self.llm_model_test_request_timeout_slider,
                    self.llm_model_test_temperature_slider
                ],
                [self.new_output_textbox]
            )
            self.run_current_btn.click(
                self.test_prompt,
                [
                    self.current_system_prompt_textbox,
                    self.testing_user_prompt_textbox,
                    self.llm_model_test_dropdown,
                    self.llm_model_test_max_retries_slider,
                    self.llm_model_test_max_tokens_slider,
                    self.llm_model_test_request_timeout_slider,
                    self.llm_model_test_temperature_slider
                ],
                [self.current_output_textbox]
            )
            self.run_meta_btn.click(
                self.meta_prompt,
                [
                    self.meta_system_prompt_textbox,
                    self.current_system_prompt_textbox,
                    self.testing_user_prompt_textbox,
                    self.expect_output_textbox,
                    self.current_output_textbox,
                    self.llm_model_meta_dropdown,
                    self.llm_model_meta_max_retries_slider,
                    self.llm_model_meta_max_tokens_slider,
                    self.llm_model_meta_request_timeout_slider,
                    self.llm_model_meta_temperature_slider
                ],
                [self.new_system_prompt_textbox, self.new_system_prompt_changed]
            )
            self.accept_new_btn.click(self.copy_new_prompts,
                                      [self.new_system_prompt_textbox, self.new_output_textbox],
                                      [self.current_system_prompt_textbox, self.current_output_textbox])
            self.compare_outputs_btn.click(self.compare_outputs,
                                           [self.new_output_textbox, self.current_output_textbox, self.expect_output_textbox],
                                           [self.similar_candidate_textbox])
            # Attach the optimize_prompt function to the button click event.
            # You should implement this function according to your optimization logic.
            self.optimize_btn.click(
                self.optimize_prompt,
                [
                    self.meta_system_prompt_textbox,
                    self.current_system_prompt_textbox,
                    self.testing_user_prompt_textbox,
                    self.expect_output_textbox,
                    self.current_output_textbox,
                    self.iterations_number,
                    self.llm_model_meta_dropdown,
                    self.llm_model_meta_max_retries_slider,
                    self.llm_model_meta_max_tokens_slider,
                    self.llm_model_meta_request_timeout_slider,
                    self.llm_model_meta_temperature_slider,
                    self.llm_model_test_dropdown,
                    self.llm_model_test_max_retries_slider,
                    self.llm_model_test_max_tokens_slider,
                    self.llm_model_test_request_timeout_slider,
                    self.llm_model_test_temperature_slider
                ],
                [self.new_system_prompt_textbox, self.new_system_prompt_changed])

            self.merge_prompt_btn.click(self.merge_meta_system_prompt,
                                        [
                                            self.meta_system_prompt_textbox,
                                            self.current_system_prompt_textbox,
                                            self.testing_user_prompt_textbox,
                                            self.expect_output_textbox,
                                            self.current_output_textbox
                                        ],
                                        [self.merged_meta_prompt_textbox])


        return prompt_ui

    def merge_meta_system_prompt(
        self,
        meta_system_prompt,
        current_system_prompt,
        testing_user_prompt,
        expect_output,
        current_output
    ):
        """Merge meta and current system prompts."""

        user_prompt = self.generate_user_message(
            current_system_prompt,
            testing_user_prompt,
            expect_output,
            current_output
        )

        merged_prompt = f"{meta_system_prompt}\n\n{user_prompt}"

        return merged_prompt

    def copy_new_prompts(self, system_prompt, output):
        """Copy prompts and output from new to current textboxes."""

        return system_prompt, output

    def test_prompt(
        self,
        system_prompt,
        user_prompt,
        model,
        max_retries,
        max_tokens,
        request_timeout,
        temperature,
    ):
        # Create the prompt
        prompt = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt)
        ]

        chat_llm = ChatOpenAI(
            model=model,
            max_retries=max_retries,
            max_tokens=None if max_tokens == 0 else max_tokens,
            request_timeout=request_timeout,
            temperature=temperature
        )

        # Get the response from OpenAI
        gpt_response = chat_llm(prompt)

        # Return the output to be placed in the output textbox
        return gpt_response.content

    def generate_user_message(self, current_system_prompt, testing_user_prompt, expect_output, current_output):
        user_message = f"""
* Prompt Template

```
{current_system_prompt}
```

* User Message

```
{testing_user_prompt}
```

* Expected GPT Message

```
{expect_output}
```

* GPT Message

```
{current_output}
```
"""
        return user_message

    def meta_prompt(
        self,
        meta_system_prompt, current_system_prompt, testing_user_prompt, expect_output, current_output,
        model,
        max_retries,
        max_tokens,
        request_timeout,
        temperature,
    ):
        # Format the user message
        # user_message = f"""
        # * Prompt Template

        # ```
        # {current_system_prompt}
        # ```

        # * User Message

        # ```
        # {testing_user_prompt}
        # ```

        # * Expected GPT Message

        # ```
        # {expect_output}
        # ```

        # * GPT Message

        # ```
        # {current_output}
        # ```
        # """

        # Format the user message
        user_message = self.generate_user_message(
            current_system_prompt, testing_user_prompt, expect_output, current_output
        )

        # Create the prompt
        prompt = [
            SystemMessage(content=meta_system_prompt),
            HumanMessage(content=user_message)
        ]

        chat_llm = ChatOpenAI(
            model=model,
            max_retries=max_retries,
            max_tokens=None if max_tokens == 0 else max_tokens,
            request_timeout=request_timeout,
            temperature=temperature
        )

        # Get the response from OpenAI
        gpt_response = chat_llm(prompt)

        updated_prompt = self.extract_updated_prompt(gpt_response.content)
        changed = not self.detect_no_change(gpt_response.content)

        # Return the output to be placed in the new system prompt textbox
        if updated_prompt:
            return updated_prompt, changed
        else:
            return gpt_response.content, changed

    def extract_updated_prompt(self, gpt_response):
        # Regular expression pattern to find the text enclosed
        pattern = "<!-- BEGIN OF PROMPT -->(.*?)<!-- END OF PROMPT -->"

        # Using search method to find the first occurrence of the pattern
        result = re.search(pattern, gpt_response, re.DOTALL)

        if result:
            s = result.group(1).strip("\n")
            if s.startswith("```") and s.endswith("```"):
                s = s[3:-3]
            return s # Return the matched string
        else:
            return None # If no such pattern is found return None

    def detect_no_change(self, gpt_response):
        # Regular expression pattern to find the exact string
        pattern = "<!-- NO CHANGE TO PROMPT -->"

        # Using search method to find the occurrence of the pattern
        result = re.search(pattern, gpt_response)

        if result:
            return True # If the pattern is found return True
        else:
            return False # If no such pattern is found return False

    # def compare_strings(self, a: str, b: str, s: str) -> str:
    #     # Create an instance of ChatOpenAI with the evaluation model
    #     chat_model = OpenAI(temperature=0, model_name=self.llm_model_output_eval_dropdown.value)

    #     # Create a prompt for comparison
    #     prompt = (DEFAULT_OUTPUT_EVALUATING_PROMPT +
    #               '\n\n' + f'# S\n\n```\n{s}\n```\n\n# A\n\n```\n{a}\n```\n\n# B\n\n```\n{b}\n```\n\n')

    #     # Get the response from OpenAI
    #     response = chat_model(prompt)

    #     # Remove '```' from beginning and end if it exists
    #     if response.startswith("```") and response.endswith("```"):
    #         response = response[3:-3]

    #     # Check the first character of the response and return accordingly
    #     if response.startswith('A'):
    #         return 'A'
    #     elif response.startswith('B'):
    #         return 'B'
    #     else:
    #         return None

    def optimize_prompt(
        self,
        meta_system_prompt,
        current_system_prompt,
        testing_user_prompt,
        expect_output,
        current_output,
        iterations,
        meta_model,
        meta_max_retries,
        meta_max_tokens,
        meta_request_timeout,
        meta_temperature,
        test_model,
        test_max_retries,
        test_max_tokens,
        test_request_timeout,
        test_temperature,
    ):

        changed = False

        # Iterate the specified number of times
        for i in range(int(iterations)):
            # If current_output is None or not provided, get it from test_prompt
            if current_output is None:
                current_output = self.test_prompt(
                    current_system_prompt,
                    testing_user_prompt,
                    test_model,
                    test_max_retries,
                    test_max_tokens,
                    test_request_timeout,
                    test_temperature,
                )

            # Call meta_prompt to get an optimized prompt
            new_prompt, changed = self.meta_prompt(
                meta_system_prompt,
                current_system_prompt,
                testing_user_prompt,
                expect_output,
                current_output,
                meta_model,
                meta_max_retries,
                meta_max_tokens,
                meta_request_timeout,
                meta_temperature,
            )

            # If changed is False, break the loop
            if not changed:
                break

            # If there is an updated prompt and it's different from the current one, update current_system_prompt
            if new_prompt and new_prompt != current_system_prompt:
                current_system_prompt = new_prompt
                # Reset current_output to None so it gets recalculated in the next iteration
                current_output = None

        return current_system_prompt, changed # Return the optimized system prompt

    def compare_strings(self, alpha: str, beta: str, expected: str) -> str:
        # If both ALPHA and BETA are empty, return None
        if not alpha and not beta:
            return None

        # If either ALPHA or BETA is empty, the non-empty string should be considered more similar to EXPECTED
        if not alpha:
            return 'B'
        if not beta:
            return 'A'

        # If both ALPHA and BETA are identical, return None
        if alpha == beta:
            return None

        # Create the CountVectorizer instance
        vectorizer = CountVectorizer().fit_transform([alpha, beta, expected])
        vectors = vectorizer.toarray()

        # Calculate cosine similarities
        alpha_sim = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))
        beta_sim = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))

        # Compare similarities and return the string that is more similar to the expected string
        if alpha_sim > beta_sim:
            return 'A'
        elif beta_sim > alpha_sim:
            return 'B'
        else:
            return None

    def delta_similarities(self, alpha: str, beta: str, expected: str) -> float:
        # If both ALPHA and BETA are empty, return 0
        if not alpha and not beta:
            return 0.0

        # If either ALPHA or BETA is empty, the non-empty string should be considered more similar to EXPECTED
        if not alpha:
            return -1.0
        if not beta:
            return 1.0

        # If both ALPHA and BETA are identical, return 0
        if alpha == beta:
            return 0.0

        # Create the CountVectorizer instance
        vectorizer = CountVectorizer().fit_transform([alpha, beta, expected])
        vectors = vectorizer.toarray()

        # Calculate cosine similarities
        alpha_sim = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))
        beta_sim = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))

        # Return the difference in similarities
        return alpha_sim[0][0] - beta_sim[0][0]

    def compare_outputs(self, new_output, current_output, expected_output):
        # Compare new output and current output against expected output
        # result = self.compare_strings(new_output, current_output, expected_output)
        result = self.delta_similarities(new_output, current_output, expected_output)

        return result
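
For reference (not part of this commit), the "Similarity Delta" reported by the Compare Outputs button is the difference of the two bag-of-words cosine similarities computed in `delta_similarities` above. A standalone sketch of that computation, assuming scikit-learn as pinned in `requirements.txt` (the example strings are made up):

```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

new_out = "Paris is the capital of France."     # output of the new prompt
current_out = "France is a country in Europe."  # output of the current prompt
expected = "The capital of France is Paris."    # expected output

vectors = CountVectorizer().fit_transform([new_out, current_out, expected]).toarray()
sim_new = cosine_similarity(vectors[0].reshape(1, -1), vectors[2].reshape(1, -1))[0][0]
sim_cur = cosine_similarity(vectors[1].reshape(1, -1), vectors[2].reshape(1, -1))[0][0]
print(sim_new - sim_cur)  # positive: the new prompt's output is closer to the expected one
```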
requirements.txt
ADDED
@@ -0,0 +1,80 @@
aiofiles==23.2.1
aiohttp==3.8.5
aiosignal==1.3.1
altair==5.1.1
annotated-types==0.5.0
anyio==3.7.1
async-timeout==4.0.3
attrs==23.1.0
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.7
contourpy==1.1.1
cycler==0.11.0
dataclasses-json==0.6.0
exceptiongroup==1.1.3
fastapi==0.103.1
ffmpy==0.3.1
filelock==3.12.4
fonttools==4.42.1
frozenlist==1.4.0
fsspec==2023.9.2
gradio==3.44.4
gradio_client==0.5.1
greenlet==2.0.2
h11==0.14.0
httpcore==0.18.0
httpx==0.25.0
huggingface-hub==0.17.2
idna==3.4
importlib-resources==6.1.0
Jinja2==3.1.2
joblib==1.3.2
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.19.1
jsonschema-specifications==2023.7.1
kiwisolver==1.4.5
langchain==0.0.300
langsmith==0.0.40
MarkupSafe==2.1.3
marshmallow==3.20.1
matplotlib==3.8.0
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.6
numpy==1.26.0
openai==0.28.0
orjson==3.9.7
packaging==23.1
pandas==2.1.1
Pillow==10.0.1
pydantic==2.3.0
pydantic_core==2.6.3
pydub==0.25.1
pyparsing==3.1.1
python-dateutil==2.8.2
python-multipart==0.0.6
pytz==2023.3.post1
PyYAML==6.0.1
referencing==0.30.2
requests==2.31.0
rpds-py==0.10.3
scikit-learn==1.3.1
scipy==1.11.3
semantic-version==2.10.0
six==1.16.0
sniffio==1.3.0
SQLAlchemy==2.0.21
starlette==0.27.0
tenacity==8.2.3
threadpoolctl==3.2.0
toolz==0.12.0
tqdm==4.66.1
typing-inspect==0.9.0
typing_extensions==4.8.0
tzdata==2023.3
urllib3==2.0.5
uvicorn==0.23.2
websockets==11.0.3
yarl==1.9.2