Spaces:
Running
Running
Added input dimension analysis.
Browse files- demo/sample_generator.ipynb +128 -39
demo/sample_generator.ipynb
CHANGED
@@ -2,67 +2,86 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
9 |
-
"import os\n",
|
10 |
-
"\n",
|
11 |
-
"# Load configuration from YAML file\n",
|
12 |
-
"config = {\n",
|
13 |
-
"
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
-
"}\n"
|
19 |
]
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
-
"execution_count":
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
27 |
-
"import json\n",
|
28 |
-
"import yaml\n",
|
29 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
30 |
-
"from langchain.chat_models import ChatOpenAI\n",
|
31 |
-
"from langchain.schema.output_parser import StrOutputParser\n",
|
32 |
-
"from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n",
|
33 |
-
"from langchain_core.output_parsers import JsonOutputParser\n",
|
34 |
-
"from langchain.output_parsers import YamlOutputParser\n",
|
35 |
-
"\n",
|
36 |
"# Define prompt strings as constants\n",
|
37 |
"DESCRIPTION_PROMPT = [\n",
|
38 |
-
" (\"system\", \"\"\"Given the following JSON example for a task type:\n",
|
|
|
39 |
"{raw_example}\n",
|
40 |
"\n",
|
41 |
-
"Provide a concise description of the task type, including the format and
|
|
|
|
|
42 |
"\n",
|
43 |
"Format your response as follows:\n",
|
44 |
"Task Description: [Your description here]\n",
|
45 |
"\"\"\")\n",
|
46 |
"]\n",
|
47 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
"BRIEFS_PROMPT = [\n",
|
49 |
-
" (\"system\", \"\"\"Given the task type description,
|
50 |
-
"
|
|
|
|
|
|
|
|
|
51 |
"\n",
|
52 |
-
"Format your response as a valid YAML object with a single key\n",
|
53 |
-
"
|
54 |
-
"
|
55 |
"\"\"\"),\n",
|
56 |
" (\"user\", \"\"\"Task Description:\n",
|
57 |
"\n",
|
58 |
"{description}\n",
|
59 |
"\n",
|
|
|
|
|
|
|
|
|
60 |
"\"\"\")\n",
|
61 |
"]\n",
|
62 |
"\n",
|
63 |
"EXAMPLES_FROM_BRIEFS_PROMPT = [\n",
|
64 |
" (\"system\", \"\"\"Given the task type description, brief descriptions for new examples, \n",
|
65 |
-
"and JSON example, generate
|
66 |
"strictly based on the brief descriptions. Ensure that the new examples are\n",
|
67 |
"consistent with the brief descriptions and do not introduce any new information\n",
|
68 |
"not present in the briefs.\n",
|
@@ -78,7 +97,7 @@
|
|
78 |
"\n",
|
79 |
"{new_example_briefs}\n",
|
80 |
"\n",
|
81 |
-
"Example:\n",
|
82 |
"\n",
|
83 |
"{raw_example}\n",
|
84 |
"\n",
|
@@ -86,7 +105,7 @@
|
|
86 |
"]\n",
|
87 |
"\n",
|
88 |
"EXAMPLES_PROMPT = [\n",
|
89 |
-
" (\"system\", \"\"\"Given the task type description, and input/output example, generate {generating_batch_size}\n",
|
90 |
"new input/output examples for this task type.\n",
|
91 |
"\n",
|
92 |
"Format your response as a valid JSON object with a single key 'examples' \n",
|
@@ -96,16 +115,34 @@
|
|
96 |
"\n",
|
97 |
"{description}\n",
|
98 |
"\n",
|
99 |
-
"Example:\n",
|
100 |
"\n",
|
101 |
"{raw_example}\n",
|
102 |
"\n",
|
103 |
"\"\"\")\n",
|
104 |
-
"]\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
"\n",
|
106 |
"class TaskDescriptionGenerator:\n",
|
107 |
" def __init__(self, model): \n",
|
108 |
" self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)\n",
|
|
|
109 |
" self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)\n",
|
110 |
" self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)\n",
|
111 |
" self.examples_prompt = ChatPromptTemplate.from_messages(EXAMPLES_PROMPT)\n",
|
@@ -116,6 +153,7 @@
|
|
116 |
" json_parse = JsonOutputParser()\n",
|
117 |
"\n",
|
118 |
" self.description_chain = self.description_prompt | model | output_parser\n",
|
|
|
119 |
" self.briefs_chain = self.briefs_prompt | model | output_parser\n",
|
120 |
" self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse\n",
|
121 |
" self.examples_chain = self.examples_prompt | json_model | json_parse\n",
|
@@ -125,7 +163,9 @@
|
|
125 |
" | RunnablePassthrough.assign(description = self.description_chain)\n",
|
126 |
" | {\n",
|
127 |
" \"description\": lambda x: x[\"description\"],\n",
|
128 |
-
" \"examples_from_briefs\": RunnablePassthrough.assign(
|
|
|
|
|
129 |
" \"examples\": self.examples_chain\n",
|
130 |
" }\n",
|
131 |
" | RunnablePassthrough.assign(\n",
|
@@ -138,9 +178,20 @@
|
|
138 |
"\n",
|
139 |
" def process(self, input_str, generating_batch_size=3):\n",
|
140 |
" try:\n",
|
141 |
-
"
|
142 |
-
"
|
143 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
" raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
|
145 |
"\n",
|
146 |
" # Move the original content to a key named 'example'\n",
|
@@ -156,9 +207,47 @@
|
|
156 |
},
|
157 |
{
|
158 |
"cell_type": "code",
|
159 |
-
"execution_count":
|
160 |
"metadata": {},
|
161 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
"source": [
|
163 |
"import gradio as gr\n",
|
164 |
"\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
9 |
+
"# import os\n",
|
10 |
+
"\n",
|
11 |
+
"# # Model configuration (defined inline)\n",
|
12 |
+
"# config = {\n",
|
13 |
+
"# \"model_name\": \"llama3-70b-8192\",\n",
|
14 |
+
"# # \"model_name\": \"llama3-8b-8192\",\n",
|
15 |
+
"# # \"model_name\": \"llama-3.1-70b-versatile\",\n",
|
16 |
+
"# # \"model_name\": \"llama-3.1-8b-instant\",\n",
|
17 |
+
"# # \"model_name\": \"gemma2-9b-it\",\n",
|
18 |
+
"# }\n"
|
19 |
]
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": 2,
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
"# Define prompt strings as constants\n",
|
28 |
"DESCRIPTION_PROMPT = [\n",
|
29 |
+
" (\"system\", \"\"\"Given the following JSON example(s) for a task type:\n",
|
30 |
+
" \n",
|
31 |
"{raw_example}\n",
|
32 |
"\n",
|
33 |
+
"Provide a concise description of the task type, including the format and\n",
|
34 |
+
"style of the output. If there are multiple examples, provide a description\n",
|
35 |
+
"for the task type as a whole and ignore the unique parts of the examples.\n",
|
36 |
"\n",
|
37 |
"Format your response as follows:\n",
|
38 |
"Task Description: [Your description here]\n",
|
39 |
"\"\"\")\n",
|
40 |
"]\n",
|
41 |
"\n",
|
42 |
+
"INPUT_ANALYSIS_PROMPT = [\n",
|
43 |
+
" (\"system\", \"\"\"Describe input dimensions, attributes, ranges, and typical values\n",
|
44 |
+
"for a specific task type. Identify main inputs, their impacts, and interactions.\n",
|
45 |
+
"Provide names, descriptions, ranges, and examples for each. Explain how they\n",
|
46 |
+
"affect task execution or results. Include an example of generating comprehensive\n",
|
47 |
+
"input samples using these dimensions and attributes.\n",
|
48 |
+
"\n",
|
49 |
+
"Format your response as follows:\n",
|
50 |
+
"Input Analysis: [Your analysis here]\n",
|
51 |
+
"\"\"\"),\n",
|
52 |
+
" (\"user\", \"\"\"Task Description:\n",
|
53 |
+
"\n",
|
54 |
+
"{description}\n",
|
55 |
+
"\n",
|
56 |
+
"\"\"\")\n",
|
57 |
+
"]\n",
|
58 |
+
"\n",
|
59 |
"BRIEFS_PROMPT = [\n",
|
60 |
+
" (\"system\", \"\"\"Given the task type description and input analysis, generate\n",
|
61 |
+
"descriptions for {generating_batch_size} new examples with detailed attributes\n",
|
62 |
+
"based on this task type. But don't provide any detailed task output.\n",
|
63 |
+
"\n",
|
64 |
+
"Use the input analysis to create diverse and comprehensive example briefs that\n",
|
65 |
+
"cover various input dimensions and attribute ranges.\n",
|
66 |
"\n",
|
67 |
+
"Format your response as a valid YAML object with a single key 'new_example_briefs'\n",
|
68 |
+
"containing a YAML array of {generating_batch_size} objects, each with a\n",
|
69 |
+
"'example_brief' field.\n",
|
70 |
"\"\"\"),\n",
|
71 |
" (\"user\", \"\"\"Task Description:\n",
|
72 |
"\n",
|
73 |
"{description}\n",
|
74 |
"\n",
|
75 |
+
"Input Analysis:\n",
|
76 |
+
"\n",
|
77 |
+
"{input_analysis}\n",
|
78 |
+
"\n",
|
79 |
"\"\"\")\n",
|
80 |
"]\n",
|
81 |
"\n",
|
82 |
"EXAMPLES_FROM_BRIEFS_PROMPT = [\n",
|
83 |
" (\"system\", \"\"\"Given the task type description, brief descriptions for new examples, \n",
|
84 |
+
"and JSON example(s), generate {generating_batch_size} more input/output examples for this task type,\n",
|
85 |
"strictly based on the brief descriptions. Ensure that the new examples are\n",
|
86 |
"consistent with the brief descriptions and do not introduce any new information\n",
|
87 |
"not present in the briefs.\n",
|
|
|
97 |
"\n",
|
98 |
"{new_example_briefs}\n",
|
99 |
"\n",
|
100 |
+
"Example(s):\n",
|
101 |
"\n",
|
102 |
"{raw_example}\n",
|
103 |
"\n",
|
|
|
105 |
"]\n",
|
106 |
"\n",
|
107 |
"EXAMPLES_PROMPT = [\n",
|
108 |
+
" (\"system\", \"\"\"Given the task type description, and input/output example(s), generate {generating_batch_size}\n",
|
109 |
"new input/output examples for this task type.\n",
|
110 |
"\n",
|
111 |
"Format your response as a valid JSON object with a single key 'examples' \n",
|
|
|
115 |
"\n",
|
116 |
"{description}\n",
|
117 |
"\n",
|
118 |
+
"Example(s):\n",
|
119 |
"\n",
|
120 |
"{raw_example}\n",
|
121 |
"\n",
|
122 |
"\"\"\")\n",
|
123 |
+
"]\n"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"cell_type": "code",
|
128 |
+
"execution_count": 3,
|
129 |
+
"metadata": {},
|
130 |
+
"outputs": [],
|
131 |
+
"source": [
|
132 |
+
"import json\n",
|
133 |
+
"import yaml\n",
|
134 |
+
"from langchain.prompts import ChatPromptTemplate\n",
|
135 |
+
"from langchain.chat_models import ChatOpenAI\n",
|
136 |
+
"from langchain.schema.output_parser import StrOutputParser\n",
|
137 |
+
"from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n",
|
138 |
+
"from langchain_core.output_parsers import JsonOutputParser\n",
|
139 |
+
"from langchain.output_parsers import YamlOutputParser\n",
|
140 |
+
"\n",
|
141 |
"\n",
|
142 |
"class TaskDescriptionGenerator:\n",
|
143 |
" def __init__(self, model): \n",
|
144 |
" self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)\n",
|
145 |
+
" self.input_analysis_prompt = ChatPromptTemplate.from_messages(INPUT_ANALYSIS_PROMPT)\n",
|
146 |
" self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)\n",
|
147 |
" self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)\n",
|
148 |
" self.examples_prompt = ChatPromptTemplate.from_messages(EXAMPLES_PROMPT)\n",
|
|
|
153 |
" json_parse = JsonOutputParser()\n",
|
154 |
"\n",
|
155 |
" self.description_chain = self.description_prompt | model | output_parser\n",
|
156 |
+
" self.input_analysis_chain = self.input_analysis_prompt | model | output_parser\n",
|
157 |
" self.briefs_chain = self.briefs_prompt | model | output_parser\n",
|
158 |
" self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse\n",
|
159 |
" self.examples_chain = self.examples_prompt | json_model | json_parse\n",
|
|
|
163 |
" | RunnablePassthrough.assign(description = self.description_chain)\n",
|
164 |
" | {\n",
|
165 |
" \"description\": lambda x: x[\"description\"],\n",
|
166 |
+
" \"examples_from_briefs\": RunnablePassthrough.assign(input_analysis = lambda x: self.input_analysis_chain.invoke(x))\n",
|
167 |
+
" | RunnablePassthrough.assign(new_example_briefs = lambda x: self.briefs_chain.invoke(x)) \n",
|
168 |
+
" | self.examples_from_briefs_chain,\n",
|
169 |
" \"examples\": self.examples_chain\n",
|
170 |
" }\n",
|
171 |
" | RunnablePassthrough.assign(\n",
|
|
|
178 |
"\n",
|
179 |
" def process(self, input_str, generating_batch_size=3):\n",
|
180 |
" try:\n",
|
181 |
+
" try:\n",
|
182 |
+
" example_dict = json.loads(input_str)\n",
|
183 |
+
" except ValueError:\n",
|
184 |
+
" try:\n",
|
185 |
+
" example_dict = yaml.safe_load(input_str)\n",
|
186 |
+
" except yaml.YAMLError as e:\n",
|
187 |
+
" raise ValueError(\"Invalid input format. Expected a JSON or YAML object.\") from e\n",
|
188 |
+
"\n",
|
189 |
+
" # If example_dict is a list, filter out invalid items\n",
|
190 |
+
" if isinstance(example_dict, list):\n",
|
191 |
+
" example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]\n",
|
192 |
+
"\n",
|
193 |
+
" # If example_dict is not a list, check if it's a valid dict\n",
|
194 |
+
" elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:\n",
|
195 |
" raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
|
196 |
"\n",
|
197 |
" # Move the original content to a key named 'example'\n",
|
|
|
207 |
},
|
208 |
{
|
209 |
"cell_type": "code",
|
210 |
+
"execution_count": 4,
|
211 |
"metadata": {},
|
212 |
+
"outputs": [
|
213 |
+
{
|
214 |
+
"name": "stderr",
|
215 |
+
"output_type": "stream",
|
216 |
+
"text": [
|
217 |
+
"/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
218 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"name": "stdout",
|
223 |
+
"output_type": "stream",
|
224 |
+
"text": [
|
225 |
+
"Running on local URL: http://127.0.0.1:7861\n",
|
226 |
+
"\n",
|
227 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
228 |
+
]
|
229 |
+
},
|
230 |
+
{
|
231 |
+
"data": {
|
232 |
+
"text/html": [
|
233 |
+
"<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
234 |
+
],
|
235 |
+
"text/plain": [
|
236 |
+
"<IPython.core.display.HTML object>"
|
237 |
+
]
|
238 |
+
},
|
239 |
+
"metadata": {},
|
240 |
+
"output_type": "display_data"
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"name": "stderr",
|
244 |
+
"output_type": "stream",
|
245 |
+
"text": [
|
246 |
+
"/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n",
|
247 |
+
" warn_deprecated(\n"
|
248 |
+
]
|
249 |
+
}
|
250 |
+
],
|
251 |
"source": [
|
252 |
"import gradio as gr\n",
|
253 |
"\n",
|