yaleh committed on
Commit
da31e15
·
1 Parent(s): e9fb829

Added input dimension analysis.

Browse files
Files changed (1) hide show
  1. demo/sample_generator.ipynb +128 -39
demo/sample_generator.ipynb CHANGED
@@ -2,67 +2,86 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
- "import os\n",
10
- "\n",
11
- "# Load configuration from YAML file\n",
12
- "config = {\n",
13
- " \"model_name\": \"llama3-70b-8192\",\n",
14
- " # \"model_name\": \"llama3-8b-8192\",\n",
15
- " # \"model_name\": \"llama-3.1-70b-versatile\",\n",
16
- " # \"model_name\": \"llama-3.1-8b-instant\",\n",
17
- " # \"model_name\": \"gemma2-9b-it\",\n",
18
- "}\n"
19
  ]
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": null,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
27
- "import json\n",
28
- "import yaml\n",
29
- "from langchain.prompts import ChatPromptTemplate\n",
30
- "from langchain.chat_models import ChatOpenAI\n",
31
- "from langchain.schema.output_parser import StrOutputParser\n",
32
- "from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n",
33
- "from langchain_core.output_parsers import JsonOutputParser\n",
34
- "from langchain.output_parsers import YamlOutputParser\n",
35
- "\n",
36
  "# Define prompt strings as constants\n",
37
  "DESCRIPTION_PROMPT = [\n",
38
- " (\"system\", \"\"\"Given the following JSON example for a task type:\n",
 
39
  "{raw_example}\n",
40
  "\n",
41
- "Provide a concise description of the task type, including the format and style of the output.\n",
 
 
42
  "\n",
43
  "Format your response as follows:\n",
44
  "Task Description: [Your description here]\n",
45
  "\"\"\")\n",
46
  "]\n",
47
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  "BRIEFS_PROMPT = [\n",
49
- " (\"system\", \"\"\"Given the task type description, generate descriptions for {generating_batch_size} new \n",
50
- "examples with detailed attributes based on this task type. But don't provide any detailed task output.\n",
 
 
 
 
51
  "\n",
52
- "Format your response as a valid YAML object with a single key\n",
53
- "'new_example_briefs' containing a YAML array of {generating_batch_size} objects, each\n",
54
- "with a 'example_brief' field.\n",
55
  "\"\"\"),\n",
56
  " (\"user\", \"\"\"Task Description:\n",
57
  "\n",
58
  "{description}\n",
59
  "\n",
 
 
 
 
60
  "\"\"\")\n",
61
  "]\n",
62
  "\n",
63
  "EXAMPLES_FROM_BRIEFS_PROMPT = [\n",
64
  " (\"system\", \"\"\"Given the task type description, brief descriptions for new examples, \n",
65
- "and JSON example, generate 3 more input/output examples for this task type,\n",
66
  "strictly based on the brief descriptions. Ensure that the new examples are\n",
67
  "consistent with the brief descriptions and do not introduce any new information\n",
68
  "not present in the briefs.\n",
@@ -78,7 +97,7 @@
78
  "\n",
79
  "{new_example_briefs}\n",
80
  "\n",
81
- "Example:\n",
82
  "\n",
83
  "{raw_example}\n",
84
  "\n",
@@ -86,7 +105,7 @@
86
  "]\n",
87
  "\n",
88
  "EXAMPLES_PROMPT = [\n",
89
- " (\"system\", \"\"\"Given the task type description, and input/output example, generate {generating_batch_size}\n",
90
  "new input/output examples for this task type.\n",
91
  "\n",
92
  "Format your response as a valid JSON object with a single key 'examples' \n",
@@ -96,16 +115,34 @@
96
  "\n",
97
  "{description}\n",
98
  "\n",
99
- "Example:\n",
100
  "\n",
101
  "{raw_example}\n",
102
  "\n",
103
  "\"\"\")\n",
104
- "]\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "\n",
106
  "class TaskDescriptionGenerator:\n",
107
  " def __init__(self, model): \n",
108
  " self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)\n",
 
109
  " self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)\n",
110
  " self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)\n",
111
  " self.examples_prompt = ChatPromptTemplate.from_messages(EXAMPLES_PROMPT)\n",
@@ -116,6 +153,7 @@
116
  " json_parse = JsonOutputParser()\n",
117
  "\n",
118
  " self.description_chain = self.description_prompt | model | output_parser\n",
 
119
  " self.briefs_chain = self.briefs_prompt | model | output_parser\n",
120
  " self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse\n",
121
  " self.examples_chain = self.examples_prompt | json_model | json_parse\n",
@@ -125,7 +163,9 @@
125
  " | RunnablePassthrough.assign(description = self.description_chain)\n",
126
  " | {\n",
127
  " \"description\": lambda x: x[\"description\"],\n",
128
- " \"examples_from_briefs\": RunnablePassthrough.assign(new_example_briefs = lambda x: self.briefs_chain.invoke(x)) | self.examples_from_briefs_chain,\n",
 
 
129
  " \"examples\": self.examples_chain\n",
130
  " }\n",
131
  " | RunnablePassthrough.assign(\n",
@@ -138,9 +178,20 @@
138
  "\n",
139
  " def process(self, input_str, generating_batch_size=3):\n",
140
  " try:\n",
141
- " # Parse input string to a dictionary\n",
142
- " example_dict = json.loads(input_str) if input_str.startswith('{') else yaml.safe_load(input_str)\n",
143
- " if not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:\n",
 
 
 
 
 
 
 
 
 
 
 
144
  " raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
145
  "\n",
146
  " # Move the original content to a key named 'example'\n",
@@ -156,9 +207,47 @@
156
  },
157
  {
158
  "cell_type": "code",
159
- "execution_count": null,
160
  "metadata": {},
161
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  "source": [
163
  "import gradio as gr\n",
164
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
+ "# import os\n",
10
+ "\n",
11
+ "# # Load configuration from YAML file\n",
12
+ "# config = {\n",
13
+ "# \"model_name\": \"llama3-70b-8192\",\n",
14
+ "# # \"model_name\": \"llama3-8b-8192\",\n",
15
+ "# # \"model_name\": \"llama-3.1-70b-versatile\",\n",
16
+ "# # \"model_name\": \"llama-3.1-8b-instant\",\n",
17
+ "# # \"model_name\": \"gemma2-9b-it\",\n",
18
+ "# }\n"
19
  ]
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 2,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
 
 
 
 
 
 
 
 
27
  "# Define prompt strings as constants\n",
28
  "DESCRIPTION_PROMPT = [\n",
29
+ " (\"system\", \"\"\"Given the following JSON example(s) for a task type:\n",
30
+ " \n",
31
  "{raw_example}\n",
32
  "\n",
33
+ "Provide a concise description of the task type, including the format and\n",
34
+ "style of the output. If there are multiple examples, provide a description\n",
35
+ "for the task type as a whole, ignore the unique parts of the examples.\n",
36
  "\n",
37
  "Format your response as follows:\n",
38
  "Task Description: [Your description here]\n",
39
  "\"\"\")\n",
40
  "]\n",
41
  "\n",
42
+ "INPUT_ANALYSIS_PROMPT = [\n",
43
+ " (\"system\", \"\"\"Describe input dimensions, attributes, ranges, and typical values\n",
44
+ "for a specific task type. Identify main inputs, their impacts, and interactions.\n",
45
+ "Provide names, descriptions, ranges, and examples for each. Explain how they\n",
46
+ "affect task execution or results. Include an example of generating comprehensive\n",
47
+ "input samples using these dimensions and attributes.\n",
48
+ "\n",
49
+ "Format your response as follows:\n",
50
+ "Input Analysis: [Your analysis here]\n",
51
+ "\"\"\"),\n",
52
+ " (\"user\", \"\"\"Task Description:\n",
53
+ "\n",
54
+ "{description}\n",
55
+ "\n",
56
+ "\"\"\")\n",
57
+ "]\n",
58
+ "\n",
59
  "BRIEFS_PROMPT = [\n",
60
+ " (\"system\", \"\"\"Given the task type description, and input analysis, generate\n",
61
+ "descriptions for {generating_batch_size} new examples with detailed attributes\n",
62
+ "based on this task type. But don't provide any detailed task output.\n",
63
+ "\n",
64
+ "Use the input analysis to create diverse and comprehensive example briefs that\n",
65
+ "cover various input dimensions and attribute ranges.\n",
66
  "\n",
67
+ "Format your response as a valid YAML object with a single key 'new_example_briefs'\n",
68
+ "containing a YAML array of {generating_batch_size} objects, each with a\n",
69
+ "'example_brief' field.\n",
70
  "\"\"\"),\n",
71
  " (\"user\", \"\"\"Task Description:\n",
72
  "\n",
73
  "{description}\n",
74
  "\n",
75
+ "Input Analysis:\n",
76
+ "\n",
77
+ "{input_analysis}\n",
78
+ "\n",
79
  "\"\"\")\n",
80
  "]\n",
81
  "\n",
82
  "EXAMPLES_FROM_BRIEFS_PROMPT = [\n",
83
  " (\"system\", \"\"\"Given the task type description, brief descriptions for new examples, \n",
84
+ "and JSON example(s), generate {generating_batch_size} more input/output examples for this task type,\n",
85
  "strictly based on the brief descriptions. Ensure that the new examples are\n",
86
  "consistent with the brief descriptions and do not introduce any new information\n",
87
  "not present in the briefs.\n",
 
97
  "\n",
98
  "{new_example_briefs}\n",
99
  "\n",
100
+ "Example(s):\n",
101
  "\n",
102
  "{raw_example}\n",
103
  "\n",
 
105
  "]\n",
106
  "\n",
107
  "EXAMPLES_PROMPT = [\n",
108
+ " (\"system\", \"\"\"Given the task type description, and input/output example(s), generate {generating_batch_size}\n",
109
  "new input/output examples for this task type.\n",
110
  "\n",
111
  "Format your response as a valid JSON object with a single key 'examples' \n",
 
115
  "\n",
116
  "{description}\n",
117
  "\n",
118
+ "Example(s):\n",
119
  "\n",
120
  "{raw_example}\n",
121
  "\n",
122
  "\"\"\")\n",
123
+ "]\n"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 3,
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "import json\n",
133
+ "import yaml\n",
134
+ "from langchain.prompts import ChatPromptTemplate\n",
135
+ "from langchain.chat_models import ChatOpenAI\n",
136
+ "from langchain.schema.output_parser import StrOutputParser\n",
137
+ "from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n",
138
+ "from langchain_core.output_parsers import JsonOutputParser\n",
139
+ "from langchain.output_parsers import YamlOutputParser\n",
140
+ "\n",
141
  "\n",
142
  "class TaskDescriptionGenerator:\n",
143
  " def __init__(self, model): \n",
144
  " self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)\n",
145
+ " self.input_analysis_prompt = ChatPromptTemplate.from_messages(INPUT_ANALYSIS_PROMPT)\n",
146
  " self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)\n",
147
  " self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)\n",
148
  " self.examples_prompt = ChatPromptTemplate.from_messages(EXAMPLES_PROMPT)\n",
 
153
  " json_parse = JsonOutputParser()\n",
154
  "\n",
155
  " self.description_chain = self.description_prompt | model | output_parser\n",
156
+ " self.input_analysis_chain = self.input_analysis_prompt | model | output_parser\n",
157
  " self.briefs_chain = self.briefs_prompt | model | output_parser\n",
158
  " self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse\n",
159
  " self.examples_chain = self.examples_prompt | json_model | json_parse\n",
 
163
  " | RunnablePassthrough.assign(description = self.description_chain)\n",
164
  " | {\n",
165
  " \"description\": lambda x: x[\"description\"],\n",
166
+ " \"examples_from_briefs\": RunnablePassthrough.assign(input_analysis = lambda x: self.input_analysis_chain.invoke(x))\n",
167
+ " | RunnablePassthrough.assign(new_example_briefs = lambda x: self.briefs_chain.invoke(x)) \n",
168
+ " | self.examples_from_briefs_chain,\n",
169
  " \"examples\": self.examples_chain\n",
170
  " }\n",
171
  " | RunnablePassthrough.assign(\n",
 
178
  "\n",
179
  " def process(self, input_str, generating_batch_size=3):\n",
180
  " try:\n",
181
+ " try:\n",
182
+ " example_dict = json.loads(input_str)\n",
183
+ " except ValueError:\n",
184
+ " try:\n",
185
+ " example_dict = yaml.safe_load(input_str)\n",
186
+ " except yaml.YAMLError as e:\n",
187
+ " raise ValueError(\"Invalid input format. Expected a JSON or YAML object.\") from e\n",
188
+ "\n",
189
+ " # If example_dict is a list, filter out invalid items\n",
190
+ " if isinstance(example_dict, list):\n",
191
+ " example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]\n",
192
+ "\n",
193
+ " # If example_dict is not a list, check if it's a valid dict\n",
194
+ " elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:\n",
195
  " raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
196
  "\n",
197
  " # Move the original content to a key named 'example'\n",
 
207
  },
208
  {
209
  "cell_type": "code",
210
+ "execution_count": 4,
211
  "metadata": {},
212
+ "outputs": [
213
+ {
214
+ "name": "stderr",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
218
+ " from .autonotebook import tqdm as notebook_tqdm\n"
219
+ ]
220
+ },
221
+ {
222
+ "name": "stdout",
223
+ "output_type": "stream",
224
+ "text": [
225
+ "Running on local URL: http://127.0.0.1:7861\n",
226
+ "\n",
227
+ "To create a public link, set `share=True` in `launch()`.\n"
228
+ ]
229
+ },
230
+ {
231
+ "data": {
232
+ "text/html": [
233
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
234
+ ],
235
+ "text/plain": [
236
+ "<IPython.core.display.HTML object>"
237
+ ]
238
+ },
239
+ "metadata": {},
240
+ "output_type": "display_data"
241
+ },
242
+ {
243
+ "name": "stderr",
244
+ "output_type": "stream",
245
+ "text": [
246
+ "/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n",
247
+ " warn_deprecated(\n"
248
+ ]
249
+ }
250
+ ],
251
  "source": [
252
  "import gradio as gr\n",
253
  "\n",