yaleh committed on
Commit
df523ee
·
1 Parent(s): 89ef0d1

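Add two branches for generating examples: one driven by generated brief descriptions and one generated directly from the input example.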

Browse files
Files changed (1) hide show
  1. demo/sample_generator.ipynb +72 -13
demo/sample_generator.ipynb CHANGED
@@ -73,29 +73,61 @@
73
  " \"\"\"Given the following JSON example for a task type:\n",
74
  " {raw_example}\n",
75
  "\n",
76
- " Provide a concise description of the task type.\n",
77
  "\n",
78
  " Format your response as follows:\n",
79
  " Task Description: [Your description here]\n",
80
  " \"\"\"\n",
81
  " )\n",
82
  "\n",
83
- " self.examples_prompt = ChatPromptTemplate.from_template(\n",
84
- " \"\"\"Given the following task type description and JSON example:\n",
85
  " Task Description: {description}\n",
86
- " \n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  " Example:\n",
88
  " {raw_example}\n",
89
  "\n",
90
- " Generate 3 more input/output examples for this task type in the same JSON format.\n",
 
91
  "\n",
92
- " Format your response as a valid JSON object with a single key 'examples' containing a JSON array of 3 objects, each with 'input' and 'output' fields.\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  " \"\"\"\n",
94
  " )\n",
95
  "\n",
96
  " self.description_chain = self.description_prompt | self.model | self.output_parser\n",
 
97
  " # bind json_object to the model\n",
98
  " json_model = self.model.bind(response_format={\"type\": \"json_object\"})\n",
 
99
  " self.examples_chain = self.examples_prompt | json_model | self.output_parser\n",
100
  "\n",
101
  " def generate_description(self, raw_example_str):\n",
@@ -104,9 +136,17 @@
104
  " })\n",
105
  " return result.split(\"Task Description: \")[1].strip()\n",
106
  "\n",
107
- " def generate_examples(self, description, raw_example_str):\n",
108
- " result = self.examples_chain.invoke({\n",
 
 
 
 
 
 
 
109
  " \"description\": description,\n",
 
110
  " \"raw_example\": raw_example_str\n",
111
  " })\n",
112
  "\n",
@@ -119,6 +159,22 @@
119
  " return json.loads(result)\n",
120
  " except json.JSONDecodeError as e:\n",
121
  " raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  "\n",
123
  " def process(self, input_str):\n",
124
  " try:\n",
@@ -135,15 +191,18 @@
135
  " if not isinstance(data, dict) or 'input' not in data or 'output' not in data:\n",
136
  " raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
137
  " \n",
138
- " description = self.generate_description(json.dumps(data))\n",
139
- " new_examples = self.generate_examples(description, json.dumps(data))\n",
140
- " \n",
 
 
141
  " output = {\n",
142
  " \"task_description\": description,\n",
143
- " \"additional_examples\": new_examples\n",
 
144
  " }\n",
145
  " \n",
146
- " return json.dumps(output, indent=2)\n",
147
  " \n",
148
  " except Exception as e:\n",
149
  " raise RuntimeError(f\"An error occurred during processing: {str(e)}\")\n",
 
73
  " \"\"\"Given the following JSON example for a task type:\n",
74
  " {raw_example}\n",
75
  "\n",
76
+ " Provide a concise description of the task type, including the format and style of the output.\n",
77
  "\n",
78
  " Format your response as follows:\n",
79
  " Task Description: [Your description here]\n",
80
  " \"\"\"\n",
81
  " )\n",
82
  "\n",
83
+ " self.briefs_prompt = ChatPromptTemplate.from_template(\n",
84
+ " \"\"\"Given the following task type description:\n",
85
  " Task Description: {description}\n",
86
+ "\n",
87
+ " Generate descriptions for 3 new examples with detailed attributes\n",
88
+ " based on this task type.\n",
89
+ "\n",
90
+ " Format your response as a valid YAML object with a single key\n",
91
+ " 'brief_descriptions' containing a YAML array of 3 objects, each\n",
92
+ " with a 'description' field.\n",
93
+ " \"\"\"\n",
94
+ " )\n",
95
+ "\n",
96
+ " self.examples_from_briefs_prompt = ChatPromptTemplate.from_template(\n",
97
+ " \"\"\"Given the following task type description, brief description for new examples, \n",
98
+ " and JSON example:\n",
99
+ "\n",
100
+ " Task Description: {description}\n",
101
+ " Brief Description: {brief_description}\n",
102
+ "\n",
103
  " Example:\n",
104
  " {raw_example}\n",
105
  "\n",
106
+ " Generate 3 more input/output examples for this task type, based on the brief\n",
107
+ " description, in the same JSON format.\n",
108
  "\n",
109
+ " Format your response as a valid JSON object with a single key 'examples' \n",
110
+ " containing a JSON array of 3 objects, each with 'input' and 'output' fields.\n",
111
+ " \"\"\"\n",
112
+ " )\n",
113
+ "\n",
114
+ " self.examples_prompt = ChatPromptTemplate.from_template(\n",
115
+ " \"\"\"Given the following task type description, and input/output example:\n",
116
+ " Task Description: {description}\n",
117
+ " Example: {example}\n",
118
+ "\n",
119
+ " Generate 3 new input/output examples for this task type.\n",
120
+ "\n",
121
+ " Format your response as a valid JSON object with a single key 'examples' \n",
122
+ " containing a JSON array of 3 objects, each with 'input' and 'output' fields.\n",
123
  " \"\"\"\n",
124
  " )\n",
125
  "\n",
126
  " self.description_chain = self.description_prompt | self.model | self.output_parser\n",
127
+ " self.briefs_chain = self.briefs_prompt | self.model | self.output_parser\n",
128
  " # bind json_object to the model\n",
129
  " json_model = self.model.bind(response_format={\"type\": \"json_object\"})\n",
130
+ " self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | self.output_parser\n",
131
  " self.examples_chain = self.examples_prompt | json_model | self.output_parser\n",
132
  "\n",
133
  " def generate_description(self, raw_example_str):\n",
 
136
  " })\n",
137
  " return result.split(\"Task Description: \")[1].strip()\n",
138
  "\n",
139
+ " def generate_briefs(self, description):\n",
140
+ " result = self.briefs_chain.invoke({\n",
141
+ " \"description\": description\n",
142
+ " })\n",
143
+ " # return result.split(\"Brief Description: \")[1].strip()\n",
144
+ " return result\n",
145
+ "\n",
146
+ " def generate_examples_from_briefs(self, description, brief_description, raw_example_str):\n",
147
+ " result = self.examples_from_briefs_chain.invoke({\n",
148
  " \"description\": description,\n",
149
+ " \"brief_description\": brief_description,\n",
150
  " \"raw_example\": raw_example_str\n",
151
  " })\n",
152
  "\n",
 
159
  " return json.loads(result)\n",
160
  " except json.JSONDecodeError as e:\n",
161
  " raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
162
+ " \n",
163
+ " def generate_examples(self, description, example):\n",
164
+ " result = self.examples_chain.invoke({\n",
165
+ " \"description\": description,\n",
166
+ " \"example\": example\n",
167
+ " })\n",
168
+ " \n",
169
+ " try:\n",
170
+ " result = result.strip()\n",
171
+ " if result.startswith('```') and result.endswith('```'):\n",
172
+ " result = result.strip('```').strip()\n",
173
+ " if result.startswith('json'):\n",
174
+ " result = result.strip('json').strip()\n",
175
+ " return json.loads(result)\n",
176
+ " except json.JSONDecodeError as e:\n",
177
+ " raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
178
  "\n",
179
  " def process(self, input_str):\n",
180
  " try:\n",
 
191
  " if not isinstance(data, dict) or 'input' not in data or 'output' not in data:\n",
192
  " raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
193
  " \n",
194
+ " description = self.generate_description(json.dumps(data, ensure_ascii=False))\n",
195
+ " brief_description = self.generate_briefs(description)\n",
196
+ " new_examples_from_briefs = self.generate_examples_from_briefs(description, brief_description, json.dumps(data, ensure_ascii=False))\n",
197
+ " new_examples = self.generate_examples(description, json.dumps(data, ensure_ascii=False))\n",
198
+ " \n",
199
  " output = {\n",
200
  " \"task_description\": description,\n",
201
+ " \"brief_description\": brief_description,\n",
202
+ " \"additional_examples\": list(new_examples_from_briefs.values()) + list(new_examples.values())\n",
203
  " }\n",
204
  " \n",
205
+ " return json.dumps(output, indent=2, ensure_ascii=False)\n",
206
  " \n",
207
  " except Exception as e:\n",
208
  " raise RuntimeError(f\"An error occurred during processing: {str(e)}\")\n",