Spaces:
Sleeping
Sleeping
2 branches of generating examples.
Browse files- demo/sample_generator.ipynb +72 -13
demo/sample_generator.ipynb
CHANGED
@@ -73,29 +73,61 @@
|
|
73 |
" \"\"\"Given the following JSON example for a task type:\n",
|
74 |
" {raw_example}\n",
|
75 |
"\n",
|
76 |
-
" Provide a concise description of the task type.\n",
|
77 |
"\n",
|
78 |
" Format your response as follows:\n",
|
79 |
" Task Description: [Your description here]\n",
|
80 |
" \"\"\"\n",
|
81 |
" )\n",
|
82 |
"\n",
|
83 |
-
" self.
|
84 |
-
" \"\"\"Given the following task type description
|
85 |
" Task Description: {description}\n",
|
86 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
" Example:\n",
|
88 |
" {raw_example}\n",
|
89 |
"\n",
|
90 |
-
" Generate 3 more input/output examples for this task type
|
|
|
91 |
"\n",
|
92 |
-
" Format your response as a valid JSON object with a single key 'examples'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
" \"\"\"\n",
|
94 |
" )\n",
|
95 |
"\n",
|
96 |
" self.description_chain = self.description_prompt | self.model | self.output_parser\n",
|
|
|
97 |
" # bind json_object to the model\n",
|
98 |
" json_model = self.model.bind(response_format={\"type\": \"json_object\"})\n",
|
|
|
99 |
" self.examples_chain = self.examples_prompt | json_model | self.output_parser\n",
|
100 |
"\n",
|
101 |
" def generate_description(self, raw_example_str):\n",
|
@@ -104,9 +136,17 @@
|
|
104 |
" })\n",
|
105 |
" return result.split(\"Task Description: \")[1].strip()\n",
|
106 |
"\n",
|
107 |
-
" def
|
108 |
-
" result = self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
" \"description\": description,\n",
|
|
|
110 |
" \"raw_example\": raw_example_str\n",
|
111 |
" })\n",
|
112 |
"\n",
|
@@ -119,6 +159,22 @@
|
|
119 |
" return json.loads(result)\n",
|
120 |
" except json.JSONDecodeError as e:\n",
|
121 |
" raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
"\n",
|
123 |
" def process(self, input_str):\n",
|
124 |
" try:\n",
|
@@ -135,15 +191,18 @@
|
|
135 |
" if not isinstance(data, dict) or 'input' not in data or 'output' not in data:\n",
|
136 |
" raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
|
137 |
" \n",
|
138 |
-
" description = self.generate_description(json.dumps(data))\n",
|
139 |
-
"
|
140 |
-
" \n",
|
|
|
|
|
141 |
" output = {\n",
|
142 |
" \"task_description\": description,\n",
|
143 |
-
" \"
|
|
|
144 |
" }\n",
|
145 |
" \n",
|
146 |
-
" return json.dumps(output, indent=2)\n",
|
147 |
" \n",
|
148 |
" except Exception as e:\n",
|
149 |
" raise RuntimeError(f\"An error occurred during processing: {str(e)}\")\n",
|
|
|
73 |
" \"\"\"Given the following JSON example for a task type:\n",
|
74 |
" {raw_example}\n",
|
75 |
"\n",
|
76 |
+
" Provide a concise description of the task type, including the format and style of the output.\n",
|
77 |
"\n",
|
78 |
" Format your response as follows:\n",
|
79 |
" Task Description: [Your description here]\n",
|
80 |
" \"\"\"\n",
|
81 |
" )\n",
|
82 |
"\n",
|
83 |
+
" self.briefs_prompt = ChatPromptTemplate.from_template(\n",
|
84 |
+
" \"\"\"Given the following task type description:\n",
|
85 |
" Task Description: {description}\n",
|
86 |
+
"\n",
|
87 |
+
" Generate descriptions for 3 new examples with detailed attributes\n",
|
88 |
+
" based on this task type.\n",
|
89 |
+
"\n",
|
90 |
+
" Format your response as a valid YAML object with a single key\n",
|
91 |
+
" 'brief_descriptions' containing a YAML array of 3 objects, each\n",
|
92 |
+
" with a 'description' field.\n",
|
93 |
+
" \"\"\"\n",
|
94 |
+
" )\n",
|
95 |
+
"\n",
|
96 |
+
" self.examples_from_briefs_prompt = ChatPromptTemplate.from_template(\n",
|
97 |
+
" \"\"\"Given the following task type description, brief description for new examples, \n",
|
98 |
+
" and JSON example:\n",
|
99 |
+
"\n",
|
100 |
+
" Task Description: {description}\n",
|
101 |
+
" Brief Description: {brief_description}\n",
|
102 |
+
"\n",
|
103 |
" Example:\n",
|
104 |
" {raw_example}\n",
|
105 |
"\n",
|
106 |
+
" Generate 3 more input/output examples for this task type, based on the brief\n",
|
107 |
+
" description, in the same JSON format.\n",
|
108 |
"\n",
|
109 |
+
" Format your response as a valid JSON object with a single key 'examples' \n",
|
110 |
+
" containing a JSON array of 3 objects, each with 'input' and 'output' fields.\n",
|
111 |
+
" \"\"\"\n",
|
112 |
+
" )\n",
|
113 |
+
"\n",
|
114 |
+
" self.examples_prompt = ChatPromptTemplate.from_template(\n",
|
115 |
+
" \"\"\"Given the following task type description, and input/output example:\n",
|
116 |
+
" Task Description: {description}\n",
|
117 |
+
" Example: {example}\n",
|
118 |
+
"\n",
|
119 |
+
" Generate 3 new input/output examples for this task type.\n",
|
120 |
+
"\n",
|
121 |
+
" Format your response as a valid JSON object with a single key 'examples' \n",
|
122 |
+
" containing a JSON array of 3 objects, each with 'input' and 'output' fields.\n",
|
123 |
" \"\"\"\n",
|
124 |
" )\n",
|
125 |
"\n",
|
126 |
" self.description_chain = self.description_prompt | self.model | self.output_parser\n",
|
127 |
+
" self.briefs_chain = self.briefs_prompt | self.model | self.output_parser\n",
|
128 |
" # bind json_object to the model\n",
|
129 |
" json_model = self.model.bind(response_format={\"type\": \"json_object\"})\n",
|
130 |
+
" self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | self.output_parser\n",
|
131 |
" self.examples_chain = self.examples_prompt | json_model | self.output_parser\n",
|
132 |
"\n",
|
133 |
" def generate_description(self, raw_example_str):\n",
|
|
|
136 |
" })\n",
|
137 |
" return result.split(\"Task Description: \")[1].strip()\n",
|
138 |
"\n",
|
139 |
+
" def generate_briefs(self, description):\n",
|
140 |
+
" result = self.briefs_chain.invoke({\n",
|
141 |
+
" \"description\": description\n",
|
142 |
+
" })\n",
|
143 |
+
" # return result.split(\"Brief Description: \")[1].strip()\n",
|
144 |
+
" return result\n",
|
145 |
+
"\n",
|
146 |
+
" def generate_examples_from_briefs(self, description, brief_description, raw_example_str):\n",
|
147 |
+
" result = self.examples_from_briefs_chain.invoke({\n",
|
148 |
" \"description\": description,\n",
|
149 |
+
" \"brief_description\": brief_description,\n",
|
150 |
" \"raw_example\": raw_example_str\n",
|
151 |
" })\n",
|
152 |
"\n",
|
|
|
159 |
" return json.loads(result)\n",
|
160 |
" except json.JSONDecodeError as e:\n",
|
161 |
" raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
|
162 |
+
" \n",
|
163 |
+
" def generate_examples(self, description, example):\n",
|
164 |
+
" result = self.examples_chain.invoke({\n",
|
165 |
+
" \"description\": description,\n",
|
166 |
+
" \"example\": example\n",
|
167 |
+
" })\n",
|
168 |
+
" \n",
|
169 |
+
" try:\n",
|
170 |
+
" result = result.strip()\n",
|
171 |
+
" if result.startswith('```') and result.endswith('```'):\n",
|
172 |
+
" result = result.strip('```').strip()\n",
|
173 |
+
" if result.startswith('json'):\n",
|
174 |
+
" result = result.strip('json').strip()\n",
|
175 |
+
" return json.loads(result)\n",
|
176 |
+
" except json.JSONDecodeError as e:\n",
|
177 |
+
" raise ValueError(f\"The generated examples are not in valid JSON format. Error: {str(e)} Result: {result}\")\n",
|
178 |
"\n",
|
179 |
" def process(self, input_str):\n",
|
180 |
" try:\n",
|
|
|
191 |
" if not isinstance(data, dict) or 'input' not in data or 'output' not in data:\n",
|
192 |
" raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n",
|
193 |
" \n",
|
194 |
+
" description = self.generate_description(json.dumps(data, ensure_ascii=False))\n",
|
195 |
+
" brief_description = self.generate_briefs(description)\n",
|
196 |
+
" new_examples_from_briefs = self.generate_examples_from_briefs(description, brief_description, json.dumps(data, ensure_ascii=False))\n",
|
197 |
+
" new_examples = self.generate_examples(description, json.dumps(data, ensure_ascii=False))\n",
|
198 |
+
" \n",
|
199 |
" output = {\n",
|
200 |
" \"task_description\": description,\n",
|
201 |
+
" \"brief_description\": brief_description,\n",
|
202 |
+
" \"additional_examples\": list(new_examples_from_briefs.values()) + list(new_examples.values())\n",
|
203 |
" }\n",
|
204 |
" \n",
|
205 |
+
" return json.dumps(output, indent=2, ensure_ascii=False)\n",
|
206 |
" \n",
|
207 |
" except Exception as e:\n",
|
208 |
" raise RuntimeError(f\"An error occurred during processing: {str(e)}\")\n",
|