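"""Task description generation utilities built on LangChain.

This module defines JSON-oriented prompt templates and a TaskDescriptionGenerator
class that chains them together to derive a task type description from a seed
input/output example, propose specification and generalization suggestions,
analyze possible inputs, and generate additional examples either directly or via
intermediate example briefs.
"""
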
import json
from openai import BadRequestError
import yaml
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import YamlOutputParser

# Define prompt strings as constants
DESCRIPTION_PROMPT = [
("system", """{{
"task_description": "Given a JSON example for a task type, provide a concise description of the task type, including the format and style of the input and output. If there are multiple examples, provide an overall description and ignore unique parts. Output a JSON object.",
"requirements": [
"Analyze the provided JSON example(s) to understand the task type",
"Focus on the common format and style across examples, if multiple are given",
"Ignore any unique parts that do not generalize across examples",
"Provide a concise description summarizing the key aspects of the task type"
],
"output_format": {{
"type": "object",
"properties": {{
"description": {{
"type": "string",
"description": "A concise description of the task type, including input and output format and style"
}}
}},
"required": ["description"]
}},
"output_example": {{
"description": "This task involves analyzing financial reports in JSON format to calculate key metrics and generate a summary report. The input JSON contains fields like revenue, expenses, and dates, while the output is a JSON object with the calculated metrics and summary."
}}
}}
"""),
("user", """{raw_example}""")
]

DESCRIPTION_UPDATING_PROMPT = [
("system", """{{
"task_description": "Given the task type description and suggestions, update the task type description according to the suggestions. Output a JSON object.",
"requirements": [
"Carefully read and understand the provided task type description and suggestions",
"Identify the core elements and characteristics of the task",
"Consider possible generalization dimensions such as task domain, complexity, input/output format, application scenarios, etc.",
"Apply the suggestions to update the task description without changing anything that is not suggested",
"Ensure the updated description is clear, specific, and directly related to the task",
"Provide at least 5 specification suggestions across different dimensions"
],
"output_format": {{
"type": "object",
"properties": {{
"description": {{
"type": "string",
"description": "The updated task type description based on the provided suggestions"
}}
}},
"required": ["description"]
}},
"output_example": {{
"description": "An example of an updated task type description based on the provided suggestions"
}}
}}
"""),
("user", """{{
"task_description": "{description}",
"suggestions": "{suggestions}"
}}
""")
]

SPECIFICATION_SUGGESTIONS_PROMPT = [
("system", """{{
"prompt": "Generate suggestions to narrow the task scope for a given task type and example:\n\n1. Analyze the task description and input/output examples.\n2. Identify 3~5 relevant dimensions (e.g., purpose, input/output format, language, steps, criteria, constraints).\n3. Create 3~5 actionable suggestions (no more than 20 words for each) to narrow the task scope based on the above dimensions. Make sure the suggestions are compatible with the provided example.\n4. Start each suggestion with a verb.\n5. Output in JSON format, following `output_format`.\n",
"output_format": "{{\n \"dimensions\": [\n {{ \"dimension\": \"...\" }},\n {{ \"dimension\": \"...\" }}\n ],\n \"suggestions\": [\n {{ \"suggestion\": \"...\" }},\n {{ \"suggestion\": \"...\" }}\n ]\n}}\n",
"task_description": "\n{description}\n",
"examples": "\n{raw_example}\n"
}}
""")
]

GENERALIZATION_SUGGESTIONS_PROMPT = [
("system", """{{
"prompt": "Generate task generalization suggestions for a given task type and example:\n\n1. Analyze the task description and input/output examples.\n2. Identify 3~5 relevant dimensions (e.g., purpose, input/output format, language, steps, criteria, constraints).\n3. Create 3~5 actionable suggestions (no more than 20 words for each) to expand the scope of the task based on the above dimensions. Make sure the suggestions are compatible with the provided example.\n4. Start each suggestion with a verb.\n5. Output in JSON format, following `output_format`.\n",
"output_format": "{{\n \"dimensions\": [\n {{ \"dimension\": \"...\" }},\n {{ \"dimension\": \"...\" }}\n ],\n \"suggestions\": [\n {{ \"suggestion\": \"...\" }},\n {{ \"suggestion\": \"...\" }}\n ]\n}}\n",
"task_description": "\n{description}\n",
"examples": "\n{raw_example}\n"
}}
""")
]

INPUT_ANALYSIS_PROMPT = [
("system", """For the specific task type, analyze the possible task inputs across multiple dimensions.
Conduct a detailed analysis and enumerate:
1. Core Attributes: Identify the fundamental properties or characteristics of this input type.
2. Variation Dimensions: For each dimension that may vary, specify:
   - Dimension name
   - Possible range of values or options
   - Impact on input nature or task difficulty
3. Constraints: List any rules or limitations that must be adhered to.
4. Edge Cases: Describe extreme or special scenarios that may test the robustness of task processing.
5. External Factors: Enumerate factors that might influence input generation or task completion.
6. Potential Extensions: Propose ways to expand or modify this input type to create new variants.
Format your response as follows:
Input Analysis: [Your analysis here]
"""),
("user", """Task Description:
{description}
""")
]

BRIEFS_PROMPT = [
("system", """{{
"prompt": "Given the task type description, and input analysis, generate descriptions for {generating_batch_size} new examples with detailed attributes based on this task type. But don't provide any detailed task output.\n\nUse the input analysis to create diverse and comprehensive example briefs that cover various input dimensions and attribute ranges.\n\nFormat your response as a JSON object following `output_format`.",
"output_format": "{{
"new_example_briefs": [
{{
"example_brief": "..."
}},
{{
"example_brief": "..."
}},
...
]
}},
"task_description": "{description}",
"input_analysis": "{input_analysis}",
"generating_batch_size": "{generating_batch_size}"
}}
""")
]

EXAMPLES_FROM_BRIEFS_PROMPT = [
("system", """{{
"prompt": "Given the task type description, brief descriptions for new examples, and JSON example(s), generate {generating_batch_size} more input/output examples for this task type, strictly following the brief descriptions and task type description. Ensure that the new examples are consistent with the brief descriptions and do not introduce any new information not present in the briefs. Output in JSON format, following `output_format`. Validate the generated new examples with the task type description and brief descriptions.",
"output_format": "{{
"examples": [
{{
"input": "...",
"output": "..."
}},
{{
"input": "...",
"output": "..."
}},
...
]
}},
"task_description": "{description}",
"new_example_briefs": {new_example_briefs},
"raw_example": "{raw_example}"
}}
""")
]

EXAMPLES_DIRECTLY_PROMPT = [
("system", """{{
"prompt": "Given the task type description, and input/output example(s), generate {generating_batch_size} new input/output examples for this task type. Output in JSON format, following `output_format`.",
"output_format": "{{
"examples": [
{{
"input": "...",
"output": "..."
}},
{{
"input": "...",
"output": "..."
}},
...
]
}},
"task_description": "{description}",
"examples": "{raw_example}"
}}
""")
]


class TaskDescriptionGenerator:
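    """Derives a task type description from a seed example and generates new examples.

    Wires the module-level prompt constants into LangChain (LCEL) chains that
    parse JSON output, retry on BadRequestError, and fall back to empty
    results when a step ultimately fails.
    """
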
def __init__(self, model):
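        """Build the prompt templates and chains.

        `model` is expected to be a LangChain chat model whose `bind` method
        accepts `response_format={"type": "json_object"}` (e.g. an
        OpenAI-compatible chat model).
        """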
self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)
self.description_updating_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_UPDATING_PROMPT)
self.specification_suggestions_prompt = ChatPromptTemplate.from_messages(SPECIFICATION_SUGGESTIONS_PROMPT)
self.generalization_suggestions_prompt = ChatPromptTemplate.from_messages(GENERALIZATION_SUGGESTIONS_PROMPT)
self.input_analysis_prompt = ChatPromptTemplate.from_messages(INPUT_ANALYSIS_PROMPT)
self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)
self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)
self.examples_directly_prompt = ChatPromptTemplate.from_messages(EXAMPLES_DIRECTLY_PROMPT)
json_model = model.bind(response_format={"type": "json_object"})
# json_model = model
output_parser = StrOutputParser()
json_parse = JsonOutputParser()
self.description_chain = (self.description_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"description": ""})]) | (lambda x: x["description"])
self.description_updating_chain = (self.description_updating_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"description": ""})]) | (lambda x: x["description"])
self.specification_suggestions_chain = (self.specification_suggestions_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"dimensions": [], "suggestions": []})])
self.generalization_suggestions_chain = (self.generalization_suggestions_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"dimensions": [], "suggestions": []})])
self.input_analysis_chain = self.input_analysis_prompt | model | output_parser
# self.briefs_chain = self.briefs_prompt | model | output_parser
self.briefs_chain = self.briefs_prompt | json_model | json_parse | RunnableLambda(lambda x: x["new_example_briefs"])
self.examples_from_briefs_chain = (self.examples_from_briefs_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"examples": []})])
self.examples_directly_chain = (self.examples_directly_prompt | json_model | json_parse).with_retry(
            retry_if_exception_type=(BadRequestError,),  # Retry only on BadRequestError
wait_exponential_jitter=True, # Add jitter to the exponential backoff
stop_after_attempt=2 # Try twice
).with_fallbacks([RunnableLambda(lambda x: {"examples": []})])
# New sub-chain for loading and validating input
self.input_loader = RunnableLambda(self.load_and_validate_input)
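        # Full pipeline: load and validate the seed example, derive a task
        # description, then fan out in parallel to (a) input analysis -> example
        # briefs -> examples from briefs, (b) direct example generation, and
        # (c) specification/generalization suggestions, finally merging both
        # example sources into `additional_examples`.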
self.chain = (
self.input_loader
| RunnablePassthrough.assign(raw_example=lambda x: json.dumps(x["example"], ensure_ascii=False))
| RunnablePassthrough.assign(description=self.description_chain)
| {
"description": lambda x: x["description"],
"examples_from_briefs": RunnablePassthrough.assign(input_analysis=self.input_analysis_chain)
| RunnablePassthrough.assign(new_example_briefs=self.briefs_chain)
| RunnablePassthrough.assign(examples=self.examples_from_briefs_chain | (lambda x: x["examples"])),
"examples_directly": self.examples_directly_chain,
"suggestions": {
"specification": self.specification_suggestions_chain,
"generalization": self.generalization_suggestions_chain
} | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values()] for item in sublist])
}
| RunnablePassthrough.assign(
additional_examples=lambda x: (
list(x["examples_from_briefs"]["examples"])
+ list(x["examples_directly"]["examples"])
)
)
)

    def parse_input_str(self, input_str):
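        """Parse `input_str` as JSON, falling back to YAML, and validate its shape.

        Returns a dict with 'input' and 'output' keys, or a list of such dicts
        (invalid list items are dropped); raises ValueError otherwise.
        """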
try:
example_dict = json.loads(input_str)
except ValueError:
try:
example_dict = yaml.safe_load(input_str)
except yaml.YAMLError as e:
raise ValueError("Invalid input format. Expected a JSON or YAML object.") from e
# If example_dict is a list, filter out invalid items
if isinstance(example_dict, list):
example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]
# If example_dict is not a list, check if it's a valid dict
elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:
raise ValueError("Invalid input format. Expected an object with 'input' and 'output' fields.")
return example_dict

    def load_and_validate_input(self, input_dict):
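        """Chain entry point: parse the seed example and repackage it.

        Wraps the parsed example under the 'example' key and carries
        `generating_batch_size` through when provided.
        """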
input_str = input_dict["input_str"]
generating_batch_size = input_dict.get("generating_batch_size")
try:
example_dict = self.parse_input_str(input_str)
# Move the original content to a key named 'example'
input_dict = {"example": example_dict}
if generating_batch_size is not None:
input_dict["generating_batch_size"] = generating_batch_size
return input_dict
except Exception as e:
            raise RuntimeError(f"An error occurred during processing: {str(e)}") from e

    def process(self, input_str, generating_batch_size=3):
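        """Run the full pipeline on a seed example string.

        Returns a dict with the derived description, examples generated from
        briefs, examples generated directly, suggestions, and the combined
        `additional_examples` list.
        """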
input_dict = {"input_str": input_str, "generating_batch_size": generating_batch_size}
result = self.chain.invoke(input_dict)
return result

    def generate_description(self, input_str, generating_batch_size=3):
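        """Derive a task description plus flattened specification/generalization
        suggestions from a seed example string, without generating new examples.
        """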
chain = (
self.input_loader
| RunnablePassthrough.assign(raw_example=lambda x: json.dumps(x["example"], ensure_ascii=False))
| RunnablePassthrough.assign(description=self.description_chain)
| {
"description": lambda x: x["description"],
"suggestions": {
"specification": self.specification_suggestions_chain,
"generalization": self.generalization_suggestions_chain
} | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values() if 'suggestions' in v] for item in sublist if 'suggestion' in item])
}
)
return chain.invoke({
"input_str": input_str,
"generating_batch_size": generating_batch_size
})

    def update_description(self, input_str, description, suggestions):
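        """Apply `suggestions` to `description` and return the updated description
        together with freshly generated suggestions for it.
        """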
# package array suggestions into a JSON array
suggestions_str = json.dumps(suggestions, ensure_ascii=False)
# return the updated description with new suggestions
chain = (
RunnablePassthrough.assign(
description=self.description_updating_chain
)
| {
"description": lambda x: x["description"],
"suggestions": {
"specification": self.specification_suggestions_chain,
"generalization": self.generalization_suggestions_chain
} | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values() if 'suggestions' in v] for item in sublist if 'suggestion' in item])
}
)
return chain.invoke({
"raw_example": input_str,
"description": description,
"suggestions": suggestions_str
})

    def generate_suggestions(self, input_str, description):
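        """Return the inputs plus a flattened 'suggestions' list combining
        specification and generalization suggestions.
        """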
chain = RunnablePassthrough.assign(
suggestions={
"specification": self.specification_suggestions_chain,
"generalization": self.generalization_suggestions_chain
} | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values() if 'suggestions' in v] for item in sublist if 'suggestion' in item])
)
return chain.invoke({
"description": description,
"raw_example": input_str
})

    def analyze_input(self, description):
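        """Run the input-analysis prompt for a task description and return the
        model's free-form analysis text.
        """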
        return self.input_analysis_chain.invoke({"description": description})

    def generate_briefs(self, description, input_analysis, generating_batch_size):
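        """Generate `generating_batch_size` example briefs for the task, guided
        by the input analysis.
        """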
return self.briefs_chain.invoke({
"description": description,
"input_analysis": input_analysis,
"generating_batch_size": generating_batch_size
})

    def generate_examples_from_briefs(self, description, new_example_briefs, input_str, generating_batch_size=3):
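        """Generate new input/output examples that follow the provided briefs,
        using `input_str` as the seed example.
        """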
chain = (
self.input_loader
| RunnablePassthrough.assign(
raw_example = lambda x: json.dumps(x["example"], ensure_ascii=False),
description = lambda x: description,
new_example_briefs = lambda x: new_example_briefs
)
| self.examples_from_briefs_chain
)
return chain.invoke({
"description": description,
"new_example_briefs": new_example_briefs,
"input_str": input_str,
"generating_batch_size": generating_batch_size
})

    def generate_examples_directly(self, description, raw_example, generating_batch_size):
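        """Generate new input/output examples directly from the description and
        seed example, without intermediate briefs.
        """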
return self.examples_directly_chain.invoke({
"description": description,
"raw_example": raw_example,
"generating_batch_size": generating_batch_size
        })
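

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module). It assumes an
    # OpenAI-compatible chat model provided by langchain_openai's ChatOpenAI,
    # which is not imported above, and a valid OPENAI_API_KEY in the environment;
    # any chat model supporting bind(response_format={"type": "json_object"})
    # should work in its place.
    from langchain_openai import ChatOpenAI

    seed_example = json.dumps({
        "input": "Translate to French: Good morning",
        "output": "Bonjour"
    })

    generator = TaskDescriptionGenerator(ChatOpenAI(model="gpt-4o-mini", temperature=0))
    result = generator.process(seed_example, generating_batch_size=3)
    print(json.dumps(result, ensure_ascii=False, indent=2))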