{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Define prompt strings as constants\n", "DESCRIPTION_PROMPT = [\n", " (\"system\", \"\"\"Given the JSON example(s) for a task type:\n", " \n", "{raw_example}\n", "\n", "Provide a concise description of the task type, including the format and style\n", "of the input and output. If there are multiple examples, provide an overall\n", "description and ignore unique parts.\n", "\n", "Format your response as follows:\n", "Task Description: [Your description here]\n", "\"\"\")\n", "]\n", "\n", "INPUT_ANALYSIS_PROMPT = [\n", " (\"system\", \"\"\"For the specific task type, analyze the possible task inputs across multiple dimensions.\n", " \n", "Conduct a detailed analysis and enumerate:\n", "\n", "1. Core Attributes: Identify the fundamental properties or characteristics of this input type.\n", "1. Variation Dimensions: For each dimension that may vary, specify:\n", " - Dimension name\n", " - Possible range of values or options\n", " - Impact on input nature or task difficulty\n", "1. Constraints: List any rules or limitations that must be adhered to.\n", "1. Edge Cases: Describe extreme or special scenarios that may test the robustness of task processing.\n", "1. External Factors: Enumerate factors that might influence input generation or task completion.\n", "1. Potential Extensions: Propose ways to expand or modify this input type to create new variants.\n", "\n", "Format your response as follows:\n", "Input Analysis: [Your analysis here]\n", "\"\"\"),\n", " (\"user\", \"\"\"Task Description:\n", "\n", "{description}\n", "\n", "\"\"\")\n", "]\n", "\n", "BRIEFS_PROMPT = [\n", " (\"system\", \"\"\"Given the task type description, and input analysis, generate\n", "descriptions for {generating_batch_size} new examples with detailed attributes\n", "based on this task type. But don't provide any detailed task output.\n", "\n", "Use the input analysis to create diverse and comprehensive example briefs that\n", "cover various input dimensions and attribute ranges.\n", "\n", "Format your response as a valid YAML object with a single key 'new_example_briefs'\n", "containing a YAML array of {generating_batch_size} objects, each with a\n", "'example_brief' field.\n", "\"\"\"),\n", " (\"user\", \"\"\"Task Description:\n", "\n", "{description}\n", "\n", "Input Analysis:\n", "\n", "{input_analysis}\n", "\n", "\"\"\")\n", "]\n", "\n", "EXAMPLES_FROM_BRIEFS_PROMPT = [\n", " (\"system\", \"\"\"Given the task type description, brief descriptions for new examples, \n", "and JSON example(s), generate {generating_batch_size} more input/output examples for this task type,\n", "strictly based on the brief descriptions. Ensure that the new examples are\n", "consistent with the brief descriptions and do not introduce any new information\n", "not present in the briefs.\n", "\n", "Format your response as a valid JSON object with a single key 'examples' \n", "containing a JSON array of {generating_batch_size} objects, each with 'input' and 'output' fields.\n", "\"\"\"),\n", " (\"user\", \"\"\"Task Description:\n", "\n", "{description}\n", "\n", "New Example Briefs: \n", "\n", "{new_example_briefs}\n", "\n", "Example(s):\n", "\n", "{raw_example}\n", "\n", "\"\"\")\n", "]\n", "\n", "EXAMPLES_DIRECTLY_PROMPT = [\n", " (\"system\", \"\"\"Given the task type description, and input/output example(s), generate {generating_batch_size}\n", "new input/output examples for this task type.\n", "\n", "Format your response as a valid JSON object with a single key 'examples' \n", "containing a JSON array of {generating_batch_size} objects, each with 'input' and 'output' fields.\n", "\"\"\"),\n", " (\"user\", \"\"\"Task Description:\n", "\n", "{description}\n", "\n", "Example(s):\n", "\n", "{raw_example}\n", "\n", "\"\"\")\n", "]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import json\n", "import yaml\n", "from langchain.prompts import ChatPromptTemplate\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.schema.output_parser import StrOutputParser\n", "from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda\n", "from langchain_core.output_parsers import JsonOutputParser\n", "from langchain.output_parsers import YamlOutputParser\n", "\n", "\n", "class TaskDescriptionGenerator:\n", " def __init__(self, model): \n", " self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)\n", " self.input_analysis_prompt = ChatPromptTemplate.from_messages(INPUT_ANALYSIS_PROMPT)\n", " self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)\n", " self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)\n", " self.examples_directly_prompt = ChatPromptTemplate.from_messages(EXAMPLES_DIRECTLY_PROMPT)\n", "\n", " json_model = model.bind(response_format={\"type\": \"json_object\"})\n", "\n", " output_parser = StrOutputParser()\n", " json_parse = JsonOutputParser()\n", "\n", " self.description_chain = self.description_prompt | model | output_parser\n", " self.input_analysis_chain = self.input_analysis_prompt | model | output_parser\n", " self.briefs_chain = self.briefs_prompt | model | output_parser\n", " self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse\n", " self.examples_directly_chain = self.examples_directly_prompt | json_model | json_parse\n", "\n", " # New sub-chain for loading and validating input\n", " self.input_loader = RunnableLambda(self.load_and_validate_input)\n", "\n", " self.chain = (\n", " self.input_loader\n", " | RunnablePassthrough.assign(raw_example = lambda x: json.dumps(x[\"example\"], ensure_ascii=False))\n", " | RunnablePassthrough.assign(description = self.description_chain)\n", " | {\n", " \"description\": lambda x: x[\"description\"],\n", " \"examples_from_briefs\": RunnablePassthrough.assign(input_analysis = self.input_analysis_chain)\n", " | RunnablePassthrough.assign(new_example_briefs = self.briefs_chain) \n", " | RunnablePassthrough.assign(examples = self.examples_from_briefs_chain | (lambda x: x[\"examples\"])),\n", " \"examples_directly\": self.examples_directly_chain\n", " }\n", " | RunnablePassthrough.assign(\n", " additional_examples=lambda x: (\n", " list(x[\"examples_from_briefs\"][\"examples\"])\n", " + list(x[\"examples_directly\"][\"examples\"])\n", " )\n", " )\n", " )\n", "\n", " def load_and_validate_input(self, input_dict):\n", " input_str = input_dict[\"input_str\"]\n", " generating_batch_size = input_dict[\"generating_batch_size\"]\n", "\n", " try:\n", " try:\n", " example_dict = json.loads(input_str)\n", " except ValueError:\n", " try:\n", " example_dict = yaml.safe_load(input_str)\n", " except yaml.YAMLError as e:\n", " raise ValueError(\"Invalid input format. Expected a JSON or YAML object.\") from e\n", "\n", " # If example_dict is a list, filter out invalid items\n", " if isinstance(example_dict, list):\n", " example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]\n", "\n", " # If example_dict is not a list, check if it's a valid dict\n", " elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:\n", " raise ValueError(\"Invalid input format. Expected an object with 'input' and 'output' fields.\")\n", "\n", " # Move the original content to a key named 'example'\n", " input_dict = {\"example\": example_dict, \"generating_batch_size\": generating_batch_size}\n", "\n", " return input_dict\n", "\n", " except Exception as e:\n", " raise RuntimeError(f\"An error occurred during processing: {str(e)}\")\n", "\n", " def process(self, input_str, generating_batch_size=3):\n", " input_dict = {\"input_str\": input_str, \"generating_batch_size\": generating_batch_size}\n", " result = self.chain.invoke(input_dict)\n", " return result\n", "\n", " def generate_description(self, input_str, generating_batch_size=3):\n", " chain = (\n", " self.input_loader \n", " | RunnablePassthrough.assign(raw_example = lambda x: json.dumps(x[\"example\"], ensure_ascii=False))\n", " | self.description_chain\n", " )\n", " return chain.invoke({\n", " \"input_str\": input_str,\n", " \"generating_batch_size\": generating_batch_size\n", " })\n", "\n", " def analyze_input(self, description):\n", " return self.input_analysis_chain.invoke(description)\n", "\n", " def generate_briefs(self, description, input_analysis, generating_batch_size):\n", " return self.briefs_chain.invoke({\n", " \"description\": description,\n", " \"input_analysis\": input_analysis,\n", " \"generating_batch_size\": generating_batch_size\n", " })\n", "\n", " def generate_examples_from_briefs(self, description, new_example_briefs, input_str, generating_batch_size=3):\n", " chain = (\n", " self.input_loader\n", " | RunnablePassthrough.assign(\n", " raw_example = lambda x: json.dumps(x[\"example\"], ensure_ascii=False),\n", " description = lambda x: description,\n", " new_example_briefs = lambda x: new_example_briefs\n", " )\n", " | self.examples_from_briefs_chain\n", " )\n", " return chain.invoke({\n", " \"description\": description,\n", " \"new_example_briefs\": new_example_briefs,\n", " \"input_str\": input_str,\n", " \"generating_batch_size\": generating_batch_size\n", " })\n", "\n", " def generate_examples_directly(self, description, raw_example, generating_batch_size):\n", " return self.examples_directly_chain.invoke({\n", " \"description\": description,\n", " \"raw_example\": raw_example,\n", " \"generating_batch_size\": generating_batch_size\n", " })" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gradio as gr\n", "\n", "def process_json(input_json, model_name, generating_batch_size, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " result = generator.process(input_json, generating_batch_size)\n", " description = result[\"description\"]\n", " examples_directly = [[example[\"input\"], example[\"output\"]] for example in result[\"examples_directly\"][\"examples\"]]\n", " input_analysis = result[\"examples_from_briefs\"][\"input_analysis\"]\n", " new_example_briefs = result[\"examples_from_briefs\"][\"new_example_briefs\"]\n", " examples_from_briefs = [[example[\"input\"], example[\"output\"]] for example in result[\"examples_from_briefs\"][\"examples\"]]\n", " examples = [[example[\"input\"], example[\"output\"]] for example in result[\"additional_examples\"]]\n", " return description, examples_directly, input_analysis, new_example_briefs, examples_from_briefs, examples\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", " \n", "def generate_description_only(input_json, model_name, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " description = generator.generate_description(input_json)\n", " return description\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", "\n", "def analyze_input(description, model_name, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " input_analysis = generator.analyze_input(description)\n", " return input_analysis\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", " \n", "def generate_briefs(description, input_analysis, generating_batch_size, model_name, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " briefs = generator.generate_briefs(description, input_analysis, generating_batch_size)\n", " return briefs\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", " \n", "def generate_examples_from_briefs(description, new_example_briefs, input_str, generating_batch_size, model_name, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " result = generator.generate_examples_from_briefs(description, new_example_briefs, input_str, generating_batch_size)\n", " examples = [[example[\"input\"], example[\"output\"]] for example in result[\"examples\"]]\n", " return examples\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", " \n", "def generate_examples_directly(description, raw_example, generating_batch_size, model_name, temperature):\n", " try:\n", " model = ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)\n", " generator = TaskDescriptionGenerator(model)\n", " result = generator.generate_examples_directly(description, raw_example, generating_batch_size)\n", " examples = [[example[\"input\"], example[\"output\"]] for example in result[\"examples\"]]\n", " return examples\n", " except Exception as e:\n", " raise gr.Error(f\"An error occurred: {str(e)}\")\n", "\n", "def format_selected_example(evt: gr.SelectData, examples):\n", " if evt.index[0] < len(examples):\n", " selected_example = examples.iloc[evt.index[0]] # Use iloc to access by integer position\n", " json_example = json.dumps({\"input\": selected_example.iloc[0], \"output\": selected_example.iloc[1]}, indent=2, ensure_ascii=False)\n", " return json_example\n", " return \"\"\n", "\n", "with gr.Blocks(title=\"Task Description Generator\") as demo:\n", " gr.Markdown(\"# Task Description Generator\")\n", " gr.Markdown(\"Enter a JSON object with 'input' and 'output' fields to generate a task description and additional examples.\")\n", "\n", " with gr.Row():\n", " with gr.Column(scale=1): # Inputs column\n", " input_json = gr.Textbox(label=\"Input JSON\", lines=10, show_copy_button=True)\n", " model_name = gr.Dropdown(\n", " label=\"Model Name\",\n", " choices=[\"llama3-70b-8192\", \"llama3-8b-8192\", \"llama-3.1-70b-versatile\", \"llama-3.1-8b-instant\", \"gemma2-9b-it\"],\n", " value=\"llama3-70b-8192\"\n", " )\n", " temperature = gr.Slider(label=\"Temperature\", value=1.0, minimum=0.0, maximum=1.0, step=0.1)\n", " generating_batch_size = gr.Slider(label=\"Generating Batch Size\", value=3, minimum=1, maximum=10, step=1)\n", " with gr.Row():\n", " submit_button = gr.Button(\"Generate\", variant=\"primary\")\n", " generate_description_button = gr.Button(\"Generate Description\", variant=\"secondary\")\n", "\n", " with gr.Column(scale=1): # Outputs column\n", " description_output = gr.Textbox(label=\"Description\", lines=5, show_copy_button=True)\n", " with gr.Row():\n", " generate_examples_directly_button = gr.Button(\"Generate Examples Directly\", variant=\"secondary\")\n", " analyze_input_button = gr.Button(\"Analyze Input\", variant=\"secondary\")\n", " examples_directly_output = gr.DataFrame(label=\"Examples Directly\", headers=[\"Input\", \"Output\"], interactive=False)\n", " input_analysis_output = gr.Textbox(label=\"Input Analysis\", lines=5, show_copy_button=True)\n", " generate_briefs_button = gr.Button(\"Generate Briefs\", variant=\"secondary\")\n", " example_briefs_output = gr.Textbox(label=\"Example Briefs\", lines=5, show_copy_button=True)\n", " generate_examples_from_briefs_button = gr.Button(\"Generate Examples from Briefs\", variant=\"secondary\")\n", " examples_from_briefs_output = gr.DataFrame(label=\"Examples from Briefs\", headers=[\"Input\", \"Output\"], interactive=False)\n", " examples_output = gr.DataFrame(label=\"Examples\", headers=[\"Input\", \"Output\"], interactive=False)\n", " new_example_json = gr.Textbox(label=\"New Example JSON\", lines=5, show_copy_button=True)\n", "\n", " clear_button = gr.ClearButton([input_json, description_output, input_analysis_output,\n", " example_briefs_output, examples_from_briefs_output,\n", " examples_output, new_example_json])\n", "\n", " submit_button.click(\n", " fn=process_json,\n", " inputs=[input_json, model_name, generating_batch_size, temperature],\n", " outputs=[description_output, examples_directly_output, input_analysis_output, example_briefs_output, examples_from_briefs_output, examples_output]\n", " )\n", "\n", " generate_description_button.click(\n", " fn=generate_description_only,\n", " inputs=[input_json, model_name, temperature],\n", " outputs=[description_output]\n", " )\n", "\n", " generate_examples_directly_button.click(\n", " fn=generate_examples_directly,\n", " inputs=[description_output, input_json, generating_batch_size, model_name, temperature],\n", " outputs=[examples_directly_output]\n", " )\n", "\n", " analyze_input_button.click(\n", " fn=analyze_input,\n", " inputs=[description_output, model_name, temperature],\n", " outputs=[input_analysis_output]\n", " )\n", "\n", " generate_briefs_button.click(\n", " fn=generate_briefs,\n", " inputs=[description_output, input_analysis_output, generating_batch_size, model_name, temperature],\n", " outputs=[example_briefs_output]\n", " )\n", "\n", " generate_examples_from_briefs_button.click(\n", " fn=generate_examples_from_briefs,\n", " inputs=[description_output, example_briefs_output, input_json, generating_batch_size, model_name, temperature],\n", " outputs=[examples_from_briefs_output]\n", " )\n", "\n", " examples_directly_output.select(\n", " fn=format_selected_example,\n", " inputs=[examples_directly_output],\n", " outputs=[new_example_json]\n", " )\n", "\n", " examples_from_briefs_output.select(\n", " fn=format_selected_example,\n", " inputs=[examples_from_briefs_output],\n", " outputs=[new_example_json]\n", " )\n", "\n", " examples_output.select(\n", " fn=format_selected_example,\n", " inputs=[examples_output],\n", " outputs=[new_example_json]\n", " )\n", "\n", " gr.Markdown(\"### Manual Flagging\")\n", " with gr.Row():\n", " flag_button = gr.Button(\"Flag\")\n", " flag_reason = gr.Textbox(label=\"Reason for flagging\")\n", "\n", " flagging_callback = gr.CSVLogger()\n", " flag_button.click(\n", " lambda *args: flagging_callback.flag(args),\n", " inputs=[input_json, model_name, generating_batch_size, description_output, examples_output, flag_reason],\n", " outputs=[]\n", " )\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 }