Commit f9ab7f7 · 1 Parent(s): 2b661e2
Committed by yaleh

`Apply Changes` works.

app/streamlit_sample_generator.py CHANGED
@@ -12,6 +12,7 @@ def process_json(input_json, model_name, generating_batch_size, temperature):
         generator = TaskDescriptionGenerator(model)
         result = generator.process(input_json, generating_batch_size)
         description = result["description"]
+        suggestions = result["suggestions"]
         examples_directly = [[example["input"], example["output"]]
                              for example in result["examples_directly"]["examples"]]
         input_analysis = result["examples_from_briefs"]["input_analysis"]
@@ -20,10 +21,10 @@ def process_json(input_json, model_name, generating_batch_size, temperature):
                                   for example in result["examples_from_briefs"]["examples"]]
         examples = [[example["input"], example["output"]]
                     for example in result["additional_examples"]]
-        return description, examples_directly, input_analysis, new_example_briefs, examples_from_briefs, examples
+        return description, suggestions, examples_directly, input_analysis, new_example_briefs, examples_from_briefs, examples
     except Exception as e:
         st.warning(f"An error occurred: {str(e)}. Returning default values.")
-        return "", [], "", [], [], []
+        return "", [], [], "", [], [], []


 def generate_description_only(input_json, model_name, temperature):
@@ -31,10 +32,13 @@ def generate_description_only(input_json, model_name, temperature):
         model = ChatOpenAI(
             model=model_name, temperature=temperature, max_retries=3)
         generator = TaskDescriptionGenerator(model)
-        description = generator.generate_description(input_json)
-        return description
+        result = generator.generate_description(input_json)
+        description = result["description"]
+        suggestions = result["suggestions"]
+        return description, suggestions
     except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+        st.warning(f"An error occurred: {str(e)}")
+        return "", []


 def analyze_input(description, model_name, temperature):
@@ -45,7 +49,8 @@ def analyze_input(description, model_name, temperature):
         input_analysis = generator.analyze_input(description)
         return input_analysis
     except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+        st.warning(f"An error occurred: {str(e)}")
+        return ""


 def generate_briefs(description, input_analysis, generating_batch_size, model_name, temperature):
@@ -57,7 +62,8 @@ def generate_briefs(description, input_analysis, generating_batch_size, model_na
             description, input_analysis, generating_batch_size)
         return briefs
     except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+        st.warning(f"An error occurred: {str(e)}")
+        return ""


 def generate_examples_from_briefs(description, new_example_briefs, input_str, generating_batch_size, model_name, temperature):
@@ -71,7 +77,8 @@ def generate_examples_from_briefs(description, new_example_briefs, input_str, ge
                     for example in result["examples"]]
         return examples
     except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+        st.warning(f"An error occurred: {str(e)}")
+        return []


 def generate_examples_directly(description, raw_example, generating_batch_size, model_name, temperature):
@@ -85,7 +92,8 @@ def generate_examples_directly(description, raw_example, generating_batch_size,
                     for example in result["examples"]]
         return examples
     except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+        st.warning(f"An error occurred: {str(e)}")
+        return []


 def example_directly_selected():
@@ -142,6 +150,9 @@ if 'input_data' not in st.session_state:
 if 'description_output_text' not in st.session_state:
     st.session_state.description_output_text = ''

+if 'suggestions' not in st.session_state:
+    st.session_state.suggestions = []
+
 if 'input_analysis_output_text' not in st.session_state:
     st.session_state.input_analysis_output_text = ''

@@ -169,8 +180,9 @@ if 'selected_example' not in st.session_state:

 def update_description_output_text():
     input_json = package_input_data()
-    st.session_state.description_output_text = generate_description_only(
-        input_json, model_name, temperature)
+    result = generate_description_only(input_json, model_name, temperature)
+    st.session_state.description_output_text = result[0]
+    st.session_state.suggestions = result[1]


 def update_input_analysis_output_text():
@@ -203,8 +215,9 @@ def generate_examples_dataframe():
     input_json = package_input_data()
     result = process_json(input_json, model_name,
                           generating_batch_size, temperature)
-    description, examples_directly, input_analysis, new_example_briefs, examples_from_briefs, examples = result
+    description, suggestions, examples_directly, input_analysis, new_example_briefs, examples_from_briefs, examples = result
     st.session_state.description_output_text = description
+    st.session_state.suggestions = suggestions  # Ensure suggestions are stored in session state
     st.session_state.examples_directly_dataframe = pd.DataFrame(
         examples_directly, columns=["Input", "Output"])
     st.session_state.input_analysis_output_text = input_analysis
@@ -239,6 +252,12 @@ def import_input_data_from_json():
     except Exception as e:
         st.error(f"Failed to import JSON: {str(e)}")

+def apply_suggestions():
+    result = TaskDescriptionGenerator(
+        ChatOpenAI(model=model_name, temperature=temperature, max_retries=3)).update_description(
+        package_input_data(), st.session_state.description_output_text, st.session_state.selected_suggestions)
+    st.session_state.description_output_text = result["description"]
+    st.session_state.suggestions = result["suggestions"]

 # Streamlit UI
 st.title("LLM Task Example Generator")
@@ -288,6 +307,13 @@ with st.expander("Description and Analysis"):
     description_output = st.text_area(
         "Description", value=st.session_state.description_output_text, height=100)

+    # Add multiselect for suggestions
+    selected_suggestions = st.multiselect(
+        "Suggestions", options=st.session_state.suggestions, key="selected_suggestions")
+
+    # Add button to apply suggestions
+    apply_suggestions_button = st.button("Apply Suggestions", on_click=apply_suggestions)
+
     col3, col4 = st.columns(2)
     with col3:
         generate_examples_directly_button = st.button(
@@ -327,3 +353,4 @@ def show_sidebar():
         st.button("Append to Input Data", on_click=append_selected_to_input_data)

 show_sidebar()
+
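Taken together, the UI changes treat the description step as a `(description, suggestions)` pair, surface the suggestions in a multiselect, and feed the selected ones back through `TaskDescriptionGenerator.update_description`. Below is a minimal sketch of that round trip outside Streamlit; the model name, the example JSON, and the `ChatOpenAI` import path are placeholder assumptions, not part of the commit.

```python
# Illustrative sketch only: the "generate description" / "Apply Suggestions"
# round trip performed by the new callbacks, without Streamlit session state.
from langchain_openai import ChatOpenAI  # assumed import path
from meta_prompt.sample_generator import TaskDescriptionGenerator

model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, max_retries=3)  # placeholder model
generator = TaskDescriptionGenerator(model)

input_json = '{"input": "2 + 3", "output": "5"}'  # placeholder example

# Equivalent of update_description_output_text(): a description plus a list
# of suggestion strings for the multiselect.
result = generator.generate_description(input_json)
description, suggestions = result["description"], result["suggestions"]

# Equivalent of apply_suggestions(): apply the picked suggestions and get a
# refreshed description together with a fresh batch of suggestions.
picked = suggestions[:2]
updated = generator.update_description(input_json, description, picked)
print(updated["description"])
print(updated["suggestions"])
```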
meta_prompt/sample_generator.py CHANGED
@@ -21,6 +21,173 @@ Task Description: [Your description here]
 """)
 ]

+DESCRIPTION_UPDATING_PROMPT = [
+    ("system", """Given the task type description and suggestions, update the task type description according to the suggestions.
+
+1. Input Information:
+   - You will receive a task type description and suggestions for updating the description.
+   - Carefully read and understand the provided information.
+
+2. Task Analysis:
+   - Identify the core elements and characteristics of the task.
+   - Consider possible generalization dimensions such as task domain, complexity, input/output format, application scenarios, etc.
+
+3. Update Task Description:
+   - Apply the suggestions to update the task description. Don't change anything that is not suggested.
+   - Ensure the updated description is clear, specific, and directly related to the task.
+
+4. Output Format:
+   - Format your response as follows:
+
+Task Description: [Your updated description here]
+
+   - Output the updated `Task Description` only. Don't output anything else.
+
+5. Completeness Check:
+   - Ensure all important aspects of the task description are covered.
+   - Check for any missing key information or dimensions.
+
+6. Quantity Requirement:
+   - Provide at least 5 specification suggestions across different dimensions.
+"""),
+    ("user", """***Task Description:***
+
+{description}
+
+***Suggestions:***
+
+{suggestions}
+
+""")
+]
+
+
+SPECIFICATION_SUGGESTIONS_PROMPT = [
+    ("system", """Based on the given task type description and corresponding input/output examples, list suggestions to specify the task type description in multiple dimensions using JSON format.
+
+Please complete this task according to the following requirements:
+
+1. Analyze the given task type description and input/output examples.
+
+2. Identify multiple dimensions to specify the task description, such as:
+   - Task purpose
+   - Input format requirements
+   - Output format requirements
+   - Processing steps
+   - Evaluation criteria
+   - Constraints
+   - Special case handling
+   - Other relevant dimensions
+
+3. Output Format:
+   - Use JSON format to list the suggestions.
+   - The JSON structure should contain a top-level array, with each object representing a suggestion for a specific dimension.
+
+4. Suggestion Content:
+   - Each suggestion should be clear, specific, and directly related to the task description or input/output examples.
+   - Start each suggestion with a verb, such as "Limit the scope of supported tasks to..." Make sure it is an actionable, self-contained, and complete suggestion.
+   - Ensure suggestions are compatible with the provided input/output examples.
+
+5. Output Example:
+
+```json
+{{
+  "suggestions": [
+    {{
+      "suggestion": "..."
+    }},
+    {{
+      "suggestion": "..."
+    }},
+    ...
+  ]
+}}
+```
+
+6. Completeness Check:
+   - Ensure all important aspects of the task description are covered.
+   - Check for any missing key information or dimensions.
+
+7. Quantity Requirement:
+   - Provide at least 5 specification suggestions across different dimensions.
+
+Please begin the task directly. After completion, verify that your JSON output meets all requirements and directly addresses the task needs.
+
+***Task Description:***
+
+{description}
+
+***Example(s):***
+
+{raw_example}
+
+""")
+]
+
+GENERALIZATION_SUGGESTIONS_PROMPT = [
+    ("system", """Based on a given task type description and corresponding input/output examples, list suggestions for generalizing the task type description across multiple dimensions in JSON format.
+
+Please complete this task according to the following requirements:
+
+1. Input Information:
+   - You will receive a task type description and corresponding input/output examples.
+   - Carefully read and understand the provided information.
+
+2. Task Analysis:
+   - Identify the core elements and characteristics of the task.
+   - Consider possible generalization dimensions such as task domain, complexity, input/output format, application scenarios, etc.
+
+3. Generate Generalization Suggestions:
+   - Based on your analysis, propose generalization suggestions across multiple dimensions.
+   - Each suggestion should be a reasonable extension or variation of the original task type.
+   - Start each suggestion with a verb, such as "Expand the scope of support to..." Make sure it is an actionable, self-contained, and complete suggestion.
+
+4. Output Format:
+   - Use JSON format to list the suggestions.
+   - The JSON structure should contain a top-level array, with each object representing a suggestion for a specific dimension.
+
+5. Output Example:
+
+```json
+{{
+  "suggestions": [
+    {{
+      "suggestion": "..."
+    }},
+    {{
+      "suggestion": "..."
+    }},
+    ...
+  ]
+}}
+```
+
+6. Quality Requirements:
+   - Ensure each generalization suggestion is meaningful and feasible.
+   - Provide diverse generalization dimensions to cover different aspects of possible extensions.
+   - Maintain conciseness and clarity in suggestions.
+
+7. Quantity Requirement:
+   - Provide at least 5 generalization suggestions across different dimensions.
+
+8. Notes:
+   - Avoid proposing generalizations completely unrelated to the original task.
+   - Ensure the JSON format is correct and can be parsed.
+
+After completing the task, please check if your output meets all requirements, especially the correctness of the JSON format and the quality of generalization suggestions.
+
+***Task Description:***
+
+{description}
+
+***Example(s):***
+
+{raw_example}
+
+
+""")
+]
+
 INPUT_ANALYSIS_PROMPT = [
     ("system", """For the specific task type, analyze the possible task inputs across multiple dimensions.

@@ -78,16 +245,16 @@ not present in the briefs.

 Format your response as a valid JSON object with a single key 'examples'
 containing a JSON array of {generating_batch_size} objects, each with 'input' and 'output' fields.
-"""),
-    ("user", """Task Description:
+
+***Task Description:***

 {description}

-New Example Briefs:
+***New Example Briefs:***

 {new_example_briefs}

-Example(s):
+***Example(s):***

 {raw_example}

@@ -100,12 +267,12 @@ new input/output examples for this task type.

 Format your response as a valid JSON object with a single key 'examples'
 containing a JSON array of {generating_batch_size} objects, each with 'input' and 'output' fields.
-"""),
-    ("user", """Task Description:
+
+***Task Description:***

 {description}

-Example(s):
+***Example(s):***

 {raw_example}

@@ -116,6 +283,9 @@ Example(s):
 class TaskDescriptionGenerator:
     def __init__(self, model):
         self.description_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_PROMPT)
+        self.description_updating_prompt = ChatPromptTemplate.from_messages(DESCRIPTION_UPDATING_PROMPT)
+        self.specification_suggestions_prompt = ChatPromptTemplate.from_messages(SPECIFICATION_SUGGESTIONS_PROMPT)
+        self.generalization_suggestions_prompt = ChatPromptTemplate.from_messages(GENERALIZATION_SUGGESTIONS_PROMPT)
         self.input_analysis_prompt = ChatPromptTemplate.from_messages(INPUT_ANALYSIS_PROMPT)
         self.briefs_prompt = ChatPromptTemplate.from_messages(BRIEFS_PROMPT)
         self.examples_from_briefs_prompt = ChatPromptTemplate.from_messages(EXAMPLES_FROM_BRIEFS_PROMPT)
@@ -127,6 +297,9 @@ class TaskDescriptionGenerator:
         json_parse = JsonOutputParser()

         self.description_chain = self.description_prompt | model | output_parser
+        self.description_updating_chain = self.description_updating_prompt | model | output_parser
+        self.specification_suggestions_chain = self.specification_suggestions_prompt | json_model | json_parse
+        self.generalization_suggestions_chain = self.generalization_suggestions_prompt | json_model | json_parse
         self.input_analysis_chain = self.input_analysis_prompt | model | output_parser
         self.briefs_chain = self.briefs_prompt | model | output_parser
         self.examples_from_briefs_chain = self.examples_from_briefs_prompt | json_model | json_parse
@@ -137,14 +310,18 @@ class TaskDescriptionGenerator:

         self.chain = (
             self.input_loader
-            | RunnablePassthrough.assign(raw_example = lambda x: json.dumps(x["example"], ensure_ascii=False))
-            | RunnablePassthrough.assign(description = self.description_chain)
+            | RunnablePassthrough.assign(raw_example=lambda x: json.dumps(x["example"], ensure_ascii=False))
+            | RunnablePassthrough.assign(description=self.description_chain)
             | {
                 "description": lambda x: x["description"],
-                "examples_from_briefs": RunnablePassthrough.assign(input_analysis = self.input_analysis_chain)
-                | RunnablePassthrough.assign(new_example_briefs = self.briefs_chain)
-                | RunnablePassthrough.assign(examples = self.examples_from_briefs_chain | (lambda x: x["examples"])),
-                "examples_directly": self.examples_directly_chain
+                "examples_from_briefs": RunnablePassthrough.assign(input_analysis=self.input_analysis_chain)
+                | RunnablePassthrough.assign(new_example_briefs=self.briefs_chain)
+                | RunnablePassthrough.assign(examples=self.examples_from_briefs_chain | (lambda x: x["examples"])),
+                "examples_directly": self.examples_directly_chain,
+                "suggestions": {
+                    "specification": self.specification_suggestions_chain,
+                    "generalization": self.generalization_suggestions_chain
+                } | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values()] for item in sublist])
             }
             | RunnablePassthrough.assign(
                 additional_examples=lambda x: (
@@ -154,29 +331,35 @@ class TaskDescriptionGenerator:
             )
         )

-    def load_and_validate_input(self, input_dict):
-        input_str = input_dict["input_str"]
-        generating_batch_size = input_dict["generating_batch_size"]
-
+    def parse_input_str(self, input_str):
         try:
+            example_dict = json.loads(input_str)
+        except ValueError:
             try:
-                example_dict = json.loads(input_str)
-            except ValueError:
-                try:
-                    example_dict = yaml.safe_load(input_str)
-                except yaml.YAMLError as e:
-                    raise ValueError("Invalid input format. Expected a JSON or YAML object.") from e
-
-            # If example_dict is a list, filter out invalid items
-            if isinstance(example_dict, list):
-                example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]
+                example_dict = yaml.safe_load(input_str)
+            except yaml.YAMLError as e:
+                raise ValueError("Invalid input format. Expected a JSON or YAML object.") from e
+
+        # If example_dict is a list, filter out invalid items
+        if isinstance(example_dict, list):
+            example_dict = [item for item in example_dict if isinstance(item, dict) and 'input' in item and 'output' in item]
+
+        # If example_dict is not a list, check if it's a valid dict
+        elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:
+            raise ValueError("Invalid input format. Expected an object with 'input' and 'output' fields.")
+
+        return example_dict

-            # If example_dict is not a list, check if it's a valid dict
-            elif not isinstance(example_dict, dict) or 'input' not in example_dict or 'output' not in example_dict:
-                raise ValueError("Invalid input format. Expected an object with 'input' and 'output' fields.")
+    def load_and_validate_input(self, input_dict):
+        input_str = input_dict["input_str"]
+        generating_batch_size = input_dict.get("generating_batch_size")

+        try:
+            example_dict = self.parse_input_str(input_str)
             # Move the original content to a key named 'example'
-            input_dict = {"example": example_dict, "generating_batch_size": generating_batch_size}
+            input_dict = {"example": example_dict}
+            if generating_batch_size is not None:
+                input_dict["generating_batch_size"] = generating_batch_size

             return input_dict

@@ -191,13 +374,43 @@ class TaskDescriptionGenerator:
     def generate_description(self, input_str, generating_batch_size=3):
         chain = (
             self.input_loader
-            | RunnablePassthrough.assign(raw_example = lambda x: json.dumps(x["example"], ensure_ascii=False))
-            | self.description_chain
+            | RunnablePassthrough.assign(raw_example=lambda x: json.dumps(x["example"], ensure_ascii=False))
+            | RunnablePassthrough.assign(description=self.description_chain)
+            | {
+                "description": lambda x: x["description"],
+                "suggestions": {
+                    "specification": self.specification_suggestions_chain,
+                    "generalization": self.generalization_suggestions_chain
+                } | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values()] for item in sublist])
+            }
         )
         return chain.invoke({
             "input_str": input_str,
             "generating_batch_size": generating_batch_size
         })
+
+    def update_description(self, input_str, description, suggestions):
+        # package array suggestions into a JSON array
+        suggestions_str = json.dumps(suggestions, ensure_ascii=False)
+
+        # return the updated description with new suggestions
+        chain = (
+            RunnablePassthrough.assign(
+                description=self.description_updating_chain
+            )
+            | {
+                "description": lambda x: x["description"],
+                "suggestions": {
+                    "specification": self.specification_suggestions_chain,
+                    "generalization": self.generalization_suggestions_chain
+                } | RunnableLambda(lambda x: [item['suggestion'] for sublist in [v['suggestions'] for v in x.values()] for item in sublist])
+            }
+        )
+        return chain.invoke({
+            "raw_example": input_str,
+            "description": description,
+            "suggestions": suggestions_str
+        })

     def analyze_input(self, description):
         return self.input_analysis_chain.invoke(description)
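The `suggestions` branch added to the main chain (invoked by `process`) and to `generate_description` and `update_description` fans out to the specification and generalization chains, then flattens their parsed JSON into a single list of strings with the `RunnableLambda` shown above. A self-contained sketch of just that flattening step, with hand-written dicts standing in for the chains' parsed output (no model call involved; the suggestion strings are illustrative placeholders):

```python
# The flattening step from the "suggestions" branch, isolated for clarity.
# The input dict mimics the parsed JSON produced by the two suggestion chains.
flatten = lambda x: [item['suggestion']
                     for sublist in [v['suggestions'] for v in x.values()]
                     for item in sublist]

chain_output = {
    "specification": {"suggestions": [{"suggestion": "Limit the scope of supported tasks to arithmetic."}]},
    "generalization": {"suggestions": [{"suggestion": "Expand the scope of support to multi-step word problems."}]},
}

print(flatten(chain_output))
# ['Limit the scope of supported tasks to arithmetic.',
#  'Expand the scope of support to multi-step word problems.']
```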