diff --git "a/demo/langgraph_meta_prompt.ipynb" "b/demo/langgraph_meta_prompt.ipynb" new file mode 100644--- /dev/null +++ "b/demo/langgraph_meta_prompt.ipynb" @@ -0,0 +1,1614 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment the following line to install the required packages\n", + "\n", + "# %pip install langchain openai langchain_openai langchain_core langgraph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Not running in Google Colab\n" + ] + } + ], + "source": [ + "import sys\n", + "import os\n", + "\n", + "if 'google.colab' in sys.modules:\n", + " print(\"Running in Google Colab\")\n", + " from google.colab import userdata\n", + "\n", + " # get secret openai_api_key and set it to OS env OPENAI_API_KEY\n", + " try:\n", + " openai_api_key = userdata.get('openai_api_key')\n", + " os.environ['OPENAI_API_KEY'] = openai_api_key\n", + " except:\n", + " print(\"No openai_api_key found in Google Colab\")\n", + "\n", + " # get secret openai_base_url\n", + " try:\n", + " openai_base_url = userdata.get('openai_base_url')\n", + " os.environ['OPENAI_API_BASE'] = openai_base_url\n", + " except:\n", + " print(\"No openai_base_url found in Google Colab\")\n", + "else:\n", + " print(\"Not running in Google Colab\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Annotated, Sequence, Dict, Any\n", + "\n", + "from typing_extensions import TypedDict\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "from langgraph.graph import StateGraph, END\n", + "from langgraph.graph.message import add_messages\n", + "from langgraph.checkpoint.memory import MemorySaver\n", + "from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage\n", + "from langchain_core.prompts import 
ChatPromptTemplate\n", + "from langchain_core.pydantic_v1 import BaseModel\n", + "\n", + "import operator\n", + "import random\n", + "\n", + "# Can converge correctly\n", + "\n", + "# MODEL_NAME = \"anthropic/claude-3.5-sonnet:beta\"\n", + "# MODEL_NAME = \"openai/gpt-4o\"\n", + "# MODEL_NAME = \"openai/gpt-4-turbo\"\n", + "# MODEL_NAME = \"llama3-70b-8192\"\n", + "# MODEL_NAME = \"meta-llama/llama-3-70b-instruct\"\n", + "# MODEL_NAME = \"deepseek/deepseek-chat\"\n", + "# MODEL_NAME = \"qwen/qwen-2-72b-instruct\"\n", + "\n", + "# Failed to converge correctly\n", + "\n", + "# MODEL_NAME = \"llama3-8b-8192\"\n", + "# MODEL_NAME = \"mistralai/mixtral-8x22b-instruct\"\n", + "# MODEL_NAME = \"anthropic/claude-3-haiku:beta\"\n", + "MODEL_NAME = \"google/gemma-2-9b-it\"\n", + "# MODEL_NAME = \"meta-llama/llama-3-8b-instruct\"\n", + "# MODEL_NAME = \"microsoft/phi-3-medium-128k-instruct\"\n", + "# MODEL_NAME = \"mixtral-8x7b-32768\"\n", + "# MODEL_NAME = \"cohere/command-r\"\n", + "\n", + "llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0.5)\n", + "\n", + "# EXECUTOR_MODEL = \"microsoft/phi-3-medium-128k-instruct\"\n", + "# EXECUTOR_MODEL = \"deepseek/deepseek-chat\"\n", + "# EXECUTOR_MODEL = \"gemma-7b-it\"\n", + "# EXECUTOR_MODEL = \"llama3-8b-8192\"\n", + "# EXECUTOR_MODEL = \"llama3-70b-8192\"\n", + "# EXECUTOR_MODEL = \"mixtral-8x7b-32768\"\n", + "# EXECUTOR_MODEL = \"anthropic/claude-3-haiku:beta\"\n", + "# EXECUTOR_MODEL = \"meta-llama/llama-3-8b-instruct\"\n", + "EXECUTOR_MODEL = \"google/gemma-2-9b-it\"\n", + "# EXECUTOR_MODEL = \"anthropic/claude-3.5-sonnet:beta\"\n", + "\n", + "executor_llm = ChatOpenAI(model_name=EXECUTOR_MODEL, temperature=0.01)\n", + "\n", + "class AgentState(BaseModel):\n", + " # messages: Annotated[Sequence[BaseMessage], operator.add] = []\n", + " acceptance_criteria: str = \"Exactly text match.\"\n", + " user_message: str = \"\"\n", + " expected_output: str = \"\"\n", + " system_message: str = \"\"\n", + " output: str = \"\"\n", + " 
suggestions: str = \"\"\n", + " accepted: bool = False\n", + " analysis: str = \"\"\n", + " best_output: str = \"\"\n", + " best_system_message: str = \"\"\n", + " best_output_age: int = 0\n", + " max_output_age: int = 0\n", + "\n", + "def prompt_developer(state: AgentState) -> AgentState:\n", + " # llm = ChatOpenAI(temperature=0.1)\n", + " \n", + " if not state.system_message:\n", + " # Initial system message creation\n", + " initial_prompt = ChatPromptTemplate.from_messages([\n", + " (\"system\", \"\"\"# Expert Prompt Engineer\n", + "\n", + "You are an expert prompt engineer tasked with creating system messages for AI\n", + "assistants.\n", + "\n", + "## Instructions\n", + "\n", + "1. Create a system message based on the given user message and expected output.\n", + "2. Ensure the system message can handle similar user messages.\n", + "3. Output only the system message, without any additional content.\n", + "4. Expected Output text should not appear in System Message as an example. But\n", + " it's OK to use some similar text as an example instead.\n", + "5. Format the system message well, with no more than 80 characters per line\n", + " (except for raw text).\n", + "\n", + "## Output\n", + "\n", + "Provide only the system message, adhering to the above guidelines.\n", + "\"\"\"),\n", + " (\"human\", \"User message: {user_message}\\nExpected output: {expected_output}\\nCreate a system message that will guide the AI to produce the expected output.\")\n", + " ])\n", + " response = llm(initial_prompt.format_messages(\n", + " user_message=state.user_message, \n", + " expected_output=state.expected_output\n", + " ))\n", + " state.system_message = response.content\n", + " else:\n", + " # Update system message based on analysis\n", + " update_prompt = ChatPromptTemplate.from_messages([\n", + " (\"system\", \"\"\"# Expert Prompt Engineer\n", + "\n", + "You are an expert prompt engineer tasked with updating system messages for AI\n", + "assistants. 
You Update System Message according to Suggestions, to improve\n", + "Output and match Expected Output more closely.\n", + "\n", + "## Instructions\n", + "\n", + "1. Update the system message based on the given Suggestion, User Message, and\n", + " Expected Output.\n", + "2. Ensure the updated system message can handle similar user messages.\n", + "3. Modify only the content mentioned in the Suggestion. Do not change the\n", + " parts that are not related to the Suggestion.\n", + "4. Output only the updated system message, without any additional content.\n", + "5. Expected Output text should not appear in System Message as an example. But\n", + " it's OK to use some similar text as an example instead.\n", + " * Remove the Expected Output text or text highly similar to Expected Output\n", + " from System Message, if it's present.\n", + "6. Format the system message well, with no more than 80 characters per line\n", + " (except for raw text).\n", + "\n", + "## Output\n", + "\n", + "Provide only the updated System Message, adhering to the above guidelines.\n", + "\"\"\"),\n", + " (\"human\", \"\"\"Current system message: {system_message}\n", + "# User Message\n", + "\n", + "{user_message}\n", + "\n", + "# Expected Output\n", + "\n", + "{expected_output}\n", + "\n", + "# Suggestions\n", + "\n", + "{suggestions}\n", + "\"\"\")\n", + " ])\n", + " response = llm(update_prompt.format_messages(**state.dict()))\n", + " state.system_message = response.content\n", + " print(state.system_message)\n", + "\n", + " # state.messages.append(SystemMessage(content=state.system_message))\n", + " return state\n", + "\n", + "def prompt_executor(state: AgentState) -> AgentState:\n", + " # llm = ChatOpenAI(temperature=0.1)\n", + " messages = [\n", + " SystemMessage(content=state.system_message),\n", + " HumanMessage(content=state.user_message)\n", + " ]\n", + " response = executor_llm(messages)\n", + " state.output = response.content\n", + " # 
state.messages.append(HumanMessage(content=state.user_message))\n", + " # state.messages.append(response)\n", + "\n", + " print(response.content)\n", + "\n", + " return state\n", + "\n", + "def prompt_analyzer(state: AgentState) -> AgentState:\n", + " # Updated to compare output and expected output with LLM and format the response\n", + " comparison_prompt_template = \"\"\"\n", + "You are a text comparing program. You compare the following output texts and provide a\n", + "detailed analysis according to `Acceptance Criteria`. Then you decide whether `Actual Output`\n", + "is acceptable.\n", + "\n", + "Provide your analysis in the following format:\n", + "\n", + "```\n", + "- Acceptable Differences: [List acceptable differences succinctly]\n", + "- Unacceptable Differences: [List unacceptable differences succinctly]\n", + "- Accept: [Yes/No]\n", + "```\n", + "\n", + "* Compare Expected Output and Actual Output with the guidance of Accept Criteria.\n", + "* Only set 'Accept' to 'Yes', if Accept Criteria are all met. 
Otherwise, set 'Accept' to 'No'.\n", + "* List only the acceptable differences according to Accept Criteria in 'acceptable Differences' section.\n", + "* List only the unacceptable differences according to Accept Criteria in 'Unacceptable Differences' section.\n", + "\n", + "# Acceptance Criteria\n", + "\n", + "```\n", + "{acceptance_criteria}\n", + "```\n", + "\"\"\"\n", + " human_prompt_template = \"\"\"\n", + "# Expected Output\n", + "\n", + "```\n", + "{expected_output}\n", + "```\n", + "\n", + "# Actual Output\n", + "\n", + "```\n", + "{output}\n", + "```\n", + "\"\"\"\n", + "\n", + " comparison_prompt = ChatPromptTemplate.from_messages([\n", + " (\"system\", comparison_prompt_template),\n", + " (\"human\", human_prompt_template)\n", + " ])\n", + " \n", + " # Format the prompt with the current state\n", + " formatted_prompt = comparison_prompt.format_messages(**state.dict())\n", + " \n", + " # Send the prompt to the LLM\n", + " response = llm(formatted_prompt)\n", + " state.analysis = response.content\n", + "\n", + " print(response.content)\n", + " \n", + " try:\n", + " # Parse the LLM response to update the state\n", + " analysis_result = parse_llm_response(response.content)\n", + " \n", + " # Update state.accepted based on the LLM's analysis\n", + " state.accepted = analysis_result['Accept'].lower() == 'yes'\n", + " except KeyError:\n", + " # If the LLM response is not in the expected format, set accepted to False\n", + " state.accepted = False\n", + " \n", + " return state\n", + "\n", + "def parse_llm_response(response: str) -> dict:\n", + " \"\"\"\n", + " Extracts the value of the `- Accept:` line from the LLM response.\n", + " \"\"\"\n", + " lines = response.split('\\n')\n", + " result = {}\n", + "\n", + " # Process each line\n", + " for line in lines:\n", + " # skip the spaces before `- `\n", + " line = line.strip()\n", + " if line.startswith('- Accept:'):\n", + " result['Accept'] = line.split(': 
')[1].strip().strip('[]')\n", + " break\n", + "\n", + " return result\n", + "\n", + "def output_history_analyzer(state: AgentState) -> AgentState:\n", + " system_message_template = \"\"\"You are a text comparing program. You read the Acceptance Criteria,\n", + "compare the expected output with two different outputs, and decide which one is\n", + "more similar to the expected output.\n", + "\n", + "You output the following analysis according to the Acceptance Criteria:\n", + "\n", + "* Your analysis in a Markdown list.\n", + "* The ID of the output that is more similar to the Expected Output as Preferred\n", + " Output ID, with the following format:\n", + " \n", + "```\n", + "# Analysis\n", + "\n", + "...\n", + "\n", + "# Preferred Output ID: [ID]\n", + "```\n", + "\n", + "If both outputs are equally similar to the expected output, output the following:\n", + "\n", + "```\n", + "# Analysis\n", + "\n", + "...\n", + "\n", + "# Draw\n", + "```\n", + "\"\"\"\n", + " human_message_templates = [\n", + " \"\"\"\n", + "# Output ID: A\n", + "\n", + "```\n", + "{best_output}\n", + "```\n", + "\n", + "# Output ID: B\n", + "\n", + "```\n", + "{output}\n", + "```\n", + "\n", + "# Acceptance Criteria\n", + "\n", + "{acceptance_criteria}\n", + "\n", + "# Expected Output\n", + "\n", + "```\n", + "{expected_output}\n", + "```\n", + "\"\"\",\n", + " \"\"\"\n", + "# Output ID: B\n", + "\n", + "```\n", + "{output}\n", + "```\n", + "\n", + "# Output ID: A\n", + "\n", + "```\n", + "{best_output}\n", + "```\n", + "\n", + "# Acceptance Criteria\n", + "\n", + "{acceptance_criteria}\n", + " \n", + "# Expected Output\n", + "\n", + "```\n", + "{expected_output}\n", + "```\n", + "\"\"\"\n", + " ]\n", + "\n", + " # pick a random human message template\n", + " output_comparison_prompt_template = ChatPromptTemplate.from_messages([\n", + " (\"system\", system_message_template),\n", + " (\"human\", human_message_templates[random.randint(0, 1)])\n", + " ])\n", + "\n", + " if 
(state.best_output is None or state.best_output == \"\") and \\\n", + " (state.best_system_message is None or state.best_system_message == \"\"):\n", + " state.best_output = state.output\n", + " state.best_system_message = state.system_message\n", + " state.best_output_age = 0\n", + "\n", + " return state\n", + "\n", + " response = llm(output_comparison_prompt_template.format_messages(**state.dict()))\n", + "\n", + " print(response.content)\n", + "\n", + " result = parse_output_history_analyzer(response.content, 'A')\n", + "\n", + " if result == 'B':\n", + " state.best_output = state.output\n", + " state.best_system_message = state.system_message\n", + " state.best_output_age = 0\n", + " else:\n", + " state.best_output_age += 1\n", + " state.output = state.best_output\n", + " state.system_message = state.best_system_message\n", + "\n", + " print(\"Best Output Age: \", state.best_output_age)\n", + "\n", + " return state\n", + "\n", + "def parse_output_history_analyzer(response: str, default_result = None) -> dict:\n", + " \"\"\"\n", + " Parses the LLM response to handle both single-line and multi-line formats for Differences and Suggestions.\n", + " \"\"\"\n", + " lines = response.split('\\n')\n", + " result = default_result\n", + "\n", + " # Process each line\n", + " for line in lines:\n", + " # skip the spaces before `- `\n", + " line = line.strip()\n", + " if line.startswith('# Preferred Output ID:'):\n", + " result = line.split(': ')[1].strip().strip('[]')\n", + " break\n", + " elif line.startswith('# Draw'): \n", + " result = default_result\n", + " break\n", + "\n", + " print(\"Result: \", result)\n", + "\n", + " return result\n", + "\n", + "def prompt_suggester(state: AgentState) -> AgentState:\n", + " # Updated to compare output and expected output with LLM and format the response\n", + " suggester_prompt_template = \"\"\"\n", + "Read the following inputs and outputs of an LLM prompt, and also analysis about them.\n", + "Then suggest how to improve System 
Prompt.\n", + "\n", + "* The goal is to improve the System Prompt to match the Expected Output better.\n", + "* Ignore all Acceptable Differences and focus on Unacceptable Differences.\n", + "* Suggest formal changes first, then semantic changes.\n", + "* Provide your suggestions in a Markdown list, nothing else. Output only the\n", + " suggestions related to Unacceptable Differences.\n", + " * Use `... should ...` to clearly state the desired output.\n", + " * Figure out the contexts of the System Message that conflict with the suggestions,\n", + " and suggest modification or deletion.\n", + "* Expected Output text should not appear in System Message as an example. But\n", + " it's OK to use some similar text as an example instead.\n", + " * Ask to remove the Expected Output text or text highly similar to Expected Output\n", + " from System Message, if it's present.\n", + "* Provide format examples or detected format name, if System Message does not.\n", + " * Specify the detected format name (e.g. XML, JSON, etc.) 
of Expected Output, if\n", + " System Message does not mention it.\n", + "\"\"\"\n", + " human_prompt_template = \"\"\"\n", + "System Prompt:\n", + "```\n", + "{system_message}\n", + "```\n", + "User Message:\n", + "```\n", + "{user_message}\n", + "```\n", + "Expected Output: \n", + "```\n", + "{expected_output}\n", + "```\n", + "Actual Output: \n", + "```\n", + "{output}\n", + "```\n", + "\n", + "Acceptance Criteria:\n", + "```\n", + "{acceptance_criteria}\n", + "```\n", + "\n", + "Analysis:\n", + "```\n", + "{analysis}\n", + "```\n", + "\"\"\"\n", + "\n", + " suggester_prompt = ChatPromptTemplate.from_messages([\n", + " (\"system\", suggester_prompt_template),\n", + " (\"human\", human_prompt_template)\n", + " ])\n", + " \n", + " # Format the prompt with the current state\n", + " formatted_prompt = suggester_prompt.format_messages(**state.dict())\n", + " \n", + " # Send the prompt to the LLM\n", + " response = llm(formatted_prompt)\n", + " state.suggestions = response.content\n", + "\n", + " print(response.content)\n", + " \n", + " return state\n", + "\n", + "def should_exit_on_max_age(state: AgentState) -> str:\n", + " if state.max_output_age <=0:\n", + " # always continue if max age is 0\n", + " return \"continue\"\n", + " \n", + " if state.best_output_age >= state.max_output_age:\n", + " return END\n", + " \n", + " if state.best_output_age > 0:\n", + " # skip prompt_analyzer and prompt_suggester, goto prompt_developer\n", + " return \"rerun\" \n", + " \n", + " return \"continue\"\n", + "\n", + "def should_exit_on_acceptable_output(state: AgentState) -> str:\n", + " if state.accepted:\n", + " return END\n", + " else:\n", + " return \"continue\"\n", + "\n", + "\n", + "workflow = StateGraph(AgentState)\n", + "\n", + "workflow.add_node(\"prompt_developer\", prompt_developer)\n", + "workflow.add_node(\"prompt_executor\", prompt_executor)\n", + "workflow.add_node(\"output_history_analyzer\", output_history_analyzer)\n", + "workflow.add_node(\"prompt_analyzer\", 
prompt_analyzer)\n", + "workflow.add_node(\"prompt_suggester\", prompt_suggester)\n", + "\n", + "workflow.set_entry_point(\"prompt_developer\")\n", + "\n", + "workflow.add_edge(\"prompt_developer\", \"prompt_executor\")\n", + "workflow.add_edge(\"prompt_executor\", \"output_history_analyzer\")\n", + "\n", + "workflow.add_conditional_edges(\n", + " \"output_history_analyzer\",\n", + " should_exit_on_max_age,\n", + " {\n", + " \"continue\": \"prompt_analyzer\",\n", + " \"rerun\": \"prompt_suggester\",\n", + " END: END\n", + " }\n", + ")\n", + "\n", + "workflow.add_conditional_edges(\n", + " \"prompt_analyzer\",\n", + " should_exit_on_acceptable_output,\n", + " {\n", + " \"continue\": \"prompt_suggester\",\n", + " END: END\n", + " }\n", + ")\n", + "\n", + "workflow.add_edge(\"prompt_suggester\", \"prompt_developer\")\n", + "\n", + "memory = MemorySaver()\n", + "graph = workflow.compile(checkpointer=memory)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Image, display\n", + "\n", + "try:\n", + " display(Image(graph.get_graph().draw_mermaid_png()))\n", + "except Exception:\n", + " # This requires some extra dependencies and is optional\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User Message:\n", + " \n", + "今天下午3点,在北京国家会议中心,阿里巴巴集团董事局主席马云宣布将投资100亿元人民币用于农村电商发展。这一决定受到了与会代表的热烈欢迎,大家认为这将为中国农村经济带来新的机遇。\n", + "\n", + "Expected Output:\n", + " \n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.82,\n", + " \"情感细分\": {\n", + " \"乐观\": 0.75,\n", + " \"兴奋\": 0.60,\n", + " \"期待\": 0.85\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\"实体\": \"北京\", \"类型\": \"地点\", \"起始位置\": 7, \"结束位置\": 
9},\n", + " {\"实体\": \"国家会议中心\", \"类型\": \"地点\", \"起始位置\": 9, \"结束位置\": 15},\n", + " {\"实体\": \"阿里巴巴集团\", \"类型\": \"组织\", \"起始位置\": 16, \"结束位置\": 22},\n", + " {\"实体\": \"马云\", \"类型\": \"人物\", \"起始位置\": 26, \"结束位置\": 28},\n", + " {\"实体\": \"100亿元\", \"类型\": \"金额\", \"起始位置\": 32, \"结束位置\": 37},\n", + " {\"实体\": \"人民币\", \"类型\": \"货币\", \"起始位置\": 37, \"结束位置\": 40},\n", + " {\"实体\": \"中国\", \"类型\": \"地点\", \"起始位置\": 71, \"结束位置\": 73}\n", + " ],\n", + " \"关键词提取\": [\n", + " {\"关键词\": \"农村电商\", \"权重\": 0.95},\n", + " {\"关键词\": \"马云\", \"权重\": 0.85},\n", + " {\"关键词\": \"投资\", \"权重\": 0.80},\n", + " {\"关键词\": \"阿里巴巴\", \"权重\": 0.75},\n", + " {\"关键词\": \"经济机遇\", \"权重\": 0.70}\n", + " ]\n", + " }\n", + "}\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead.\n", + " warn_deprecated(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```\n", + "You are a text analysis AI. 
Given a piece of text in Chinese, analyze it and return the following information in JSON format:\n", + "\n", + "* **文本分析结果:**\n", + " * **情感分析:**\n", + " * **整体情感:** (e.g., 积极, 消极, 中性)\n", + " * **情感得分:** (a number between 0 and 1)\n", + " * **情感细分:** (a dictionary of emotions and their scores)\n", + " * **实体识别:** A list of dictionaries, each containing:\n", + " * **实体:** (e.g., 人名, 地名, 组织名)\n", + " * **类型:** (e.g., 人物, 地点, 组织)\n", + " * **起始位置:** (the starting index of the entity in the text)\n", + " * **结束位置:** (the ending index of the entity in the text)\n", + " * **关键词提取:** A list of dictionaries, each containing:\n", + " * **关键词:** (the extracted keyword)\n", + " * **权重:** (the importance score of the keyword) \n", + "\n", + "\n", + "\n", + "```\n", + "```json\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.85,\n", + " \"情感细分\": {\n", + " \"高兴\": 0.6,\n", + " \"期待\": 0.25,\n", + " \"赞赏\": 0.1\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\n", + " \"实体\": \"马云\",\n", + " \"类型\": \"人物\",\n", + " \"起始位置\": 29,\n", + " \"结束位置\": 33\n", + " },\n", + " {\n", + " \"实体\": \"阿里巴巴集团\",\n", + " \"类型\": \"组织\",\n", + " \"起始位置\": 16,\n", + " \"结束位置\": 27\n", + " },\n", + " {\n", + " \"实体\": \"北京国家会议中心\",\n", + " \"类型\": \"地点\",\n", + " \"起始位置\": 7,\n", + " \"结束位置\": 21\n", + " },\n", + " {\n", + " \"实体\": \"中国\",\n", + " \"类型\": \"国家\",\n", + " \"起始位置\": 60,\n", + " \"结束位置\": 63\n", + " }\n", + " ],\n", + " \"关键词提取\": [\n", + " {\n", + " \"关键词\": \"投资\",\n", + " \"权重\": 0.25\n", + " },\n", + " {\n", + " \"关键词\": \"农村电商\",\n", + " \"权重\": 0.2\n", + " },\n", + " {\n", + " \"关键词\": \"马云\",\n", + " \"权重\": 0.18\n", + " },\n", + " {\n", + " \"关键词\": \"阿里巴巴\",\n", + " \"权重\": 0.15\n", + " },\n", + " {\n", + " \"关键词\": \"北京国家会议中心\",\n", + " \"权重\": 0.12\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "``` \n", + "\n", + "**Explanation:**\n", + "\n", + "* **情感分析:** The text expresses a positive sentiment overall, with a score of 
0.85. The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\n", + "* **实体识别:** The entities identified are:\n", + " * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\n", + " * **阿里巴巴集团 (Alibaba Group):** An organization, the company making the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** A location, where the announcement was made.\n", + " * **中国 (China):** A country, the beneficiary of the investment.\n", + "* **关键词提取:** The keywords extracted are:\n", + " * **投资 (investment):** The core action of the announcement.\n", + " * **农村电商 (rural e-commerce):** The focus of the investment.\n", + " * **马云 (Jack Ma):** The key person making the announcement.\n", + " * **阿里巴巴 (Alibaba):** The company behind the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** The location of the announcement, adding context.\n", + "\n", + "\n", + "\n", + "Let me know if you have any other text you'd like me to analyze!\n", + "```\n", + "- Acceptable Differences: \n", + " * Differences in digital values in the table.\n", + " * Differences in JSON field values\n", + " * Differences in section/item orders.\n", + "- Unacceptable Differences: \n", + " * \"情感细分\" field values are different.\n", + " * \"实体识别\" field values are different.\n", + " * \"关键词提取\" field values are different.\n", + "- Accept: No \n", + "``` \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "- The System Prompt should remove the example text. \n", + "- The System Prompt should specify the expected format of the output as JSON. \n", + "- The System Prompt should include a requirement for a \"国家\" (country) entity type. \n", + "\n", + "\n", + "\n", + "```\n", + "You are a text analysis AI. 
Given a piece of text in Chinese, analyze it and return the following information in JSON format:\n", + "\n", + "* **文本分析结果:**\n", + " * **情感分析:**\n", + " * **整体情感:** (e.g., 积极, 消极, 中性)\n", + " * **情感得分:** (a number between 0 and 1)\n", + " * **情感细分:** (a dictionary of emotions and their scores)\n", + " * **实体识别:** A list of dictionaries, each containing:\n", + " * **实体:** (e.g., 人名, 地名, 组织名)\n", + " * **类型:** (e.g., 人物, 地点, 组织, 国家)\n", + " * **起始位置:** (the starting index of the entity in the text)\n", + " * **结束位置:** (the ending index of the entity in the text)\n", + " * **关键词提取:** A list of dictionaries, each containing:\n", + " * **关键词:** (the extracted keyword)\n", + " * **权重:** (the importance score of the keyword) \n", + "```\n", + "```json\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.85,\n", + " \"情感细分\": {\n", + " \"高兴\": 0.6,\n", + " \"期待\": 0.25,\n", + " \"赞赏\": 0.1\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\n", + " \"实体\": \"马云\",\n", + " \"类型\": \"人物\",\n", + " \"起始位置\": 29,\n", + " \"结束位置\": 33\n", + " },\n", + " {\n", + " \"实体\": \"阿里巴巴集团\",\n", + " \"类型\": \"组织\",\n", + " \"起始位置\": 16,\n", + " \"结束位置\": 27\n", + " },\n", + " {\n", + " \"实体\": \"北京国家会议中心\",\n", + " \"类型\": \"地点\",\n", + " \"起始位置\": 7,\n", + " \"结束位置\": 21\n", + " },\n", + " {\n", + " \"实体\": \"中国\",\n", + " \"类型\": \"国家\",\n", + " \"起始位置\": 60,\n", + " \"结束位置\": 63\n", + " }\n", + " ],\n", + " \"关键词提取\": [\n", + " {\n", + " \"关键词\": \"投资\",\n", + " \"权重\": 0.2\n", + " },\n", + " {\n", + " \"关键词\": \"农村电商\",\n", + " \"权重\": 0.18\n", + " },\n", + " {\n", + " \"关键词\": \"马云\",\n", + " \"权重\": 0.15\n", + " },\n", + " {\n", + " \"关键词\": \"阿里巴巴\",\n", + " \"权重\": 0.12\n", + " },\n", + " {\n", + " \"关键词\": \"机遇\",\n", + " \"权重\": 0.1\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "``` \n", + "\n", + "\n", + "**Explanation:**\n", + "\n", + "* **情感分析:** The text expresses a positive sentiment overall, with a score of 0.85. 
The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\n", + "* **实体识别:** The entities identified are:\n", + " * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\n", + " * **阿里巴巴集团 (Alibaba Group):** An organization, a multinational technology company.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** A location, a convention center in Beijing.\n", + " * **中国 (China):** A country.\n", + "* **关键词提取:** The keywords extracted are:\n", + " * **投资 (investment):** Reflects the main action in the text.\n", + " * **农村电商 (rural e-commerce):** The focus of the investment.\n", + " * **马云 (Jack Ma):** The person making the announcement.\n", + " * **阿里巴巴 (Alibaba):** The company making the investment.\n", + " * **机遇 (opportunity):** The positive outcome expected from the investment.\n", + "\n", + "\n", + "\n", + "Let me know if you have any other text you'd like me to analyze!\n", + "\n", + "\n", + "# Analysis\n", + "\n", + "* Both outputs provide similar JSON structures with consistent sections: \"文本分析结果\", \"情感分析\", \"实体识别\", and \"关键词提取\".\n", + "* The \"情感分析\" section in both outputs shows a positive sentiment with a score around 0.85.\n", + "* The \"实体识别\" sections identify similar entities, including \"马云\", \"阿里巴巴集团\", \"北京国家会议中心\", and \"中国\".\n", + "* The \"关键词提取\" sections also show overlapping keywords like \"投资\", \"农村电商\", \"马云\", and \"阿里巴巴\".\n", + "\n", + "However, there are some notable differences:\n", + "\n", + "* Output A includes \"北京国家会议中心\" as a keyword, while Output B does not.\n", + "* Output B assigns slightly different weights to some keywords compared to Output A.\n", + "* Output A's \"情感分析\" section includes \"乐观\" and \"兴奋\" as emotions, while Output B uses \"高兴\" and \"期待\".\n", + "\n", + "* Output A's \"实体识别\" section includes \"北京\", \"国家会议中心\", \"100亿元\", and \"人民币\", which are not present in Output B.\n", + "\n", + "# Preferred Output ID: A \n", + "\n", + "\n", + "\n", + "Result: A\n", + 
"Best Output Age: 1\n", + "\n", + "\n", + "- The System Prompt should remove the example text of the expected output. \n", + "- The System Prompt should specify that the \"实体识别\" field should include \"金额\" and \"货币\" as entity types. \n", + "- The System Prompt should specify that the \"关键词提取\" field should include keywords related to the context of the text. \n", + "\n", + "\n", + "\n", + "\n", + "```\n", + "You are a text analysis AI. Given a piece of text in Chinese, analyze it and return the following information in JSON format:\n", + "\n", + "* **文本分析结果:**\n", + " * **情感分析:**\n", + " * **整体情感:** (e.g., 积极, 消极, 中性)\n", + " * **情感得分:** (a number between 0 and 1)\n", + " * **情感细分:** (a dictionary of emotions and their scores)\n", + " * **实体识别:** A list of dictionaries, each containing:\n", + " * **实体:** (e.g., 人名, 地名, 组织名)\n", + " * **类型:** (e.g., 人物, 地点, 组织, 金额, 货币)\n", + " * **起始位置:** (the starting index of the entity in the text)\n", + " * **结束位置:** (the ending index of the entity in the text)\n", + " * **关键词提取:** A list of dictionaries, each containing:\n", + " * **关键词:** (the extracted keyword)\n", + " * **权重:** (the importance score of the keyword) \n", + "\n", + "\n", + "\n", + "```\n", + "```json\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.85,\n", + " \"情感细分\": {\n", + " \"高兴\": 0.6,\n", + " \"期待\": 0.25,\n", + " \"赞赏\": 0.1\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\n", + " \"实体\": \"马云\",\n", + " \"类型\": \"人物\",\n", + " \"起始位置\": 29,\n", + " \"结束位置\": 33\n", + " },\n", + " {\n", + " \"实体\": \"阿里巴巴集团\",\n", + " \"类型\": \"组织\",\n", + " \"起始位置\": 16,\n", + " \"结束位置\": 27\n", + " },\n", + " {\n", + " \"实体\": \"北京国家会议中心\",\n", + " \"类型\": \"地点\",\n", + " \"起始位置\": 7,\n", + " \"结束位置\": 21\n", + " },\n", + " {\n", + " \"实体\": \"100亿元人民币\",\n", + " \"类型\": \"金额\",\n", + " \"起始位置\": 38,\n", + " \"结束位置\": 51\n", + " },\n", + " {\n", + " \"实体\": \"中国农村经济\",\n", + " \"类型\": \"经济\",\n", + " \"起始位置\": 
70,\n", + " \"结束位置\": 83\n", + " }\n", + " ],\n", + " \"关键词提取\": [\n", + " {\n", + " \"关键词\": \"马云\",\n", + " \"权重\": 0.25\n", + " },\n", + " {\n", + " \"关键词\": \"阿里巴巴\",\n", + " \"权重\": 0.18\n", + " },\n", + " {\n", + " \"关键词\": \"投资\",\n", + " \"权重\": 0.15\n", + " },\n", + " {\n", + " \"关键词\": \"农村电商\",\n", + " \"权重\": 0.12\n", + " },\n", + " {\n", + " \"关键词\": \"机遇\",\n", + " \"权重\": 0.1\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "``` \n", + "\n", + "**Explanation:**\n", + "\n", + "* **情感分析:** The text expresses a positive sentiment overall, with a score of 0.85. The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\n", + "* **实体识别:** The entities identified are:\n", + " * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\n", + " * **阿里巴巴集团 (Alibaba Group):** An organization.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** A location.\n", + " * **100亿元人民币 (10 billion yuan):** An amount of money.\n", + " * **中国农村经济 (Chinese rural economy):** An economic concept.\n", + "* **关键词提取:** The keywords extracted are:\n", + " * **马云 (Jack Ma):** The main subject of the announcement.\n", + " * **阿里巴巴 (Alibaba):** The company making the investment.\n", + " * **投资 (investment):** The core action being taken.\n", + " * **农村电商 (rural e-commerce):** The area of focus for the investment.\n", + " * **机遇 (opportunity):** The positive outcome expected from the investment.\n", + "\n", + "\n", + "\n", + "Let me know if you have any other text you'd like me to analyze!\n", + "\n", + "\n", + "# Analysis\n", + "\n", + "* Both outputs provide similar JSON structures with consistent top-level sections: \"文本分析结果\", \"情感分析\", \"实体识别\", and \"关键词提取\".\n", + "* The \"情感分析\" section in both outputs shows a positive sentiment with a score close to 0.85. 
\n", + "* There are differences in the specific emotions detected and the scores assigned to them.\n", + "* The \"实体识别\" sections identify some overlapping entities but also have differences in the detected entities and their classifications.\n", + "* The \"关键词提取\" sections show variations in the extracted keywords and their assigned weights.\n", + "\n", + "Considering the acceptable differences outlined in the Acceptance Criteria, both outputs demonstrate a reasonable level of similarity to the Expected Output. \n", + "\n", + "# Draw \n", + "\n", + "\n", + "\n", + "Result: A\n", + "Best Output Age: 2\n", + "\n", + "\n", + "- The System Prompt should remove the example text within the `文本分析结果` section. \n", + "- The System Prompt should specify that the `实体识别` section should include all named entities in the text, not just a subset. \n", + "- The System Prompt should specify that the `关键词提取` section should include the most relevant keywords, not just a few. \n", + "\n", + "\n", + "\n", + "```\n", + "You are a text analysis AI. 
Given a piece of text in Chinese, analyze it and return the following information in JSON format:\n", + "\n", + "* **文本分析结果:**\n", + " * **情感分析:**\n", + " * **整体情感:** (e.g., 积极, 消极, 中性)\n", + " * **情感得分:** (a number between 0 and 1)\n", + " * **情感细分:** (a dictionary of emotions and their scores)\n", + " * **实体识别:** A list of dictionaries, each containing:\n", + " * **实体:** (e.g., 人名, 地名, 组织名)\n", + " * **类型:** (e.g., 人物, 地点, 组织)\n", + " * **起始位置:** (the starting index of the entity in the text)\n", + " * **结束位置:** (the ending index of the entity in the text)\n", + " * **关键词提取:** A list of dictionaries, each containing:\n", + " * **关键词:** (the extracted keyword)\n", + " * **权重:** (the importance score of the keyword) \n", + "\n", + "\n", + "\n", + "```\n", + "```json\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.85,\n", + " \"情感细分\": {\n", + " \"高兴\": 0.6,\n", + " \"期待\": 0.25,\n", + " \"赞赏\": 0.1\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\n", + " \"实体\": \"马云\",\n", + " \"类型\": \"人物\",\n", + " \"起始位置\": 29,\n", + " \"结束位置\": 33\n", + " },\n", + " {\n", + " \"实体\": \"阿里巴巴集团\",\n", + " \"类型\": \"组织\",\n", + " \"起始位置\": 16,\n", + " \"结束位置\": 27\n", + " },\n", + " {\n", + " \"实体\": \"北京国家会议中心\",\n", + " \"类型\": \"地点\",\n", + " \"起始位置\": 7,\n", + " \"结束位置\": 21\n", + " },\n", + " {\n", + " \"实体\": \"中国\",\n", + " \"类型\": \"国家\",\n", + " \"起始位置\": 60,\n", + " \"结束位置\": 63\n", + " }\n", + " ],\n", + " \"关键词提取\": [\n", + " {\n", + " \"关键词\": \"投资\",\n", + " \"权重\": 0.25\n", + " },\n", + " {\n", + " \"关键词\": \"农村电商\",\n", + " \"权重\": 0.2\n", + " },\n", + " {\n", + " \"关键词\": \"马云\",\n", + " \"权重\": 0.18\n", + " },\n", + " {\n", + " \"关键词\": \"阿里巴巴\",\n", + " \"权重\": 0.15\n", + " },\n", + " {\n", + " \"关键词\": \"北京国家会议中心\",\n", + " \"权重\": 0.12\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "``` \n", + "\n", + "**Explanation:**\n", + "\n", + "* **情感分析:** The text expresses a positive sentiment overall, with a score of 
0.85. The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\n", + "* **实体识别:** The entities identified are:\n", + " * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\n", + " * **阿里巴巴集团 (Alibaba Group):** An organization, the company making the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** A location, where the announcement was made.\n", + " * **中国 (China):** A country, the beneficiary of the investment.\n", + "* **关键词提取:** The keywords extracted are:\n", + " * **投资 (investment):** The core action of the announcement.\n", + " * **农村电商 (rural e-commerce):** The target area of the investment.\n", + " * **马云 (Jack Ma):** The key person making the announcement.\n", + " * **阿里巴巴 (Alibaba):** The company behind the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** The location of the announcement, adding context.\n", + "\n", + "\n", + "\n", + "Let me know if you have any other text you'd like me to analyze!\n", + "\n", + "\n", + "# Analysis\n", + "\n", + "* Both outputs have the same top-level sections: \"文本分析结果\", \"情感分析\", \"实体识别\", and \"关键词提取\".\n", + "* Both outputs have consistent data types for all JSON fields.\n", + "* Both outputs have similar structures within each section. \n", + "* There are differences in the specific values for \"情感得分\", \"情感细分\", \"实体识别\" entities, and \"关键词提取\" keywords.\n", + "\n", + "# Preferred Output ID: A \n", + "\n", + "\n", + "While both outputs are structured similarly and adhere to the Acceptance Criteria, Output A is preferred because it closely mirrors the expected output's structure and field names. 
\n", + "\n", + "\n", + "\n", + "Result: A\n", + "Best Output Age: 3\n", + "Final Result: {'acceptance_criteria': '\\n* Consistent with Expected Output:\\n * Formats of all JSON sections\\n * Data types of all JSON fields\\n * Top layer sections\\n* Acceptable differences:\\n * Differences in digital values in the table.\\n * Extra or missing spaces.\\n * Extra or missing line breaks at the beginning or end of the output.\\n * Differences in JSON field values\\n * Differences in section/item orders.\\n * JSON wrapped in backquotes.\\n', 'user_message': '\\n今天下午3点,在北京国家会议中心,阿里巴巴集团董事局主席马云宣布将投资100亿元人民币用于农村电商发展。这一决定受到了与会代表的热烈欢迎,大家认为这将为中国农村经济带来新的机遇。\\n', 'expected_output': '\\n{\\n \"文本分析结果\": {\\n \"情感分析\": {\\n \"整体情感\": \"积极\",\\n \"情感得分\": 0.82,\\n \"情感细分\": {\\n \"乐观\": 0.75,\\n \"兴奋\": 0.60,\\n \"期待\": 0.85\\n }\\n },\\n \"实体识别\": [\\n {\"实体\": \"北京\", \"类型\": \"地点\", \"起始位置\": 7, \"结束位置\": 9},\\n {\"实体\": \"国家会议中心\", \"类型\": \"地点\", \"起始位置\": 9, \"结束位置\": 15},\\n {\"实体\": \"阿里巴巴集团\", \"类型\": \"组织\", \"起始位置\": 16, \"结束位置\": 22},\\n {\"实体\": \"马云\", \"类型\": \"人物\", \"起始位置\": 26, \"结束位置\": 28},\\n {\"实体\": \"100亿元\", \"类型\": \"金额\", \"起始位置\": 32, \"结束位置\": 37},\\n {\"实体\": \"人民币\", \"类型\": \"货币\", \"起始位置\": 37, \"结束位置\": 40},\\n {\"实体\": \"中国\", \"类型\": \"地点\", \"起始位置\": 71, \"结束位置\": 73}\\n ],\\n \"关键词提取\": [\\n {\"关键词\": \"农村电商\", \"权重\": 0.95},\\n {\"关键词\": \"马云\", \"权重\": 0.85},\\n {\"关键词\": \"投资\", \"权重\": 0.80},\\n {\"关键词\": \"阿里巴巴\", \"权重\": 0.75},\\n {\"关键词\": \"经济机遇\", \"权重\": 0.70}\\n ]\\n }\\n}\\n', 'system_message': '```\\nYou are a text analysis AI. 
Given a piece of text in Chinese, analyze it and return the following information in JSON format:\\n\\n* **文本分析结果:**\\n * **情感分析:**\\n * **整体情感:** (e.g., 积极, 消极, 中性)\\n * **情感得分:** (a number between 0 and 1)\\n * **情感细分:** (a dictionary of emotions and their scores)\\n * **实体识别:** A list of dictionaries, each containing:\\n * **实体:** (e.g., 人名, 地名, 组织名)\\n * **类型:** (e.g., 人物, 地点, 组织)\\n * **起始位置:** (the starting index of the entity in the text)\\n * **结束位置:** (the ending index of the entity in the text)\\n * **关键词提取:** A list of dictionaries, each containing:\\n * **关键词:** (the extracted keyword)\\n * **权重:** (the importance score of the keyword) \\n\\n\\n\\n```', 'output': '```json\\n{\\n \"文本分析结果\": {\\n \"情感分析\": {\\n \"整体情感\": \"积极\",\\n \"情感得分\": 0.85,\\n \"情感细分\": {\\n \"高兴\": 0.6,\\n \"期待\": 0.25,\\n \"赞赏\": 0.1\\n }\\n },\\n \"实体识别\": [\\n {\\n \"实体\": \"马云\",\\n \"类型\": \"人物\",\\n \"起始位置\": 29,\\n \"结束位置\": 33\\n },\\n {\\n \"实体\": \"阿里巴巴集团\",\\n \"类型\": \"组织\",\\n \"起始位置\": 16,\\n \"结束位置\": 27\\n },\\n {\\n \"实体\": \"北京国家会议中心\",\\n \"类型\": \"地点\",\\n \"起始位置\": 7,\\n \"结束位置\": 21\\n },\\n {\\n \"实体\": \"中国\",\\n \"类型\": \"国家\",\\n \"起始位置\": 60,\\n \"结束位置\": 63\\n }\\n ],\\n \"关键词提取\": [\\n {\\n \"关键词\": \"投资\",\\n \"权重\": 0.25\\n },\\n {\\n \"关键词\": \"农村电商\",\\n \"权重\": 0.2\\n },\\n {\\n \"关键词\": \"马云\",\\n \"权重\": 0.18\\n },\\n {\\n \"关键词\": \"阿里巴巴\",\\n \"权重\": 0.15\\n },\\n {\\n \"关键词\": \"北京国家会议中心\",\\n \"权重\": 0.12\\n }\\n ]\\n }\\n}\\n``` \\n\\n**Explanation:**\\n\\n* **情感分析:** The text expresses a positive sentiment overall, with a score of 0.85. 
The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\\n* **实体识别:** The entities identified are:\\n * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\\n * **阿里巴巴集团 (Alibaba Group):** An organization, the company making the investment.\\n * **北京国家会议中心 (Beijing National Convention Center):** A location, where the announcement was made.\\n * **中国 (China):** A country, the beneficiary of the investment.\\n* **关键词提取:** The keywords extracted are:\\n * **投资 (investment):** The core action of the announcement.\\n * **农村电商 (rural e-commerce):** The focus of the investment.\\n * **马云 (Jack Ma):** The key person making the announcement.\\n * **阿里巴巴 (Alibaba):** The company behind the investment.\\n * **北京国家会议中心 (Beijing National Convention Center):** The location of the announcement, adding context.\\n\\n\\n\\nLet me know if you have any other text you\\'d like me to analyze!', 'suggestions': '\\n\\n- The System Prompt should remove the example text within the `文本分析结果` section. \\n- The System Prompt should specify that the `实体识别` section should include all named entities in the text, not just a subset. \\n- The System Prompt should specify that the `关键词提取` section should include the most relevant keywords, not just a few. 
\\n\\n\\n', 'accepted': False, 'analysis': '```\\n- Acceptable Differences: \\n * Differences in digital values in the table.\\n * Differences in JSON field values\\n * Differences in section/item orders.\\n- Unacceptable Differences: \\n * \"情感细分\" field values are different.\\n * \"实体识别\" field values are different.\\n * \"关键词提取\" field values are different.\\n- Accept: No \\n``` \\n\\n\\n', 'best_output': '```json\\n{\\n \"文本分析结果\": {\\n \"情感分析\": {\\n \"整体情感\": \"积极\",\\n \"情感得分\": 0.85,\\n \"情感细分\": {\\n \"高兴\": 0.6,\\n \"期待\": 0.25,\\n \"赞赏\": 0.1\\n }\\n },\\n \"实体识别\": [\\n {\\n \"实体\": \"马云\",\\n \"类型\": \"人物\",\\n \"起始位置\": 29,\\n \"结束位置\": 33\\n },\\n {\\n \"实体\": \"阿里巴巴集团\",\\n \"类型\": \"组织\",\\n \"起始位置\": 16,\\n \"结束位置\": 27\\n },\\n {\\n \"实体\": \"北京国家会议中心\",\\n \"类型\": \"地点\",\\n \"起始位置\": 7,\\n \"结束位置\": 21\\n },\\n {\\n \"实体\": \"中国\",\\n \"类型\": \"国家\",\\n \"起始位置\": 60,\\n \"结束位置\": 63\\n }\\n ],\\n \"关键词提取\": [\\n {\\n \"关键词\": \"投资\",\\n \"权重\": 0.25\\n },\\n {\\n \"关键词\": \"农村电商\",\\n \"权重\": 0.2\\n },\\n {\\n \"关键词\": \"马云\",\\n \"权重\": 0.18\\n },\\n {\\n \"关键词\": \"阿里巴巴\",\\n \"权重\": 0.15\\n },\\n {\\n \"关键词\": \"北京国家会议中心\",\\n \"权重\": 0.12\\n }\\n ]\\n }\\n}\\n``` \\n\\n**Explanation:**\\n\\n* **情感分析:** The text expresses a positive sentiment overall, with a score of 0.85. 
The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\\n* **实体识别:** The entities identified are:\\n * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\\n * **阿里巴巴集团 (Alibaba Group):** An organization, the company making the investment.\\n * **北京国家会议中心 (Beijing National Convention Center):** A location, where the announcement was made.\\n * **中国 (China):** A country, the beneficiary of the investment.\\n* **关键词提取:** The keywords extracted are:\\n * **投资 (investment):** The core action of the announcement.\\n * **农村电商 (rural e-commerce):** The focus of the investment.\\n * **马云 (Jack Ma):** The key person making the announcement.\\n * **阿里巴巴 (Alibaba):** The company behind the investment.\\n * **北京国家会议中心 (Beijing National Convention Center):** The location of the announcement, adding context.\\n\\n\\n\\nLet me know if you have any other text you\\'d like me to analyze!', 'best_system_message': '```\\nYou are a text analysis AI. Given a piece of text in Chinese, analyze it and return the following information in JSON format:\\n\\n* **文本分析结果:**\\n * **情感分析:**\\n * **整体情感:** (e.g., 积极, 消极, 中性)\\n * **情感得分:** (a number between 0 and 1)\\n * **情感细分:** (a dictionary of emotions and their scores)\\n * **实体识别:** A list of dictionaries, each containing:\\n * **实体:** (e.g., 人名, 地名, 组织名)\\n * **类型:** (e.g., 人物, 地点, 组织)\\n * **起始位置:** (the starting index of the entity in the text)\\n * **结束位置:** (the ending index of the entity in the text)\\n * **关键词提取:** A list of dictionaries, each containing:\\n * **关键词:** (the extracted keyword)\\n * **权重:** (the importance score of the keyword) \\n\\n\\n\\n```', 'best_output_age': 3, 'max_output_age': 3}\n", + "System Message:\n", + "```\n", + "You are a text analysis AI. 
Given a piece of text in Chinese, analyze it and return the following information in JSON format:\n", + "\n", + "* **文本分析结果:**\n", + " * **情感分析:**\n", + " * **整体情感:** (e.g., 积极, 消极, 中性)\n", + " * **情感得分:** (a number between 0 and 1)\n", + " * **情感细分:** (a dictionary of emotions and their scores)\n", + " * **实体识别:** A list of dictionaries, each containing:\n", + " * **实体:** (e.g., 人名, 地名, 组织名)\n", + " * **类型:** (e.g., 人物, 地点, 组织)\n", + " * **起始位置:** (the starting index of the entity in the text)\n", + " * **结束位置:** (the ending index of the entity in the text)\n", + " * **关键词提取:** A list of dictionaries, each containing:\n", + " * **关键词:** (the extracted keyword)\n", + " * **权重:** (the importance score of the keyword) \n", + "\n", + "\n", + "\n", + "```\n", + "Output:\n", + "```json\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.85,\n", + " \"情感细分\": {\n", + " \"高兴\": 0.6,\n", + " \"期待\": 0.25,\n", + " \"赞赏\": 0.1\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\n", + " \"实体\": \"马云\",\n", + " \"类型\": \"人物\",\n", + " \"起始位置\": 29,\n", + " \"结束位置\": 33\n", + " },\n", + " {\n", + " \"实体\": \"阿里巴巴集团\",\n", + " \"类型\": \"组织\",\n", + " \"起始位置\": 16,\n", + " \"结束位置\": 27\n", + " },\n", + " {\n", + " \"实体\": \"北京国家会议中心\",\n", + " \"类型\": \"地点\",\n", + " \"起始位置\": 7,\n", + " \"结束位置\": 21\n", + " },\n", + " {\n", + " \"实体\": \"中国\",\n", + " \"类型\": \"国家\",\n", + " \"起始位置\": 60,\n", + " \"结束位置\": 63\n", + " }\n", + " ],\n", + " \"关键词提取\": [\n", + " {\n", + " \"关键词\": \"投资\",\n", + " \"权重\": 0.25\n", + " },\n", + " {\n", + " \"关键词\": \"农村电商\",\n", + " \"权重\": 0.2\n", + " },\n", + " {\n", + " \"关键词\": \"马云\",\n", + " \"权重\": 0.18\n", + " },\n", + " {\n", + " \"关键词\": \"阿里巴巴\",\n", + " \"权重\": 0.15\n", + " },\n", + " {\n", + " \"关键词\": \"北京国家会议中心\",\n", + " \"权重\": 0.12\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "``` \n", + "\n", + "**Explanation:**\n", + "\n", + "* **情感分析:** The text expresses a positive sentiment overall, 
with a score of 0.85. The emotions detected are \"高兴\" (happy), \"期待\" (expectation), and \"赞赏\" (appreciation).\n", + "* **实体识别:** The entities identified are:\n", + " * **马云 (Jack Ma):** A person, the chairman of Alibaba Group.\n", + " * **阿里巴巴集团 (Alibaba Group):** An organization, the company making the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** A location, where the announcement was made.\n", + " * **中国 (China):** A country, the beneficiary of the investment.\n", + "* **关键词提取:** The keywords extracted are:\n", + " * **投资 (investment):** The core action of the announcement.\n", + " * **农村电商 (rural e-commerce):** The focus of the investment.\n", + " * **马云 (Jack Ma):** The key person making the announcement.\n", + " * **阿里巴巴 (Alibaba):** The company behind the investment.\n", + " * **北京国家会议中心 (Beijing National Convention Center):** The location of the announcement, adding context.\n", + "\n", + "\n", + "\n", + "Let me know if you have any other text you'd like me to analyze!\n" + ] + } + ], + "source": [ + "initial_states = [\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"(2+8)*3\",\n", + " expected_output=\"\"\"(2+8)*3\n", + "= 10*3\n", + "= 30\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Exactly text match.\n", + "* Acceptable differences:\n", + " * Extra or missing spaces.\n", + " * Extra or missing line breaks at the beginning or end of the output.\n", + "\"\"\"),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"\"\"Here is the GDP data in billions of US dollars (USD) for these years:\n", + "\n", + "Germany:\n", + "\n", + "2015: $3,368.29 billion\n", + "2016: $3,467.79 billion\n", + "2017: $3,677.83 billion\n", + "2018: $3,946.00 billion\n", + "2019: $3,845.03 billion\n", + "France:\n", + "\n", + "2015: $2,423.47 billion\n", + "2016: $2,465.12 billion\n", + "2017: $2,582.49 billion\n", + "2018: $2,787.86 billion\n", + "2019: $2,715.52 billion\n", + "United Kingdom:\n", + 
"\n", + "2015: $2,860.58 billion\n", + "2016: $2,650.90 billion\n", + "2017: $2,622.43 billion\n", + "2018: $2,828.87 billion\n", + "2019: $2,829.21 billion\n", + "Italy:\n", + "\n", + "2015: $1,815.72 billion\n", + "2016: $1,852.50 billion\n", + "2017: $1,937.80 billion\n", + "2018: $2,073.90 billion\n", + "2019: $1,988.14 billion\n", + "Spain:\n", + "\n", + "2015: $1,199.74 billion\n", + "2016: $1,235.95 billion\n", + "2017: $1,313.13 billion\n", + "2018: $1,426.19 billion\n", + "2019: $1,430.38 billion\n", + "\"\"\",\n", + " expected_output=\"\"\"Year,Germany,France,United Kingdom,Italy,Spain\n", + "2016-2015,2.96%,1.71%,-7.35%,2.02%,3.04%\n", + "2017-2016,5.08%,4.78%,-1.07%,4.61%,6.23%\n", + "2018-2017,7.48%,7.99%,7.89%,7.10%,8.58%\n", + "2019-2018,-2.56%,-2.59%,0.01%,-4.11%,0.30%\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Strict text matching of the header row and first column(year).\n", + "* Acceptable differences:\n", + " * Differences in digital/percentage values in the table, even significant ones.\n", + " * Extra or missing spaces.\n", + " * Extra or missing line breaks.\n", + "\"\"\"),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"\"\"\n", + "Gene sequence: ATGGCCATGGCGCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA\n", + "Species: Escherichia coli\n", + "\"\"\",\n", + " expected_output=\"\"\"\n", + "{\n", + " \"Gene Sequence Analysis Results\": {\n", + " \"Basic Information\": {\n", + " \"Sequence Length\": 54,\n", + " \"GC Content\": \"51.85%\"\n", + " },\n", + " \"Nucleotide Composition\": {\n", + " \"A\": {\"Count\": 12, \"Percentage\": \"22.22%\"},\n", + " \"T\": {\"Count\": 11, \"Percentage\": \"20.37%\"},\n", + " \"G\": {\"Count\": 16, \"Percentage\": \"29.63%\"},\n", + " \"C\": {\"Count\": 15, \"Percentage\": \"27.78%\"}\n", + " },\n", + " \"Codon Analysis\": {\n", + " \"Start Codon\": \"ATG\",\n", + " \"Stop Codon\": \"TGA\",\n", + " \"Codon Table\": [\n", + " {\"Codon\": \"ATG\", \"Amino Acid\": \"Methionine\", 
\"Position\": 1},\n", + " {\"Codon\": \"GCC\", \"Amino Acid\": \"Alanine\", \"Position\": 2},\n", + " {\"Codon\": \"ATG\", \"Amino Acid\": \"Methionine\", \"Position\": 3},\n", + " // ... other codons ...\n", + " {\"Codon\": \"TGA\", \"Amino Acid\": \"Stop Codon\", \"Position\": 18}\n", + " ]\n", + " },\n", + " \"Potential Function Prediction\": {\n", + " \"Protein Length\": 17,\n", + " \"Possible Functional Domains\": [\n", + " {\"Domain Name\": \"ABC Transporter\", \"Start Position\": 5, \"End Position\": 15, \"Confidence\": \"75%\"},\n", + " {\"Domain Name\": \"Membrane Protein\", \"Start Position\": 1, \"End Position\": 17, \"Confidence\": \"60%\"}\n", + " ],\n", + " \"Secondary Structure Prediction\": {\n", + " \"α-helix\": [\"2-8\", \"12-16\"],\n", + " \"β-sheet\": [\"9-11\"],\n", + " \"Random Coil\": [\"1\", \"17\"]\n", + " }\n", + " },\n", + " \"Homology Analysis\": {\n", + " \"Most Similar Sequences\": [\n", + " {\n", + " \"Gene Name\": \"abcT\",\n", + " \"Species\": \"Salmonella enterica\",\n", + " \"Similarity\": \"89%\",\n", + " \"E-value\": \"3e-25\"\n", + " },\n", + " {\n", + " \"Gene Name\": \"yojI\",\n", + " \"Species\": \"Escherichia coli\",\n", + " \"Similarity\": \"95%\",\n", + " \"E-value\": \"1e-30\"\n", + " }\n", + " ]\n", + " },\n", + " \"Mutation Analysis\": {\n", + " \"SNP Sites\": [\n", + " {\"Position\": 27, \"Wild Type\": \"A\", \"Mutant\": \"G\", \"Amino Acid Change\": \"Glutamine->Arginine\"},\n", + " {\"Position\": 42, \"Wild Type\": \"C\", \"Mutant\": \"T\", \"Amino Acid Change\": \"None (Synonymous Mutation)\"}\n", + " ]\n", + " }\n", + " }\n", + "}\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Consistent with Expected Output:\n", + " * Formats of all JSON sections\n", + " * Data types of all JSON fields\n", + " * Top layer sections\n", + "* Acceptable differences:\n", + " * Extra or missing spaces\n", + " * Extra or missing line breaks at the beginning or end of the output\n", + " * Differences in JSON field values\n", 
+ " * JSON wrapped in backquotes\n", + "\"\"\"),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"\"\"\n", + "今天下午3点,在北京国家会议中心,阿里巴巴集团董事局主席马云宣布将投资100亿元人民币用于农村电商发展。这一决定受到了与会代表的热烈欢迎,大家认为这将为中国农村经济带来新的机遇。\n", + "\"\"\",\n", + " expected_output=\"\"\"\n", + "{\n", + " \"文本分析结果\": {\n", + " \"情感分析\": {\n", + " \"整体情感\": \"积极\",\n", + " \"情感得分\": 0.82,\n", + " \"情感细分\": {\n", + " \"乐观\": 0.75,\n", + " \"兴奋\": 0.60,\n", + " \"期待\": 0.85\n", + " }\n", + " },\n", + " \"实体识别\": [\n", + " {\"实体\": \"北京\", \"类型\": \"地点\", \"起始位置\": 7, \"结束位置\": 9},\n", + " {\"实体\": \"国家会议中心\", \"类型\": \"地点\", \"起始位置\": 9, \"结束位置\": 15},\n", + " {\"实体\": \"阿里巴巴集团\", \"类型\": \"组织\", \"起始位置\": 16, \"结束位置\": 22},\n", + " {\"实体\": \"马云\", \"类型\": \"人物\", \"起始位置\": 26, \"结束位置\": 28},\n", + " {\"实体\": \"100亿元\", \"类型\": \"金额\", \"起始位置\": 32, \"结束位置\": 37},\n", + " {\"实体\": \"人民币\", \"类型\": \"货币\", \"起始位置\": 37, \"结束位置\": 40},\n", + " {\"实体\": \"中国\", \"类型\": \"地点\", \"起始位置\": 71, \"结束位置\": 73}\n", + " ],\n", + " \"关键词提取\": [\n", + " {\"关键词\": \"农村电商\", \"权重\": 0.95},\n", + " {\"关键词\": \"马云\", \"权重\": 0.85},\n", + " {\"关键词\": \"投资\", \"权重\": 0.80},\n", + " {\"关键词\": \"阿里巴巴\", \"权重\": 0.75},\n", + " {\"关键词\": \"经济机遇\", \"权重\": 0.70}\n", + " ]\n", + " }\n", + "}\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Consistent with Expected Output:\n", + " * Formats of all JSON sections\n", + " * Data types of all JSON fields\n", + " * Top layer sections\n", + "* Acceptable differences:\n", + " * Differences in digital values in the table.\n", + " * Extra or missing spaces.\n", + " * Extra or missing line breaks at the beginning or end of the output.\n", + " * Differences in JSON field values\n", + " * Differences in section/item orders.\n", + " * JSON wrapped in backquotes.\n", + "\"\"\"),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"Low-noise amplifier\",\n", + " expected_output=\"\"\"\n", + "A '''low-noise amplifier''' ('''LNA''') is an electronic 
component that amplifies a very low-power [[signal]] without significantly degrading its [[signal-to-noise ratio]] (SNR). Any [[electronic amplifier]] will increase the power of both the signal and the [[Noise (electronics)|noise]] present at its input, but the amplifier will also introduce some additional noise. LNAs are designed to minimize that additional noise, by choosing special components, operating points, and [[Circuit topology (electrical)|circuit topologies]]. Minimizing additional noise must balance with other design goals such as [[power gain]] and [[impedance matching]].\n", + "\n", + "LNAs are found in [[Radio|radio communications]] systems, [[Amateur Radio]] stations, medical instruments and [[electronic test equipment]]. A typical LNA may supply a power gain of 100 (20 [[decibels]] (dB)) while decreasing the SNR by less than a factor of two (a 3 dB [[noise figure]] (NF)). Although LNAs are primarily concerned with weak signals that are just above the [[noise floor]], they must also consider the presence of larger signals that cause [[intermodulation distortion]].\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Consistent with Expected Output:\n", + " * Language\n", + " * Text length\n", + " * Text style\n", + " * Text structures\n", + "* Cover all the major content of Expected Output.\n", + "* Acceptable differences:\n", + " * Minor format differences.\n", + " * Expression differences.\n", + " * Numerical differences.\n", + " * Additional content in Actual Output.\n", + " * Missing minor content in Actual Output.\n", + "\"\"\"\n", + " ),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"What is the meaning of life?\",\n", + " expected_output=\"\"\"\n", + "[\n", + " {\"persona\": \"Philosopher\", \"prompt\": \"Explore the concept of life's meaning through the lens of existentialism and purpose-driven existence.\"},\n", + " {\"persona\": \"Scientist\", \"prompt\": \"Examine the biological and evolutionary perspectives on 
the function and significance of life.\"},\n", + " {\"persona\": \"Child\", \"prompt\": \"Imagine you're explaining to a curious 7-year-old what makes life special and important.\"}\n", + "]\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Consistent with Expected Output:\n", + " * Formats of all JSON sections\n", + " * Data types and formats of all JSON fields\n", + " * Top layer sections\n", + "* Acceptable differences:\n", + " * Differences in field values\n", + " * Extra or missing spaces\n", + " * Extra or missing line breaks at the beginning or end of the output\n", + " * JSON wrapped in backquotes\n", + "\"\"\"\n", + " ),\n", + " AgentState(\n", + " max_output_age=3,\n", + " user_message=\"\"\" 0) {\n", + " echo \"Login successful\";\n", + "} else {\n", + " echo \"Login failed\";\n", + "}\n", + "?>\n", + "\"\"\",\n", + " expected_output=\"\"\"\n", + "security_analysis:\n", + " vulnerabilities:\n", + " - type: SQL Injection\n", + " severity: Critical\n", + " description: Unsanitized user input directly used in SQL query\n", + " mitigation: Use prepared statements or parameterized queries\n", + " - type: Password Storage\n", + " severity: High\n", + " description: Passwords stored in plain text\n", + " mitigation: Use password hashing (e.g., bcrypt) before storage\n", + " additional_issues:\n", + " - Lack of input validation\n", + " - No CSRF protection\n", + " - Potential for timing attacks in login logic\n", + " overall_risk_score: 9.5/10\n", + " recommended_actions:\n", + " - Implement proper input sanitization\n", + " - Use secure password hashing algorithms\n", + " - Add CSRF tokens to forms\n", + " - Consider using a secure authentication library\n", + "\"\"\",\n", + " acceptance_criteria=\"\"\"\n", + "* Consistent with Expected Output:\n", + " * Formats of all YAML sections\n", + " * Data types and formats of all YAML fields\n", + " * Top layer sections\n", + "* Acceptable differences:\n", + " * Differences in field values\n", + " * Extra or 
missing spaces\n", + " * Extra or missing line breaks at the beginning or end of the output\n", + " * YAML wrapped in backquotes\n", + "\"\"\"\n", + " ),\n", + "]\n", + "\n", + "selected_states = initial_states[3:4]\n", + "\n", + "for initial_state in selected_states:\n", + " print(\"User Message:\\n\", initial_state.user_message)\n", + " print(\"Expected Output:\\n\", initial_state.expected_output)\n", + "\n", + " try:\n", + " config = {\"configurable\": {\"thread_id\": \"1\"}, \"recursion_limit\": 25}\n", + " result = graph.invoke(initial_state, config)\n", + " print(\"Final Result:\", result)\n", + "\n", + " # format system message, break it into multiple lines\n", + " print(\"System Message:\")\n", + " print(result['best_system_message'])\n", + " print(\"Output:\")\n", + " print(result['best_output'])\n", + " except Exception as e:\n", + " # print the error message, saying failed to converge\n", + " print(\"Failed to converge.\")\n", + " print(e)\n", + "\n", + " states = graph.get_state(config)\n", + "\n", + " # if the length of states is bigger than 0, print the best system message and output\n", + " if len(states) > 0:\n", + " result = states[0]\n", + "\n", + " print(\"System Message:\")\n", + " print(result['best_system_message'])\n", + " print(\"Output:\")\n", + " print(result['best_output'])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}