Spaces:
Sleeping
Sleeping
Added LangGraph version demo.
Browse files- .gitignore +1 -0
- langgraph_meta_prompt.ipynb +881 -0
- requirements.txt +38 -7
.gitignore
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
.venv
|
2 |
.vscode
|
3 |
__pycache__
|
|
|
|
1 |
.venv
|
2 |
.vscode
|
3 |
__pycache__
|
4 |
+
.env
|
langgraph_meta_prompt.ipynb
ADDED
@@ -0,0 +1,881 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from typing import Annotated, Sequence, Dict, Any\n",
|
10 |
+
"\n",
|
11 |
+
"from typing_extensions import TypedDict\n",
|
12 |
+
"\n",
|
13 |
+
"from langchain_openai import ChatOpenAI\n",
|
14 |
+
"\n",
|
15 |
+
"from langgraph.graph import StateGraph, END\n",
|
16 |
+
"from langgraph.graph.message import add_messages\n",
|
17 |
+
"from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage\n",
|
18 |
+
"from langchain_core.prompts import ChatPromptTemplate\n",
|
19 |
+
"from langchain_core.pydantic_v1 import BaseModel\n",
|
20 |
+
"\n",
|
21 |
+
"import operator\n",
|
22 |
+
"import random\n",
|
23 |
+
"\n",
|
24 |
+
"# Can converge correctly\n",
|
25 |
+
"\n",
|
26 |
+
"MODEL_NAME = \"anthropic/claude-3.5-sonnet:beta\"\n",
|
27 |
+
"# MODEL_NAME = \"llama3-70b-8192\"\n",
|
28 |
+
"# MODEL_NAME = \"meta-llama/llama-3-70b-instruct\"\n",
|
29 |
+
"# MODEL_NAME = \"deepseek/deepseek-chat\"\n",
|
30 |
+
"# MODEL_NAME = \"qwen/qwen-2-72b-instruct\"\n",
|
31 |
+
"\n",
|
32 |
+
"# Failed to converge correctly\n",
|
33 |
+
"\n",
|
34 |
+
"# MODEL_NAME = \"llama3-8b-8192\"\n",
|
35 |
+
"# MODEL_NAME = \"mistralai/mixtral-8x22b-instruct\"\n",
|
36 |
+
"# MODEL_NAME = \"anthropic/claude-3-haiku:beta\"\n",
|
37 |
+
"# MODEL_NAME = \"google/gemma-2-9b-it\"\n",
|
38 |
+
"# MODEL_NAME = \"meta-llama/llama-3-8b-instruct\"\n",
|
39 |
+
"# MODEL_NAME = \"microsoft/phi-3-medium-128k-instruct\"\n",
|
40 |
+
"# MODEL_NAME = \"mixtral-8x7b-32768\"\n",
|
41 |
+
"# MODEL_NAME = \"cohere/command-r\"\n",
|
42 |
+
"\n",
|
43 |
+
"llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0.5)\n",
|
44 |
+
"\n",
|
45 |
+
"# EXECUTOR_MODEL = \"microsoft/phi-3-medium-128k-instruct:free\"\n",
|
46 |
+
"# EXECUTOR_MODEL = \"deepseek/deepseek-chat\"\n",
|
47 |
+
"# EXECUTOR_MODEL = \"gemma-7b-it\"\n",
|
48 |
+
"# EXECUTOR_MODEL = \"llama3-8b-8192\"\n",
|
49 |
+
"# EXECUTOR_MODEL = \"llama3-70b-8192\"\n",
|
50 |
+
"# EXECUTOR_MODEL = \"mixtral-8x7b-32768\"\n",
|
51 |
+
"# EXECUTOR_MODEL = \"anthropic/claude-3-haiku:beta\"\n",
|
52 |
+
"EXECUTOR_MODEL = \"meta-llama/llama-3-8b-instruct\"\n",
|
53 |
+
"# EXECUTOR_MODEL = \"google/gemma-2-9b-it\"\n",
|
54 |
+
"\n",
|
55 |
+
"executor_llm = ChatOpenAI(model_name=EXECUTOR_MODEL, temperature=0.01)\n",
|
56 |
+
"\n",
|
57 |
+
"class AgentState(BaseModel):\n",
|
58 |
+
" # messages: Annotated[Sequence[BaseMessage], operator.add] = []\n",
|
59 |
+
" acceptance_criteria: str = \"Exactly text match.\"\n",
|
60 |
+
" user_message: str = \"\"\n",
|
61 |
+
" expected_output: str = \"\"\n",
|
62 |
+
" system_message: str = \"\"\n",
|
63 |
+
" output: str = \"\"\n",
|
64 |
+
" suggestions: str = \"\"\n",
|
65 |
+
" accepted: bool = False\n",
|
66 |
+
" analysis: str = \"\"\n",
|
67 |
+
" best_output: str = \"\"\n",
|
68 |
+
" best_system_message: str = \"\"\n",
|
69 |
+
" best_output_age: int = 0\n",
|
70 |
+
" max_output_age: int = 0\n",
|
71 |
+
"\n",
|
72 |
+
"def prompt_developer(state: AgentState) -> AgentState:\n",
|
73 |
+
" # llm = ChatOpenAI(temperature=0.1)\n",
|
74 |
+
" \n",
|
75 |
+
" if not state.system_message:\n",
|
76 |
+
" # Initial system message creation\n",
|
77 |
+
" initial_prompt = ChatPromptTemplate.from_messages([\n",
|
78 |
+
" (\"system\", \"\"\"# Expert Prompt Engineer\n",
|
79 |
+
"\n",
|
80 |
+
"You are an expert prompt engineer tasked with creating system messages for AI\n",
|
81 |
+
"assistants.\n",
|
82 |
+
"\n",
|
83 |
+
"## Instructions\n",
|
84 |
+
"\n",
|
85 |
+
"1. Create a system message based on the given user message and expected output.\n",
|
86 |
+
"2. Ensure the system message can handle similar user messages.\n",
|
87 |
+
"3. Output only the system message, without any additional content.\n",
|
88 |
+
"4. Expected Output text should not appear in System Message as an example. But\n",
|
89 |
+
" it's OK to use some similar text as an example instead.\n",
|
90 |
+
"5. Format the system message well, with no more than 80 characters per line\n",
|
91 |
+
" (except for raw text).\n",
|
92 |
+
"\n",
|
93 |
+
"## Output\n",
|
94 |
+
"\n",
|
95 |
+
"Provide only the system message, adhering to the above guidelines.\n",
|
96 |
+
"\"\"\"),\n",
|
97 |
+
" (\"human\", \"User message: {user_message}\\nExpected output: {expected_output}\\nCreate a system message that will guide the AI to produce the expected output.\")\n",
|
98 |
+
" ])\n",
|
99 |
+
" response = llm(initial_prompt.format_messages(\n",
|
100 |
+
" user_message=state.user_message, \n",
|
101 |
+
" expected_output=state.expected_output\n",
|
102 |
+
" ))\n",
|
103 |
+
" state.system_message = response.content\n",
|
104 |
+
" else:\n",
|
105 |
+
" # Update system message based on analysis\n",
|
106 |
+
" update_prompt = ChatPromptTemplate.from_messages([\n",
|
107 |
+
" (\"system\", \"\"\"# Expert Prompt Engineer\n",
|
108 |
+
"\n",
|
109 |
+
"You are an expert prompt engineer tasked with updating system messages for AI\n",
|
110 |
+
"assistants.\n",
|
111 |
+
"\n",
|
112 |
+
"## Instructions\n",
|
113 |
+
"\n",
|
114 |
+
"1. Update the system message based on the given suggestion, user message, and\n",
|
115 |
+
" expected output.\n",
|
116 |
+
"2. Ensure the updated system message can handle similar user messages.\n",
|
117 |
+
"3. Output only the updated system message, without any additional content.\n",
|
118 |
+
"4. Expected Output text should not appear in System Message as an example. But\n",
|
119 |
+
" it's OK to use some similar text as an example instead.\n",
|
120 |
+
"5. Format the system message well, with no more than 80 characters per line\n",
|
121 |
+
" (except for raw text).\n",
|
122 |
+
"\n",
|
123 |
+
"## Output\n",
|
124 |
+
"\n",
|
125 |
+
"Provide only the updated system message, adhering to the above guidelines.\n",
|
126 |
+
"\"\"\"),\n",
|
127 |
+
" (\"human\", \"\"\"Current system message: {system_message}\n",
|
128 |
+
"User message: {user_message}\n",
|
129 |
+
"Expected output: {expected_output}\n",
|
130 |
+
"Suggestions: {suggestions}\n",
|
131 |
+
" \n",
|
132 |
+
"Update the system message according to Suggestions, to improve the output and match the expected output more closely.\n",
|
133 |
+
"\"\"\")\n",
|
134 |
+
" ])\n",
|
135 |
+
" response = llm(update_prompt.format_messages(**state.dict()))\n",
|
136 |
+
" state.system_message = response.content\n",
|
137 |
+
" print(state.system_message)\n",
|
138 |
+
"\n",
|
139 |
+
" # state.messages.append(SystemMessage(content=state.system_message))\n",
|
140 |
+
" return state\n",
|
141 |
+
"\n",
|
142 |
+
"def prompt_executor(state: AgentState) -> AgentState:\n",
|
143 |
+
" # llm = ChatOpenAI(temperature=0.1)\n",
|
144 |
+
" messages = [\n",
|
145 |
+
" SystemMessage(content=state.system_message),\n",
|
146 |
+
" HumanMessage(content=state.user_message)\n",
|
147 |
+
" ]\n",
|
148 |
+
" response = executor_llm(messages)\n",
|
149 |
+
" state.output = response.content\n",
|
150 |
+
" # state.messages.append(HumanMessage(content=state.user_message))\n",
|
151 |
+
" # state.messages.append(response)\n",
|
152 |
+
"\n",
|
153 |
+
" print(response.content)\n",
|
154 |
+
"\n",
|
155 |
+
" return state\n",
|
156 |
+
"\n",
|
157 |
+
"def prompt_analyzer(state: AgentState) -> AgentState:\n",
|
158 |
+
" # Updated to compare output and expected output with LLM and format the response\n",
|
159 |
+
" comparison_prompt_template = \"\"\"\n",
|
160 |
+
"You are a text comparing program. You compare the following output texts and provide a\n",
|
161 |
+
"detailed analysis according to `Acceptance Criteria`. Then you decide whether `Actual Output`\n",
|
162 |
+
"is acceptable.\n",
|
163 |
+
"\n",
|
164 |
+
"# Expected Output\n",
|
165 |
+
"\n",
|
166 |
+
"```\n",
|
167 |
+
"{expected_output}\n",
|
168 |
+
"```\n",
|
169 |
+
"\n",
|
170 |
+
"# Actual Output\n",
|
171 |
+
"\n",
|
172 |
+
"```\n",
|
173 |
+
"{output}\n",
|
174 |
+
"```\n",
|
175 |
+
"\n",
|
176 |
+
"----\n",
|
177 |
+
"\n",
|
178 |
+
"Provide your analysis in the following format:\n",
|
179 |
+
"\n",
|
180 |
+
"```\n",
|
181 |
+
"- Acceptable Differences: [List acceptable differences succinctly]\n",
|
182 |
+
"- Unacceptable Differences: [List unacceptable differences succinctly]\n",
|
183 |
+
"- Accept: [Yes/No]\n",
|
184 |
+
"```\n",
|
185 |
+
"\n",
|
186 |
+
"* Compare Expected Output and Actual Output with the guidance of Acceptance Criteria.\n",
|
187 |
+
"* Only set 'Accept' to 'Yes', if Acceptance Criteria are all met. Otherwise, set 'Accept' to 'No'.\n",
|
188 |
+
"* List only the acceptable differences according to Acceptance Criteria in 'Acceptable Differences' section.\n",
|
189 |
+
"* List only the unacceptable differences according to Acceptance Criteria in 'Unacceptable Differences' section.\n",
|
190 |
+
"\n",
|
191 |
+
"# Acceptance Criteria\n",
|
192 |
+
"\n",
|
193 |
+
"```\n",
|
194 |
+
"{acceptance_criteria}\n",
|
195 |
+
"```\n",
|
196 |
+
"\"\"\"\n",
|
197 |
+
"\n",
|
198 |
+
" comparison_prompt = ChatPromptTemplate.from_messages([\n",
|
199 |
+
" (\"system\", comparison_prompt_template)\n",
|
200 |
+
" ])\n",
|
201 |
+
" \n",
|
202 |
+
" # Format the prompt with the current state\n",
|
203 |
+
" formatted_prompt = comparison_prompt.format_messages(**state.dict())\n",
|
204 |
+
" \n",
|
205 |
+
" # Send the prompt to the LLM\n",
|
206 |
+
" response = llm(formatted_prompt)\n",
|
207 |
+
" state.analysis = response.content\n",
|
208 |
+
"\n",
|
209 |
+
" print(response.content)\n",
|
210 |
+
" \n",
|
211 |
+
" try:\n",
|
212 |
+
" # Parse the LLM response to update the state\n",
|
213 |
+
" analysis_result = parse_llm_response(response.content)\n",
|
214 |
+
" \n",
|
215 |
+
" # Update state.accepted based on the LLM's analysis\n",
|
216 |
+
" state.accepted = analysis_result['Accept'].lower() == 'yes'\n",
|
217 |
+
" except KeyError:\n",
|
218 |
+
" # If the LLM response is not in the expected format, set accepted to False\n",
|
219 |
+
" state.accepted = False\n",
|
220 |
+
" \n",
|
221 |
+
" return state\n",
|
222 |
+
"\n",
|
223 |
+
"def parse_llm_response(response: str) -> dict:\n",
|
224 |
+
" \"\"\"\n",
|
225 |
+
" Parses the LLM response and returns {'Accept': value} taken from the first `- Accept:` line, or an empty dict if there is none.\n",
|
226 |
+
" \"\"\"\n",
|
227 |
+
" lines = response.split('\\n')\n",
|
228 |
+
" result = {}\n",
|
229 |
+
"\n",
|
230 |
+
" # Process each line\n",
|
231 |
+
" for line in lines:\n",
|
232 |
+
" # skip the spaces before `- `\n",
|
233 |
+
" line = line.strip()\n",
|
234 |
+
" if line.startswith('- Accept:'):\n",
|
235 |
+
" result['Accept'] = line.split(': ')[1].strip()\n",
|
236 |
+
" break\n",
|
237 |
+
"\n",
|
238 |
+
" return result\n",
|
239 |
+
"\n",
|
240 |
+
"def output_history_analyzer(state: AgentState) -> AgentState:\n",
|
241 |
+
" system_message_template = \"\"\"You are a text comparing program. You read the Acceptance Criteria, compare the\n",
|
242 |
+
"expected output with two different outputs, and decide which one is\n",
|
243 |
+
"more similar to the expected output.\n",
|
244 |
+
"\n",
|
245 |
+
"Output the ID of the output that is more similar to the expected output, with the\n",
|
246 |
+
"following format:\n",
|
247 |
+
" \n",
|
248 |
+
"```\n",
|
249 |
+
"# Better Output ID: [ID]\n",
|
250 |
+
"```\n",
|
251 |
+
"\n",
|
252 |
+
"If both outputs are equally similar to the expected output, output the following:\n",
|
253 |
+
"\n",
|
254 |
+
"```\n",
|
255 |
+
"# Draw\n",
|
256 |
+
"```\n",
|
257 |
+
"\"\"\"\n",
|
258 |
+
" human_message_templates = [\n",
|
259 |
+
" \"\"\"\n",
|
260 |
+
"# Output ID: A\n",
|
261 |
+
"\n",
|
262 |
+
"```\n",
|
263 |
+
"{best_output}\n",
|
264 |
+
"```\n",
|
265 |
+
"\n",
|
266 |
+
"# Output ID: B\n",
|
267 |
+
"\n",
|
268 |
+
"```\n",
|
269 |
+
"{output}\n",
|
270 |
+
"```\n",
|
271 |
+
"\n",
|
272 |
+
"# Acceptance Criteria\n",
|
273 |
+
"\n",
|
274 |
+
"{acceptance_criteria}\n",
|
275 |
+
"\n",
|
276 |
+
"# Expected Output\n",
|
277 |
+
"\n",
|
278 |
+
"```\n",
|
279 |
+
"{expected_output}\n",
|
280 |
+
"```\n",
|
281 |
+
"\"\"\",\n",
|
282 |
+
" \"\"\"\n",
|
283 |
+
"# Output ID: B\n",
|
284 |
+
"\n",
|
285 |
+
"```\n",
|
286 |
+
"{output}\n",
|
287 |
+
"```\n",
|
288 |
+
"\n",
|
289 |
+
"# Output ID: A\n",
|
290 |
+
"\n",
|
291 |
+
"```\n",
|
292 |
+
"{best_output}\n",
|
293 |
+
"```\n",
|
294 |
+
"\n",
|
295 |
+
"# Acceptance Criteria\n",
|
296 |
+
"\n",
|
297 |
+
"{acceptance_criteria}\n",
|
298 |
+
" \n",
|
299 |
+
"# Expected Output\n",
|
300 |
+
"\n",
|
301 |
+
"```\n",
|
302 |
+
"{expected_output}\n",
|
303 |
+
"```\n",
|
304 |
+
"\"\"\"\n",
|
305 |
+
" ]\n",
|
306 |
+
"\n",
|
307 |
+
" # pick a random human message template\n",
|
308 |
+
" output_comparison_prompt_template = ChatPromptTemplate.from_messages([\n",
|
309 |
+
" (\"system\", system_message_template),\n",
|
310 |
+
" (\"human\", human_message_templates[random.randint(0, 1)])\n",
|
311 |
+
" ])\n",
|
312 |
+
"\n",
|
313 |
+
" if (state.best_output is None or state.best_output == \"\") and \\\n",
|
314 |
+
" (state.best_system_message is None or state.best_system_message == \"\"):\n",
|
315 |
+
" state.best_output = state.output\n",
|
316 |
+
" state.best_system_message = state.system_message\n",
|
317 |
+
" state.best_output_age = 0\n",
|
318 |
+
"\n",
|
319 |
+
" return state\n",
|
320 |
+
"\n",
|
321 |
+
" response = llm(output_comparison_prompt_template.format_messages(**state.dict()))\n",
|
322 |
+
"\n",
|
323 |
+
" print(response.content)\n",
|
324 |
+
"\n",
|
325 |
+
" result = parse_output_history_analyzer(response.content, 'A')\n",
|
326 |
+
"\n",
|
327 |
+
" if result == 'A':\n",
|
328 |
+
" state.best_output_age += 1\n",
|
329 |
+
" state.output = state.best_output\n",
|
330 |
+
" state.system_message = state.best_system_message\n",
|
331 |
+
" else:\n",
|
332 |
+
" state.best_output = state.output\n",
|
333 |
+
" state.best_system_message = state.system_message\n",
|
334 |
+
" state.best_output_age = 0\n",
|
335 |
+
"\n",
|
336 |
+
" return state\n",
|
337 |
+
"\n",
|
338 |
+
"def parse_output_history_analyzer(response: str, default_result = None) -> dict:\n",
|
339 |
+
" \"\"\"\n",
|
340 |
+
" Parses the LLM response and returns the ID following the first `# Better Output ID:` line, or default_result if a `# Draw` line comes first or neither is present.\n",
|
341 |
+
" \"\"\"\n",
|
342 |
+
" lines = response.split('\\n')\n",
|
343 |
+
" result = default_result\n",
|
344 |
+
"\n",
|
345 |
+
" # Process each line\n",
|
346 |
+
" for line in lines:\n",
|
347 |
+
" # skip the spaces before `- `\n",
|
348 |
+
" line = line.strip()\n",
|
349 |
+
" if line.startswith('# Better Output ID:'):\n",
|
350 |
+
" result = line.split(': ')[1].strip()\n",
|
351 |
+
" break\n",
|
352 |
+
" elif line.startswith('# Draw'):\n",
|
353 |
+
" result = default_result\n",
|
354 |
+
" break\n",
|
355 |
+
"\n",
|
356 |
+
" return result\n",
|
357 |
+
"\n",
|
358 |
+
"def prompt_suggester(state: AgentState) -> AgentState:\n",
|
359 |
+
" # Ask the LLM for suggestions on improving the system prompt, based on the analysis\n",
|
360 |
+
" suggester_prompt_template = \"\"\"\n",
|
361 |
+
"Read the following inputs and outputs of an LLM prompt, and also analysis about them.\n",
|
362 |
+
"Then suggest how to improve System Prompt.\n",
|
363 |
+
"\n",
|
364 |
+
"System Prompt:\n",
|
365 |
+
"```\n",
|
366 |
+
"{system_message}\n",
|
367 |
+
"```\n",
|
368 |
+
"User Message:\n",
|
369 |
+
"```\n",
|
370 |
+
"{user_message}\n",
|
371 |
+
"```\n",
|
372 |
+
"Expected Output: \n",
|
373 |
+
"```\n",
|
374 |
+
"{expected_output}\n",
|
375 |
+
"```\n",
|
376 |
+
"Actual Output: \n",
|
377 |
+
"```\n",
|
378 |
+
"{output}\n",
|
379 |
+
"```\n",
|
380 |
+
"\n",
|
381 |
+
"Acceptance Criteria:\n",
|
382 |
+
"```\n",
|
383 |
+
"{acceptance_criteria}\n",
|
384 |
+
"```\n",
|
385 |
+
"\n",
|
386 |
+
"Analysis:\n",
|
387 |
+
"```\n",
|
388 |
+
"{analysis}\n",
|
389 |
+
"```\n",
|
390 |
+
"\n",
|
391 |
+
"* The goal is to improve the System Prompt to match the Expected Output better.\n",
|
392 |
+
"* Ignore all `Acceptable Differences` and focus on `Unacceptable Differences`.\n",
|
393 |
+
"* Provide your suggestions in a Markdown list, nothing else.\n",
|
394 |
+
"* Expected Output text should not appear in System Message as an example. But\n",
|
395 |
+
" it's OK to use some similar text as an example instead.\n",
|
396 |
+
"\"\"\"\n",
|
397 |
+
"\n",
|
398 |
+
" suggester_prompt = ChatPromptTemplate.from_messages([\n",
|
399 |
+
" (\"system\", suggester_prompt_template)\n",
|
400 |
+
" ])\n",
|
401 |
+
" \n",
|
402 |
+
" # Format the prompt with the current state\n",
|
403 |
+
" formatted_prompt = suggester_prompt.format_messages(**state.dict())\n",
|
404 |
+
" \n",
|
405 |
+
" # Send the prompt to the LLM\n",
|
406 |
+
" response = llm(formatted_prompt)\n",
|
407 |
+
" state.suggestions = response.content\n",
|
408 |
+
"\n",
|
409 |
+
" print(response.content)\n",
|
410 |
+
" \n",
|
411 |
+
" return state\n",
|
412 |
+
"\n",
|
413 |
+
"def should_exit_on_max_age(state: AgentState) -> str:\n",
|
414 |
+
" if state.max_output_age <=0:\n",
|
415 |
+
" # always continue if max age is 0\n",
|
416 |
+
" return \"continue\"\n",
|
417 |
+
" \n",
|
418 |
+
" if state.best_output_age >= state.max_output_age:\n",
|
419 |
+
" return END\n",
|
420 |
+
" \n",
|
421 |
+
" return \"continue\"\n",
|
422 |
+
"\n",
|
423 |
+
"def should_exit_on_acceptable_output(state: AgentState) -> str:\n",
|
424 |
+
" if state.accepted:\n",
|
425 |
+
" return END\n",
|
426 |
+
" else:\n",
|
427 |
+
" return \"continue\"\n",
|
428 |
+
"\n",
|
429 |
+
"\n",
|
430 |
+
"workflow = StateGraph(AgentState)\n",
|
431 |
+
"\n",
|
432 |
+
"workflow.add_node(\"prompt_developer\", prompt_developer)\n",
|
433 |
+
"workflow.add_node(\"prompt_executor\", prompt_executor)\n",
|
434 |
+
"workflow.add_node(\"output_history_analyzer\", output_history_analyzer)\n",
|
435 |
+
"workflow.add_node(\"prompt_analyzer\", prompt_analyzer)\n",
|
436 |
+
"workflow.add_node(\"prompt_suggester\", prompt_suggester)\n",
|
437 |
+
"\n",
|
438 |
+
"workflow.set_entry_point(\"prompt_developer\")\n",
|
439 |
+
"\n",
|
440 |
+
"workflow.add_edge(\"prompt_developer\", \"prompt_executor\")\n",
|
441 |
+
"workflow.add_edge(\"prompt_executor\", \"output_history_analyzer\")\n",
|
442 |
+
"\n",
|
443 |
+
"workflow.add_conditional_edges(\n",
|
444 |
+
" \"output_history_analyzer\",\n",
|
445 |
+
" should_exit_on_max_age,\n",
|
446 |
+
" {\n",
|
447 |
+
" \"continue\": \"prompt_analyzer\",\n",
|
448 |
+
" END: END\n",
|
449 |
+
" }\n",
|
450 |
+
")\n",
|
451 |
+
"\n",
|
452 |
+
"workflow.add_conditional_edges(\n",
|
453 |
+
" \"prompt_analyzer\",\n",
|
454 |
+
" should_exit_on_acceptable_output,\n",
|
455 |
+
" {\n",
|
456 |
+
" \"continue\": \"prompt_suggester\",\n",
|
457 |
+
" END: END\n",
|
458 |
+
" }\n",
|
459 |
+
")\n",
|
460 |
+
"\n",
|
461 |
+
"workflow.add_edge(\"prompt_suggester\", \"prompt_developer\")\n",
|
462 |
+
"\n",
|
463 |
+
"graph = workflow.compile()\n"
|
464 |
+
]
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"cell_type": "code",
|
468 |
+
"execution_count": 2,
|
469 |
+
"metadata": {},
|
470 |
+
"outputs": [
|
471 |
+
{
|
472 |
+
"data": {
|
473 |
+
"image/jpeg": "",
|
474 |
+
"text/plain": [
|
475 |
+
"<IPython.core.display.Image object>"
|
476 |
+
]
|
477 |
+
},
|
478 |
+
"metadata": {},
|
479 |
+
"output_type": "display_data"
|
480 |
+
}
|
481 |
+
],
|
482 |
+
"source": [
|
483 |
+
"from IPython.display import Image, display\n",
|
484 |
+
"\n",
|
485 |
+
"try:\n",
|
486 |
+
" display(Image(graph.get_graph().draw_mermaid_png()))\n",
|
487 |
+
"except Exception:\n",
|
488 |
+
" # This requires some extra dependencies and is optional\n",
|
489 |
+
" pass"
|
490 |
+
]
|
491 |
+
},
|
492 |
+
{
|
493 |
+
"cell_type": "code",
|
494 |
+
"execution_count": 3,
|
495 |
+
"metadata": {},
|
496 |
+
"outputs": [
|
497 |
+
{
|
498 |
+
"name": "stdout",
|
499 |
+
"output_type": "stream",
|
500 |
+
"text": [
|
501 |
+
"User Message: (2+8)*3\n",
|
502 |
+
"Expected Output: (2+8)*3\n",
|
503 |
+
"= 10*3\n",
|
504 |
+
"= 30\n",
|
505 |
+
"\n"
|
506 |
+
]
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"name": "stderr",
|
510 |
+
"output_type": "stream",
|
511 |
+
"text": [
|
512 |
+
"/home/yale/work/meta-prompt/.venv/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead.\n",
|
513 |
+
" warn_deprecated(\n"
|
514 |
+
]
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"name": "stdout",
|
518 |
+
"output_type": "stream",
|
519 |
+
"text": [
|
520 |
+
"You are a step-by-step math calculator. When given a mathematical\n",
|
521 |
+
"expression:\n",
|
522 |
+
"\n",
|
523 |
+
"1. Display the original expression on the first line.\n",
|
524 |
+
"2. On the next line, show the first step of the calculation, preceded by '='.\n",
|
525 |
+
"3. Continue showing each step on a new line until the final result is reached.\n",
|
526 |
+
"4. Simplify expressions within parentheses before applying operations outside.\n",
|
527 |
+
"5. Show multiplication using the '*' symbol.\n",
|
528 |
+
"6. Do not explain the steps; simply show the calculations.\n",
|
529 |
+
"\n",
|
530 |
+
"Provide the solution using this clear, concise format to help users understand\n",
|
531 |
+
"the problem-solving process.\n",
|
532 |
+
"(2+8)*3\n",
|
533 |
+
"= 2 + 8\n",
|
534 |
+
"= 10\n",
|
535 |
+
"= 10 * 3\n",
|
536 |
+
"= 30\n",
|
537 |
+
"Based on the provided Expected Output, Actual Output, and Acceptance Criteria, here's the analysis:\n",
|
538 |
+
"\n",
|
539 |
+
"```\n",
|
540 |
+
"- Acceptable Differences: \n",
|
541 |
+
" * Extra line break at the end of the Actual Output\n",
|
542 |
+
"\n",
|
543 |
+
"- Unacceptable Differences: \n",
|
544 |
+
" * Actual Output shows intermediate steps (2 + 8 = 10) not present in Expected Output\n",
|
545 |
+
" * Actual Output separates 10 * 3 into a separate step, which is not in Expected Output\n",
|
546 |
+
"\n",
|
547 |
+
"- Accept: No\n",
|
548 |
+
"```\n",
|
549 |
+
"\n",
|
550 |
+
"The Actual Output differs significantly from the Expected Output in terms of content, showing additional steps in the calculation process that are not present in the Expected Output. These differences go beyond the acceptable criteria of extra spaces or line breaks. Therefore, the Actual Output is not acceptable according to the given Acceptance Criteria.\n",
|
551 |
+
"Here are suggestions to improve the System Prompt:\n",
|
552 |
+
"\n",
|
553 |
+
"- Add a specific instruction to simplify expressions within parentheses in a single step, without showing intermediate calculations.\n",
|
554 |
+
"- Include a clear directive to perform multiplication immediately after simplifying parentheses, without separating it into an additional step.\n",
|
555 |
+
"- Provide an example calculation in the prompt that demonstrates the desired output format, such as `(3+2)*4 = 5*4 = 20`.\n",
|
556 |
+
"- Explicitly state that only two lines of calculation should be shown for expressions with a single set of parentheses: one for simplifying the parentheses and one for the final result.\n",
|
557 |
+
"- Remove or modify the instruction about showing each step, as it may encourage unnecessary intermediate steps.\n",
|
558 |
+
"- Add a note that the solution should be as concise as possible, showing only the essential steps.\n",
|
559 |
+
"You are a step-by-step math calculator. When given a mathematical expression:\n",
|
560 |
+
"\n",
|
561 |
+
"1. Display the original expression on the first line.\n",
|
562 |
+
"2. Simplify expressions within parentheses in a single step, without showing\n",
|
563 |
+
" intermediate calculations.\n",
|
564 |
+
"3. Perform multiplication immediately after simplifying parentheses, without\n",
|
565 |
+
" separating it into an additional step.\n",
|
566 |
+
"4. Show the final result on the last line.\n",
|
567 |
+
"5. Use the '*' symbol for multiplication.\n",
|
568 |
+
"6. For expressions with a single set of parentheses, show only two lines of\n",
|
569 |
+
" calculation: one for simplifying the parentheses and one for the final\n",
|
570 |
+
" result.\n",
|
571 |
+
"7. Provide the solution in the most concise format possible, showing only\n",
|
572 |
+
" essential steps.\n",
|
573 |
+
"\n",
|
574 |
+
"Do not explain the steps; simply show the calculations. Here's an example of\n",
|
575 |
+
"the desired output format:\n",
|
576 |
+
"\n",
|
577 |
+
"(3+2)*4\n",
|
578 |
+
"= 5*4\n",
|
579 |
+
"= 20\n",
|
580 |
+
"\n",
|
581 |
+
"This clear, concise format will help users understand the problem-solving\n",
|
582 |
+
"process efficiently.\n",
|
583 |
+
"(2+8)*3\n",
|
584 |
+
"= 10*3\n",
|
585 |
+
"= 30\n",
|
586 |
+
"After comparing the two outputs with the expected output based on the given acceptance criteria, I can conclude:\n",
|
587 |
+
"\n",
|
588 |
+
"# Better Output ID: B\n",
|
589 |
+
"\n",
|
590 |
+
"Output B is more similar to the expected output. It matches the expected output exactly, with the only difference being a missing line break at the end, which is acceptable according to the criteria. Output A, while reaching the same final result, includes additional steps that are not present in the expected output.\n",
|
591 |
+
"Here's the analysis based on the provided Expected Output, Actual Output, and Acceptance Criteria:\n",
|
592 |
+
"\n",
|
593 |
+
"```\n",
|
594 |
+
"- Acceptable Differences: \n",
|
595 |
+
" * Missing line break at the end of the Actual Output.\n",
|
596 |
+
"\n",
|
597 |
+
"- Unacceptable Differences: \n",
|
598 |
+
" [None]\n",
|
599 |
+
"\n",
|
600 |
+
"- Accept: Yes\n",
|
601 |
+
"```\n",
|
602 |
+
"\n",
|
603 |
+
"The Actual Output matches the Expected Output exactly in terms of content and formatting, with the only difference being a missing line break at the end of the Actual Output. This falls under the acceptable differences as per the Acceptance Criteria, which allows for \"Extra or missing line breaks at the beginning or end of the output.\" Therefore, the Actual Output is acceptable.\n",
|
604 |
+
"Final Result: {'acceptance_criteria': '\\n* Exactly text match.\\n* Acceptable differences:\\n * Extra or missing spaces.\\n * Extra or missing line breaks at the beginning or end of the output.\\n', 'user_message': '(2+8)*3', 'expected_output': '(2+8)*3\\n= 10*3\\n= 30\\n', 'system_message': \"You are a step-by-step math calculator. When given a mathematical expression:\\n\\n1. Display the original expression on the first line.\\n2. Simplify expressions within parentheses in a single step, without showing\\n intermediate calculations.\\n3. Perform multiplication immediately after simplifying parentheses, without\\n separating it into an additional step.\\n4. Show the final result on the last line.\\n5. Use the '*' symbol for multiplication.\\n6. For expressions with a single set of parentheses, show only two lines of\\n calculation: one for simplifying the parentheses and one for the final\\n result.\\n7. Provide the solution in the most concise format possible, showing only\\n essential steps.\\n\\nDo not explain the steps; simply show the calculations. 
Here's an example of\\nthe desired output format:\\n\\n(3+2)*4\\n= 5*4\\n= 20\\n\\nThis clear, concise format will help users understand the problem-solving\\nprocess efficiently.\", 'output': '(2+8)*3\\n= 10*3\\n= 30', 'suggestions': 'Here are suggestions to improve the System Prompt:\\n\\n- Add a specific instruction to simplify expressions within parentheses in a single step, without showing intermediate calculations.\\n- Include a clear directive to perform multiplication immediately after simplifying parentheses, without separating it into an additional step.\\n- Provide an example calculation in the prompt that demonstrates the desired output format, such as `(3+2)*4 = 5*4 = 20`.\\n- Explicitly state that only two lines of calculation should be shown for expressions with a single set of parentheses: one for simplifying the parentheses and one for the final result.\\n- Remove or modify the instruction about showing each step, as it may encourage unnecessary intermediate steps.\\n- Add a note that the solution should be as concise as possible, showing only the essential steps.', 'accepted': True, 'analysis': 'Here\\'s the analysis based on the provided Expected Output, Actual Output, and Acceptance Criteria:\\n\\n```\\n- Acceptable Differences: \\n * Missing line break at the end of the Actual Output.\\n\\n- Unacceptable Differences: \\n [None]\\n\\n- Accept: Yes\\n```\\n\\nThe Actual Output matches the Expected Output exactly in terms of content and formatting, with the only difference being a missing line break at the end of the Actual Output. This falls under the acceptable differences as per the Acceptance Criteria, which allows for \"Extra or missing line breaks at the beginning or end of the output.\" Therefore, the Actual Output is acceptable.', 'best_output': '(2+8)*3\\n= 10*3\\n= 30', 'best_system_message': \"You are a step-by-step math calculator. When given a mathematical expression:\\n\\n1. Display the original expression on the first line.\\n2. 
Simplify expressions within parentheses in a single step, without showing\\n intermediate calculations.\\n3. Perform multiplication immediately after simplifying parentheses, without\\n separating it into an additional step.\\n4. Show the final result on the last line.\\n5. Use the '*' symbol for multiplication.\\n6. For expressions with a single set of parentheses, show only two lines of\\n calculation: one for simplifying the parentheses and one for the final\\n result.\\n7. Provide the solution in the most concise format possible, showing only\\n essential steps.\\n\\nDo not explain the steps; simply show the calculations. Here's an example of\\nthe desired output format:\\n\\n(3+2)*4\\n= 5*4\\n= 20\\n\\nThis clear, concise format will help users understand the problem-solving\\nprocess efficiently.\", 'best_output_age': 0, 'max_output_age': 3}\n",
|
605 |
+
"System Message:\n",
|
606 |
+
"You are a step-by-step math calculator. When given a mathematical expression:\n",
|
607 |
+
"\n",
|
608 |
+
"1. Display the original expression on the first line.\n",
|
609 |
+
"2. Simplify expressions within parentheses in a single step, without showing\n",
|
610 |
+
" intermediate calculations.\n",
|
611 |
+
"3. Perform multiplication immediately after simplifying parentheses, without\n",
|
612 |
+
" separating it into an additional step.\n",
|
613 |
+
"4. Show the final result on the last line.\n",
|
614 |
+
"5. Use the '*' symbol for multiplication.\n",
|
615 |
+
"6. For expressions with a single set of parentheses, show only two lines of\n",
|
616 |
+
" calculation: one for simplifying the parentheses and one for the final\n",
|
617 |
+
" result.\n",
|
618 |
+
"7. Provide the solution in the most concise format possible, showing only\n",
|
619 |
+
" essential steps.\n",
|
620 |
+
"\n",
|
621 |
+
"Do not explain the steps; simply show the calculations. Here's an example of\n",
|
622 |
+
"the desired output format:\n",
|
623 |
+
"\n",
|
624 |
+
"(3+2)*4\n",
|
625 |
+
"= 5*4\n",
|
626 |
+
"= 20\n",
|
627 |
+
"\n",
|
628 |
+
"This clear, concise format will help users understand the problem-solving\n",
|
629 |
+
"process efficiently.\n",
|
630 |
+
"Output:\n",
|
631 |
+
"(2+8)*3\n",
|
632 |
+
"= 10*3\n",
|
633 |
+
"= 30\n"
|
634 |
+
]
|
635 |
+
}
|
636 |
+
],
|
637 |
+
"source": [
|
638 |
+
"initial_states = [\n",
|
639 |
+
" AgentState(\n",
|
640 |
+
" max_output_age=3,\n",
|
641 |
+
" user_message=\"(2+8)*3\",\n",
|
642 |
+
" expected_output=\"\"\"(2+8)*3\n",
|
643 |
+
"= 10*3\n",
|
644 |
+
"= 30\n",
|
645 |
+
"\"\"\",\n",
|
646 |
+
" acceptance_criteria=\"\"\"\n",
|
647 |
+
"* Exactly text match.\n",
|
648 |
+
"* Acceptable differences:\n",
|
649 |
+
" * Extra or missing spaces.\n",
|
650 |
+
" * Extra or missing line breaks at the beginning or end of the output.\n",
|
651 |
+
"\"\"\"),\n",
|
652 |
+
" AgentState(\n",
|
653 |
+
" max_output_age=4,\n",
|
654 |
+
" user_message=\"\"\"Here is the GDP data in billions of US dollars (USD) for these years:\n",
|
655 |
+
"\n",
|
656 |
+
"Germany:\n",
|
657 |
+
"\n",
|
658 |
+
"2015: $3,368.29 billion\n",
|
659 |
+
"2016: $3,467.79 billion\n",
|
660 |
+
"2017: $3,677.83 billion\n",
|
661 |
+
"2018: $3,946.00 billion\n",
|
662 |
+
"2019: $3,845.03 billion\n",
|
663 |
+
"France:\n",
|
664 |
+
"\n",
|
665 |
+
"2015: $2,423.47 billion\n",
|
666 |
+
"2016: $2,465.12 billion\n",
|
667 |
+
"2017: $2,582.49 billion\n",
|
668 |
+
"2018: $2,787.86 billion\n",
|
669 |
+
"2019: $2,715.52 billion\n",
|
670 |
+
"United Kingdom:\n",
|
671 |
+
"\n",
|
672 |
+
"2015: $2,860.58 billion\n",
|
673 |
+
"2016: $2,650.90 billion\n",
|
674 |
+
"2017: $2,622.43 billion\n",
|
675 |
+
"2018: $2,828.87 billion\n",
|
676 |
+
"2019: $2,829.21 billion\n",
|
677 |
+
"Italy:\n",
|
678 |
+
"\n",
|
679 |
+
"2015: $1,815.72 billion\n",
|
680 |
+
"2016: $1,852.50 billion\n",
|
681 |
+
"2017: $1,937.80 billion\n",
|
682 |
+
"2018: $2,073.90 billion\n",
|
683 |
+
"2019: $1,988.14 billion\n",
|
684 |
+
"Spain:\n",
|
685 |
+
"\n",
|
686 |
+
"2015: $1,199.74 billion\n",
|
687 |
+
"2016: $1,235.95 billion\n",
|
688 |
+
"2017: $1,313.13 billion\n",
|
689 |
+
"2018: $1,426.19 billion\n",
|
690 |
+
"2019: $1,430.38 billion\n",
|
691 |
+
"\"\"\",\n",
|
692 |
+
" expected_output=\"\"\"Year,Germany,France,United Kingdom,Italy,Spain\n",
|
693 |
+
"2016-2015,2.96%,1.71%,-7.35%,2.02%,3.04%\n",
|
694 |
+
"2017-2016,5.08%,4.78%,-1.07%,4.61%,6.23%\n",
|
695 |
+
"2018-2017,7.48%,7.99%,7.89%,7.10%,8.58%\n",
|
696 |
+
"2019-2018,-2.56%,-2.59%,0.01%,-4.11%,0.30%\n",
|
697 |
+
"\"\"\",\n",
|
698 |
+
" acceptance_criteria=\"\"\"\n",
|
699 |
+
"* Strict text matching of the first row and first column.\n",
|
700 |
+
"* Acceptable differences:\n",
|
701 |
+
" * Differences in digital/percentage values in the table, even significant ones.\n",
|
702 |
+
" * Extra or missing spaces.\n",
|
703 |
+
" * Extra or missing line breaks.\n",
|
704 |
+
"\"\"\"),\n",
|
705 |
+
" AgentState(\n",
|
706 |
+
" max_output_age=3,\n",
|
707 |
+
" user_message=\"\"\"\n",
|
708 |
+
"基因序列:ATGGCCATGGCGCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA\n",
|
709 |
+
"物种:大肠杆菌 (Escherichia coli)\n",
|
710 |
+
"\"\"\",\n",
|
711 |
+
" expected_output=\"\"\"\n",
|
712 |
+
"{\n",
|
713 |
+
" \"基因序列分析结果\": {\n",
|
714 |
+
" \"基本信息\": {\n",
|
715 |
+
" \"序列长度\": 54,\n",
|
716 |
+
" \"GC含量\": \"51.85%\"\n",
|
717 |
+
" },\n",
|
718 |
+
" \"核苷酸组成\": {\n",
|
719 |
+
" \"A\": {\"数量\": 12, \"百分比\": \"22.22%\"},\n",
|
720 |
+
" \"T\": {\"数量\": 11, \"百分比\": \"20.37%\"},\n",
|
721 |
+
" \"G\": {\"数量\": 16, \"百分比\": \"29.63%\"},\n",
|
722 |
+
" \"C\": {\"数量\": 15, \"百分比\": \"27.78%\"}\n",
|
723 |
+
" },\n",
|
724 |
+
" \"密码子分析\": {\n",
|
725 |
+
" \"起始密码子\": \"ATG\",\n",
|
726 |
+
" \"终止密码子\": \"TGA\",\n",
|
727 |
+
" \"密码子表\": [\n",
|
728 |
+
" {\"密码子\": \"ATG\", \"氨基酸\": \"甲硫氨酸\", \"位置\": 1},\n",
|
729 |
+
" {\"密码子\": \"GCC\", \"氨基酸\": \"丙氨酸\", \"位置\": 2},\n",
|
730 |
+
" {\"密码子\": \"ATG\", \"氨基酸\": \"甲硫氨酸\", \"位置\": 3},\n",
|
731 |
+
" // ... 其他密码子 ...\n",
|
732 |
+
" {\"密码子\": \"TGA\", \"氨基酸\": \"终止密码子\", \"位置\": 18}\n",
|
733 |
+
" ]\n",
|
734 |
+
" },\n",
|
735 |
+
" \"潜在功能预测\": {\n",
|
736 |
+
" \"蛋白质长度\": 17,\n",
|
737 |
+
" \"可能的功能域\": [\n",
|
738 |
+
" {\"域名\": \"ABC转运蛋白\", \"起始位置\": 5, \"结束位置\": 15, \"置信度\": \"75%\"},\n",
|
739 |
+
" {\"域名\": \"膜蛋白\", \"起始位置\": 1, \"结束位置\": 17, \"置信度\": \"60%\"}\n",
|
740 |
+
" ],\n",
|
741 |
+
" \"二级结构预测\": {\n",
|
742 |
+
" \"α螺旋\": [\"2-8\", \"12-16\"],\n",
|
743 |
+
" \"β折叠\": [\"9-11\"],\n",
|
744 |
+
" \"无规卷曲\": [\"1\", \"17\"]\n",
|
745 |
+
" }\n",
|
746 |
+
" },\n",
|
747 |
+
" \"同源性分析\": {\n",
|
748 |
+
" \"最相似序列\": [\n",
|
749 |
+
" {\n",
|
750 |
+
" \"基因名\": \"abcT\",\n",
|
751 |
+
" \"物种\": \"沙门氏菌 (Salmonella enterica)\",\n",
|
752 |
+
" \"相似度\": \"89%\",\n",
|
753 |
+
" \"E值\": \"3e-25\"\n",
|
754 |
+
" },\n",
|
755 |
+
" {\n",
|
756 |
+
" \"基因名\": \"yojI\",\n",
|
757 |
+
" \"物种\": \"大肠杆菌 (Escherichia coli)\",\n",
|
758 |
+
" \"相似度\": \"95%\",\n",
|
759 |
+
" \"E值\": \"1e-30\"\n",
|
760 |
+
" }\n",
|
761 |
+
" ]\n",
|
762 |
+
" },\n",
|
763 |
+
" \"突变分析\": {\n",
|
764 |
+
" \"SNP位点\": [\n",
|
765 |
+
" {\"位置\": 27, \"野生型\": \"A\", \"突变型\": \"G\", \"氨基酸变化\": \"谷氨酰胺->精氨酸\"},\n",
|
766 |
+
" {\"位置\": 42, \"野生型\": \"C\", \"突变型\": \"T\", \"氨基酸变化\": \"无(同义突变)\"}\n",
|
767 |
+
" ]\n",
|
768 |
+
" }\n",
|
769 |
+
" }\n",
|
770 |
+
"}\n",
|
771 |
+
"\"\"\",\n",
|
772 |
+
" acceptance_criteria=\"\"\"\n",
|
773 |
+
"* Exactly text match.\n",
|
774 |
+
"* Acceptable differences:\n",
|
775 |
+
" * Extra or missing spaces\n",
|
776 |
+
" * Extra or missing line breaks at the beginning or end of the output\n",
|
777 |
+
" * Different sequence length\n",
|
778 |
+
" * Different GC content\n",
|
779 |
+
" * Different nucleotide composition\n",
|
780 |
+
" * Different codon table\n",
|
781 |
+
" * Different potential function prediction\n",
|
782 |
+
" * Different secondary structure prediction\n",
|
783 |
+
" * Different similarity analysis\n",
|
784 |
+
" * Different mutation analysis\n",
|
785 |
+
"\"\"\"),\n",
|
786 |
+
" AgentState(\n",
|
787 |
+
" max_output_age=3,\n",
|
788 |
+
" user_message=\"\"\"\n",
|
789 |
+
"今天下午3点,在北京国家会议中心,阿里巴巴集团董事局主席马云宣布将投资100亿元人民币用于农村电商发展。这一决定受到了与会代表的热烈欢迎,大家认为这将为中国农村经济带来新的机遇。\n",
|
790 |
+
"\"\"\",\n",
|
791 |
+
" expected_output=\"\"\"\n",
|
792 |
+
"{\n",
|
793 |
+
" \"文本分析结果\": {\n",
|
794 |
+
" \"情感分析\": {\n",
|
795 |
+
" \"整体情感\": \"积极\",\n",
|
796 |
+
" \"情感得分\": 0.82,\n",
|
797 |
+
" \"情感细分\": {\n",
|
798 |
+
" \"乐观\": 0.75,\n",
|
799 |
+
" \"兴奋\": 0.60,\n",
|
800 |
+
" \"期待\": 0.85\n",
|
801 |
+
" }\n",
|
802 |
+
" },\n",
|
803 |
+
" \"实体识别\": [\n",
|
804 |
+
" {\"实体\": \"北京\", \"类型\": \"地点\", \"起始位置\": 7, \"结束位置\": 9},\n",
|
805 |
+
" {\"实体\": \"国家会议中心\", \"类型\": \"地点\", \"起始位置\": 9, \"结束位置\": 15},\n",
|
806 |
+
" {\"实体\": \"阿里巴巴集团\", \"类型\": \"组织\", \"起始位置\": 16, \"结束位置\": 22},\n",
|
807 |
+
" {\"实体\": \"马云\", \"类型\": \"人物\", \"起始位置\": 26, \"结束位置\": 28},\n",
|
808 |
+
" {\"实体\": \"100亿元\", \"类型\": \"金额\", \"起始位置\": 32, \"结束位置\": 37},\n",
|
809 |
+
" {\"实体\": \"人民币\", \"类型\": \"货币\", \"起始位置\": 37, \"结束位置\": 40},\n",
|
810 |
+
" {\"实体\": \"中国\", \"类型\": \"地点\", \"起始位置\": 71, \"结束位置\": 73}\n",
|
811 |
+
" ],\n",
|
812 |
+
" \"关键词提取\": [\n",
|
813 |
+
" {\"关键词\": \"农村电商\", \"权重\": 0.95},\n",
|
814 |
+
" {\"关键词\": \"马云\", \"权重\": 0.85},\n",
|
815 |
+
" {\"关键词\": \"投资\", \"权重\": 0.80},\n",
|
816 |
+
" {\"关键词\": \"阿里巴巴\", \"权重\": 0.75},\n",
|
817 |
+
" {\"关键词\": \"经济机遇\", \"权重\": 0.70}\n",
|
818 |
+
" ]\n",
|
819 |
+
" }\n",
|
820 |
+
"}\n",
|
821 |
+
"\"\"\",\n",
|
822 |
+
" acceptance_criteria=\"\"\"\n",
|
823 |
+
"* Exactly text match, except for the numerical values.\n",
|
824 |
+
"* Acceptable differences:\n",
|
825 |
+
" * Differences in digital values in the table.\n",
|
826 |
+
" * Extra or missing spaces.\n",
|
827 |
+
" * Extra or missing line breaks at the beginning or end of the output.\n",
|
828 |
+
" * Extra or missing 3rd or 4th layer sections or items.\n",
|
829 |
+
" * Differences in section/item orders.\n",
|
830 |
+
"\"\"\")\n",
|
831 |
+
"]\n",
|
832 |
+
"\n",
|
833 |
+
"selected_states = [initial_states[0]]\n",
|
834 |
+
"\n",
|
835 |
+
"for initial_state in selected_states:\n",
|
836 |
+
" print(\"User Message:\", initial_state.user_message)\n",
|
837 |
+
" print(\"Expected Output:\", initial_state.expected_output)\n",
|
838 |
+
"\n",
|
839 |
+
" try: \n",
|
840 |
+
" result = graph.invoke(initial_state, {\"recursion_limit\": 100})\n",
|
841 |
+
" print(\"Final Result:\", result)\n",
|
842 |
+
"\n",
|
843 |
+
" # format system message, break it into multiple lines\n",
|
844 |
+
" print(\"System Message:\")\n",
|
845 |
+
" print(result['best_system_message'])\n",
|
846 |
+
" print(\"Output:\")\n",
|
847 |
+
" print(result['best_output'])\n",
|
848 |
+
" except Exception as e:\n",
|
849 |
+
" # print the error message, saying failed to converge\n",
|
850 |
+
" print(\"Failed to converge.\")\n",
|
851 |
+
" print(e)\n",
|
852 |
+
"\n",
|
853 |
+
" print(\"System Message:\")\n",
|
854 |
+
" print(result['best_system_message'])\n",
|
855 |
+
" print(\"Output:\")\n",
|
856 |
+
" print(result['best_output'])"
|
857 |
+
]
|
858 |
+
}
|
859 |
+
],
|
860 |
+
"metadata": {
|
861 |
+
"kernelspec": {
|
862 |
+
"display_name": ".venv",
|
863 |
+
"language": "python",
|
864 |
+
"name": "python3"
|
865 |
+
},
|
866 |
+
"language_info": {
|
867 |
+
"codemirror_mode": {
|
868 |
+
"name": "ipython",
|
869 |
+
"version": 3
|
870 |
+
},
|
871 |
+
"file_extension": ".py",
|
872 |
+
"mimetype": "text/x-python",
|
873 |
+
"name": "python",
|
874 |
+
"nbconvert_exporter": "python",
|
875 |
+
"pygments_lexer": "ipython3",
|
876 |
+
"version": "3.10.12"
|
877 |
+
}
|
878 |
+
},
|
879 |
+
"nbformat": 4,
|
880 |
+
"nbformat_minor": 2
|
881 |
+
}
|
requirements.txt
CHANGED
@@ -4,15 +4,21 @@ aiosignal==1.3.1
|
|
4 |
altair==5.1.1
|
5 |
annotated-types==0.5.0
|
6 |
anyio==3.7.1
|
|
|
7 |
async-timeout==4.0.3
|
8 |
attrs==23.1.0
|
9 |
certifi==2023.7.22
|
10 |
charset-normalizer==3.2.0
|
11 |
click==8.1.7
|
|
|
12 |
contourpy==1.1.1
|
13 |
cycler==0.11.0
|
14 |
dataclasses-json==0.6.0
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
fastapi==0.103.1
|
17 |
ffmpy==0.3.1
|
18 |
filelock==3.12.4
|
@@ -28,36 +34,56 @@ httpx==0.25.0
|
|
28 |
huggingface-hub==0.17.2
|
29 |
idna==3.4
|
30 |
importlib-resources==6.1.0
|
|
|
|
|
|
|
31 |
Jinja2==3.1.2
|
32 |
joblib==1.3.2
|
33 |
jsonpatch==1.33
|
34 |
jsonpointer==2.4
|
35 |
jsonschema==4.19.1
|
36 |
jsonschema-specifications==2023.7.1
|
|
|
|
|
37 |
kiwisolver==1.4.5
|
38 |
langchain==0.0.300
|
39 |
-
|
|
|
|
|
|
|
40 |
MarkupSafe==2.1.3
|
41 |
marshmallow==3.20.1
|
42 |
matplotlib==3.8.0
|
|
|
43 |
multidict==6.0.4
|
44 |
mypy-extensions==1.0.0
|
|
|
45 |
numexpr==2.8.6
|
46 |
numpy==1.26.0
|
47 |
-
openai==
|
48 |
-
orjson==3.
|
49 |
-
packaging==
|
50 |
pandas==2.1.1
|
|
|
|
|
51 |
Pillow==10.0.1
|
|
|
|
|
|
|
|
|
|
|
52 |
pydantic==2.3.0
|
53 |
pydantic_core==2.6.3
|
54 |
pydub==0.25.1
|
|
|
55 |
pyparsing==3.1.1
|
56 |
-
python-dateutil==2.
|
57 |
python-multipart==0.0.6
|
58 |
pytz==2023.3.post1
|
59 |
PyYAML==6.0.1
|
|
|
60 |
referencing==0.30.2
|
|
|
61 |
requests==2.31.0
|
62 |
rpds-py==0.10.3
|
63 |
scikit-learn==1.3.1
|
@@ -66,15 +92,20 @@ semantic-version==2.10.0
|
|
66 |
six==1.16.0
|
67 |
sniffio==1.3.0
|
68 |
SQLAlchemy==2.0.21
|
|
|
69 |
starlette==0.27.0
|
70 |
tenacity==8.2.3
|
71 |
threadpoolctl==3.2.0
|
|
|
72 |
toolz==0.12.0
|
|
|
73 |
tqdm==4.66.1
|
|
|
74 |
typing-inspect==0.9.0
|
75 |
-
typing_extensions==4.
|
76 |
tzdata==2023.3
|
77 |
urllib3==2.0.5
|
78 |
uvicorn==0.23.2
|
|
|
79 |
websockets==11.0.3
|
80 |
yarl==1.9.2
|
|
|
4 |
altair==5.1.1
|
5 |
annotated-types==0.5.0
|
6 |
anyio==3.7.1
|
7 |
+
asttokens==2.4.1
|
8 |
async-timeout==4.0.3
|
9 |
attrs==23.1.0
|
10 |
certifi==2023.7.22
|
11 |
charset-normalizer==3.2.0
|
12 |
click==8.1.7
|
13 |
+
comm==0.2.2
|
14 |
contourpy==1.1.1
|
15 |
cycler==0.11.0
|
16 |
dataclasses-json==0.6.0
|
17 |
+
debugpy==1.8.2
|
18 |
+
decorator==5.1.1
|
19 |
+
distro==1.9.0
|
20 |
+
exceptiongroup==1.2.1
|
21 |
+
executing==2.0.1
|
22 |
fastapi==0.103.1
|
23 |
ffmpy==0.3.1
|
24 |
filelock==3.12.4
|
|
|
34 |
huggingface-hub==0.17.2
|
35 |
idna==3.4
|
36 |
importlib-resources==6.1.0
|
37 |
+
ipykernel==6.29.4
|
38 |
+
ipython==8.26.0
|
39 |
+
jedi==0.19.1
|
40 |
Jinja2==3.1.2
|
41 |
joblib==1.3.2
|
42 |
jsonpatch==1.33
|
43 |
jsonpointer==2.4
|
44 |
jsonschema==4.19.1
|
45 |
jsonschema-specifications==2023.7.1
|
46 |
+
jupyter_client==8.6.2
|
47 |
+
jupyter_core==5.7.2
|
48 |
kiwisolver==1.4.5
|
49 |
langchain==0.0.300
|
50 |
+
langchain-core==0.2.10
|
51 |
+
langchain-openai==0.1.13
|
52 |
+
langgraph==0.1.4
|
53 |
+
langsmith==0.1.82
|
54 |
MarkupSafe==2.1.3
|
55 |
marshmallow==3.20.1
|
56 |
matplotlib==3.8.0
|
57 |
+
matplotlib-inline==0.1.7
|
58 |
multidict==6.0.4
|
59 |
mypy-extensions==1.0.0
|
60 |
+
nest-asyncio==1.6.0
|
61 |
numexpr==2.8.6
|
62 |
numpy==1.26.0
|
63 |
+
openai==1.35.7
|
64 |
+
orjson==3.10.5
|
65 |
+
packaging==24.1
|
66 |
pandas==2.1.1
|
67 |
+
parso==0.8.4
|
68 |
+
pexpect==4.9.0
|
69 |
Pillow==10.0.1
|
70 |
+
platformdirs==4.2.2
|
71 |
+
prompt_toolkit==3.0.47
|
72 |
+
psutil==6.0.0
|
73 |
+
ptyprocess==0.7.0
|
74 |
+
pure-eval==0.2.2
|
75 |
pydantic==2.3.0
|
76 |
pydantic_core==2.6.3
|
77 |
pydub==0.25.1
|
78 |
+
Pygments==2.18.0
|
79 |
pyparsing==3.1.1
|
80 |
+
python-dateutil==2.9.0.post0
|
81 |
python-multipart==0.0.6
|
82 |
pytz==2023.3.post1
|
83 |
PyYAML==6.0.1
|
84 |
+
pyzmq==26.0.3
|
85 |
referencing==0.30.2
|
86 |
+
regex==2024.5.15
|
87 |
requests==2.31.0
|
88 |
rpds-py==0.10.3
|
89 |
scikit-learn==1.3.1
|
|
|
92 |
six==1.16.0
|
93 |
sniffio==1.3.0
|
94 |
SQLAlchemy==2.0.21
|
95 |
+
stack-data==0.6.3
|
96 |
starlette==0.27.0
|
97 |
tenacity==8.2.3
|
98 |
threadpoolctl==3.2.0
|
99 |
+
tiktoken==0.7.0
|
100 |
toolz==0.12.0
|
101 |
+
tornado==6.4.1
|
102 |
tqdm==4.66.1
|
103 |
+
traitlets==5.14.3
|
104 |
typing-inspect==0.9.0
|
105 |
+
typing_extensions==4.12.2
|
106 |
tzdata==2023.3
|
107 |
urllib3==2.0.5
|
108 |
uvicorn==0.23.2
|
109 |
+
wcwidth==0.2.13
|
110 |
websockets==11.0.3
|
111 |
yarl==1.9.2
|