import os
import tempfile
import uuid
from openai_server.backend_utils import structure_to_messages, run_download_api_all
from openai_server.agent_utils import get_ret_dict_and_handle_files
from openai_server.agent_prompting import get_full_system_prompt, planning_prompt, planning_final_prompt, \
get_agent_tools
from openai_server.autogen_utils import get_autogen_use_planning_prompt
def run_autogen_2agent(query=None,
visible_models=None,
stream_output=None,
max_new_tokens=None,
authorization=None,
chat_conversation=None,
text_context_list=None,
system_prompt=None,
image_file=None,
# autogen/agent specific parameters
agent_type=None,
agent_accuracy=None,
agent_chat_history=None,
agent_files=None,
agent_work_dir=None,
max_stream_length=None,
max_memory_usage=None,
autogen_use_planning_prompt=None,
autogen_stop_docker_executor=None,
autogen_run_code_in_docker=None,
autogen_max_consecutive_auto_reply=None,
autogen_max_turns=None,
autogen_timeout=None,
autogen_cache_seed=None,
agent_venv_dir=None,
agent_code_writer_system_message=None,
agent_system_site_packages=None,
autogen_code_restrictions_level=None,
autogen_silent_exchange=None,
client_metadata=None,
agent_verbose=None) -> dict:
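    """Run a two-agent AutoGen session: an LLM-backed code-writer agent paired with a
    code-executor agent (no LLM) that runs emitted code blocks locally or in Docker.

    visible_models (the model to use) and max_new_tokens are required; other parameters
    fall back to the defaults set below.  Returns the dict built by
    get_ret_dict_and_handle_files from the chat result and any files produced in
    agent_work_dir.
    """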
if client_metadata:
print("BEGIN 2AGENT: client_metadata: %s" % client_metadata, flush=True)
assert agent_type in ['autogen_2agent', 'auto'], "Invalid agent_type: %s" % agent_type
    # Map parameters from the chat/OpenAI API onto their h2oGPT equivalents
assert visible_models is not None, "No visible_models specified"
    model = visible_models  # adopt the h2oGPT model naming early
if stream_output is None:
stream_output = False
assert max_new_tokens is not None, "No max_new_tokens specified"
# handle AutoGen specific parameters
if autogen_stop_docker_executor is None:
autogen_stop_docker_executor = False
if autogen_run_code_in_docker is None:
autogen_run_code_in_docker = False
if autogen_max_consecutive_auto_reply is None:
autogen_max_consecutive_auto_reply = 40
if autogen_max_turns is None:
autogen_max_turns = 40
if autogen_timeout is None:
autogen_timeout = 120
if agent_system_site_packages is None:
agent_system_site_packages = True
if autogen_code_restrictions_level is None:
autogen_code_restrictions_level = 2
if autogen_silent_exchange is None:
autogen_silent_exchange = True
if max_stream_length is None:
max_stream_length = 4096
if max_memory_usage is None:
# per-execution process maximum memory usage
max_memory_usage = 16 * 1024**3 # 16 GB
if agent_chat_history is None:
agent_chat_history = []
if agent_files is None:
agent_files = []
if agent_verbose is None:
agent_verbose = False
if agent_verbose:
print("AutoGen using model=%s." % model, flush=True)
if agent_work_dir is None:
# Create a temporary directory to store the code files.
agent_work_dir = tempfile.mkdtemp()
if agent_files:
# assume list of file_ids for use with File API
run_download_api_all(agent_files, authorization, agent_work_dir)
path_agent_tools, list_dir = get_agent_tools()
if agent_accuracy is None:
agent_accuracy = 'standard'
agent_accuracy_enum = ['quick', 'basic', 'standard', 'maximum']
assert agent_accuracy in agent_accuracy_enum, "Invalid agent_accuracy: %s" % agent_accuracy
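    # Each accuracy tier tunes per-tool usage limits, an extra instruction prepended
    # to the query, the executor's initial confidence level, and whether a planning
    # pre-pass runs by default.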
if agent_accuracy == 'quick':
agent_tools_usage_hard_limits = {k: 1 for k in list_dir}
agent_tools_usage_soft_limits = {k: 1 for k in list_dir}
extra_user_prompt = """Do not verify your response, do not check generated plots or images using the ask_question_about_image tool."""
initial_confidence_level = 1
if autogen_use_planning_prompt is None:
autogen_use_planning_prompt = False
elif agent_accuracy == 'basic':
agent_tools_usage_hard_limits = {k: 3 for k in list_dir}
agent_tools_usage_soft_limits = {k: 2 for k in list_dir}
extra_user_prompt = """Perform only basic level of verification and basic quality checks on your response. Files you make and your response can be basic."""
initial_confidence_level = 1
if autogen_use_planning_prompt is None:
autogen_use_planning_prompt = False
elif agent_accuracy == 'standard':
agent_tools_usage_hard_limits = dict(ask_question_about_image=5)
agent_tools_usage_soft_limits = {k: 5 for k in list_dir}
extra_user_prompt = ""
initial_confidence_level = 0
if autogen_use_planning_prompt is None:
autogen_use_planning_prompt = get_autogen_use_planning_prompt(model)
elif agent_accuracy == 'maximum':
agent_tools_usage_hard_limits = dict(ask_question_about_image=10)
agent_tools_usage_soft_limits = {}
extra_user_prompt = ""
initial_confidence_level = 0
if autogen_use_planning_prompt is None:
autogen_use_planning_prompt = get_autogen_use_planning_prompt(model)
else:
raise ValueError("Invalid agent_accuracy: %s" % agent_accuracy)
    # By default, existing agent history means we are continuing a task, not starting a new one, so skip planning
if agent_chat_history:
autogen_use_planning_prompt = False
if extra_user_prompt:
query = f"""\n{extra_user_prompt}\n\n\n""" + query
from openai_server.autogen_utils import get_code_executor
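    # A per-run venv name (fresh UUID) keeps packages installed during one agent session isolated from others.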
    if agent_venv_dir is None:
        unique_id = str(uuid.uuid4())
        agent_venv_dir = ".venv_%s" % unique_id
executor = get_code_executor(
autogen_run_code_in_docker=autogen_run_code_in_docker,
autogen_timeout=autogen_timeout,
agent_system_site_packages=agent_system_site_packages,
autogen_code_restrictions_level=autogen_code_restrictions_level,
agent_work_dir=agent_work_dir,
agent_venv_dir=agent_venv_dir,
agent_tools_usage_hard_limits=agent_tools_usage_hard_limits,
agent_tools_usage_soft_limits=agent_tools_usage_soft_limits,
max_stream_length=max_stream_length,
max_memory_usage=max_memory_usage,
)
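    # Two-agent pattern: this executor agent has no LLM and only runs code blocks;
    # the code-writer agent below has an LLM but no code execution.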
code_executor_kwargs = dict(
llm_config=False, # Turn off LLM for this agent.
code_execution_config={"executor": executor}, # Use the local command line code executor.
human_input_mode="NEVER", # Always take human input for this agent for safety.
# NOTE: no termination message, just triggered by executable code blocks present or not
# is_termination_msg=terminate_message_func,
max_consecutive_auto_reply=autogen_max_consecutive_auto_reply,
# max_turns is max times allowed executed some code, should be autogen_max_turns in general
max_turns=autogen_max_turns,
initial_confidence_level=initial_confidence_level,
)
from openai_server.autogen_utils import H2OConversableAgent
code_executor_agent = H2OConversableAgent("code_executor_agent", **code_executor_kwargs)
    # FIXME: auto-install missing pip packages; auto-return the file list each turn
base_url = os.environ['H2OGPT_OPENAI_BASE_URL'] # must exist
api_key = os.environ['H2OGPT_OPENAI_API_KEY'] # must exist
if agent_verbose:
print("base_url: %s" % base_url)
print("max_tokens: %s" % max_new_tokens)
system_message, internal_file_names, system_message_parts = \
get_full_system_prompt(agent_code_writer_system_message,
agent_system_site_packages, system_prompt,
base_url,
api_key, model, text_context_list, image_file,
agent_work_dir, query, autogen_timeout)
enable_caching = True
    def code_writer_terminate_func(msg):
        # If code_writer_agent sends a chatty reply with no code block, the executor
        # responds with an empty string (nothing to execute). Terminate at that point,
        # otherwise code_writer_agent would keep chatting indefinitely.
        return isinstance(msg, dict) and msg.get('content', '') == ''
code_writer_kwargs = dict(system_message=system_message,
llm_config={'timeout': autogen_timeout,
'extra_body': dict(enable_caching=enable_caching,
client_metadata=client_metadata,
),
"config_list": [{"model": model,
"api_key": api_key,
"base_url": base_url,
"stream": stream_output,
'max_tokens': max_new_tokens,
'cache_seed': autogen_cache_seed,
}]
},
code_execution_config=False, # Turn off code execution for this agent.
human_input_mode="NEVER",
is_termination_msg=code_writer_terminate_func,
max_consecutive_auto_reply=autogen_max_consecutive_auto_reply,
)
code_writer_agent = H2OConversableAgent("code_writer_agent", **code_writer_kwargs)
planning_messages = []
chat_result_planning = None
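    # Optional planning pre-pass: a short exchange asks the model for a plan first,
    # and its transcript is replayed into the main agents' history below.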
if autogen_use_planning_prompt:
# setup planning agents
code_writer_kwargs_planning = code_writer_kwargs.copy()
        # Terminate after a single reply; max_turns and initial_confidence_level
        # only apply to the code execution agent.
        code_writer_kwargs_update = dict(max_consecutive_auto_reply=1)
        code_writer_kwargs_planning.update(code_writer_kwargs_update)
code_writer_agent_planning = H2OConversableAgent("code_writer_agent", **code_writer_kwargs_planning)
chat_kwargs = dict(recipient=code_writer_agent_planning,
max_turns=1,
message=planning_prompt(query),
cache=None,
silent=autogen_silent_exchange,
clear_history=False,
)
code_executor_kwargs_planning = code_executor_kwargs.copy()
code_executor_kwargs_planning.update(dict(
max_turns=2,
initial_confidence_level=1,
))
code_executor_agent_planning = H2OConversableAgent("code_executor_agent", **code_executor_kwargs_planning)
chat_result_planning = code_executor_agent_planning.initiate_chat(**chat_kwargs)
# transfer planning result to main agents
if hasattr(chat_result_planning, 'chat_history') and chat_result_planning.chat_history:
planning_messages = chat_result_planning.chat_history
        for message in planning_messages:
            if 'content' in message:
                # strip agent control tokens leaked into the planning transcript
                message['content'] = message['content'].replace('<FINISHED_ALL_TASKS>', '').replace('ENDOFTURN', '')
            if 'role' in message and message['role'] == 'assistant':
                # replace this turn's content with the final-plan prompt
                message['content'] = planning_final_prompt(query)
# apply chat history
if chat_conversation or planning_messages or agent_chat_history:
chat_messages = []
# some high-level chat history
if chat_conversation:
chat_messages.extend(structure_to_messages(None, None, chat_conversation, None))
        # prepend the planning transcript
chat_messages.extend(planning_messages)
# actual internal agent chat history
if agent_chat_history:
chat_messages.extend(agent_chat_history)
# apply
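        # Sending with request_reply=False appends to each agent's message history
        # without triggering a reply, so initiate_chat below resumes from prior context.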
for message in chat_messages:
if message['role'] == 'user':
code_writer_agent.send(message['content'], code_executor_agent, request_reply=False, silent=True)
if message['role'] == 'assistant':
code_executor_agent.send(message['content'], code_writer_agent, request_reply=False, silent=True)
chat_kwargs = dict(recipient=code_writer_agent,
max_turns=autogen_max_turns,
message=query,
cache=None,
silent=autogen_silent_exchange,
clear_history=False,
)
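    # With a cache seed, LLM exchanges are cached on disk keyed by that seed, making
    # reruns of the same conversation reproducible and avoiding repeated API calls.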
if autogen_cache_seed:
from autogen import Cache
# Use DiskCache as cache
cache_root_path = "./autogen_cache"
        os.makedirs(cache_root_path, exist_ok=True)
with Cache.disk(cache_seed=autogen_cache_seed, cache_path_root=cache_root_path) as cache:
chat_kwargs.update(dict(cache=cache))
chat_result = code_executor_agent.initiate_chat(**chat_kwargs)
else:
chat_result = code_executor_agent.initiate_chat(**chat_kwargs)
if client_metadata:
print("END 2AGENT: client_metadata: %s" % client_metadata, flush=True)
ret_dict = get_ret_dict_and_handle_files(chat_result,
chat_result_planning,
model,
agent_work_dir, agent_verbose, internal_file_names, authorization,
autogen_run_code_in_docker, autogen_stop_docker_executor, executor,
agent_venv_dir, agent_code_writer_system_message,
agent_system_site_packages,
system_message_parts,
autogen_code_restrictions_level, autogen_silent_exchange,
agent_accuracy,
client_metadata=client_metadata)
if client_metadata:
print("END FILES FOR 2AGENT: client_metadata: %s" % client_metadata, flush=True)
return ret_dict
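

# Minimal usage sketch (assumptions: H2OGPT_OPENAI_BASE_URL and H2OGPT_OPENAI_API_KEY
# already exported for a reachable OpenAI-compatible endpoint; 'h2oai/h2ogpt-model'
# is a placeholder -- substitute a model actually served by your deployment):
if __name__ == "__main__":
    demo = run_autogen_2agent(query="Compute the first 10 primes and save them to primes.txt",
                              visible_models='h2oai/h2ogpt-model',  # placeholder model name
                              max_new_tokens=1024,
                              agent_type='autogen_2agent')
    print(demo)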