import os
import tempfile
import uuid

from openai_server.backend_utils import structure_to_messages, run_download_api_all
from openai_server.agent_utils import get_ret_dict_and_handle_files
from openai_server.agent_prompting import get_full_system_prompt, planning_prompt, planning_final_prompt, \
    get_agent_tools

from openai_server.autogen_utils import get_autogen_use_planning_prompt


def run_autogen_2agent(query=None,
                       visible_models=None,
                       stream_output=None,
                       max_new_tokens=None,
                       authorization=None,
                       chat_conversation=None,
                       text_context_list=None,
                       system_prompt=None,
                       image_file=None,
                       # autogen/agent specific parameters
                       agent_type=None,
                       agent_accuracy=None,
                       agent_chat_history=None,
                       agent_files=None,
                       agent_work_dir=None,
                       max_stream_length=None,
                       max_memory_usage=None,
                       autogen_use_planning_prompt=None,
                       autogen_stop_docker_executor=None,
                       autogen_run_code_in_docker=None,
                       autogen_max_consecutive_auto_reply=None,
                       autogen_max_turns=None,
                       autogen_timeout=None,
                       autogen_cache_seed=None,
                       agent_venv_dir=None,
                       agent_code_writer_system_message=None,
                       agent_system_site_packages=None,
                       autogen_code_restrictions_level=None,
                       autogen_silent_exchange=None,
                       client_metadata=None,
                       agent_verbose=None) -> dict:
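    """Run a two-agent AutoGen flow: a code-writer LLM agent paired with a
    local code-executor agent, exchanging turns until the task terminates.

    Overall flow (see the body below for details):
      1. Normalize OpenAI/chat-API parameters and fill AutoGen defaults.
      2. Download any agent_files into agent_work_dir.
      3. Build a code executor (optionally Dockerized, with tool-usage limits).
      4. Optionally run a short planning exchange first.
      5. Replay prior chat history, then run the main writer/executor chat.
      6. Collect produced files and return the result dictionary.
    """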
    if client_metadata:
        print("BEGIN 2AGENT: client_metadata: %s" % client_metadata, flush=True)
    assert agent_type in ['autogen_2agent', 'auto'], "Invalid agent_type: %s" % agent_type
    # raise openai.BadRequestError("Testing Error Handling")
    # raise ValueError("Testing Error Handling")

    # translate chat-API / OpenAI parameter names into their h2oGPT equivalents
    assert visible_models is not None, "No visible_models specified"
    model = visible_models  # rename early for clarity

    if stream_output is None:
        stream_output = False
    assert max_new_tokens is not None, "No max_new_tokens specified"

    # handle AutoGen specific parameters
    if autogen_stop_docker_executor is None:
        autogen_stop_docker_executor = False
    if autogen_run_code_in_docker is None:
        autogen_run_code_in_docker = False
    if autogen_max_consecutive_auto_reply is None:
        autogen_max_consecutive_auto_reply = 40
    if autogen_max_turns is None:
        autogen_max_turns = 40
    if autogen_timeout is None:
        autogen_timeout = 120
    if agent_system_site_packages is None:
        agent_system_site_packages = True
    if autogen_code_restrictions_level is None:
        autogen_code_restrictions_level = 2
    if autogen_silent_exchange is None:
        autogen_silent_exchange = True
    if max_stream_length is None:
        max_stream_length = 4096
    if max_memory_usage is None:
        # per-execution process maximum memory usage
        max_memory_usage = 16 * 1024**3  # 16 GB
    if agent_chat_history is None:
        agent_chat_history = []
    if agent_files is None:
        agent_files = []
    if agent_verbose is None:
        agent_verbose = False
    if agent_verbose:
        print("AutoGen using model=%s." % model, flush=True)

    if agent_work_dir is None:
        # Create a temporary directory to store the code files.
        # temp_dir = tempfile.TemporaryDirectory().name
        agent_work_dir = tempfile.mkdtemp()

    if agent_files:
        # assume list of file_ids for use with File API
        run_download_api_all(agent_files, authorization, agent_work_dir)

    # iostream = IOStream.get_default()
    # iostream.print("\033[32m", end="")

    path_agent_tools, list_dir = get_agent_tools()

    if agent_accuracy is None:
        agent_accuracy = 'standard'
    agent_accuracy_enum = ['quick', 'basic', 'standard', 'maximum']
    assert agent_accuracy in agent_accuracy_enum, "Invalid agent_accuracy: %s" % agent_accuracy
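    # Accuracy tiers trade speed for rigor (limits are per agent tool; see
    # get_code_executor for how hard vs. soft limits are enforced):
    #   quick    -> all tools capped at 1 use, no self-verification, no planning prompt
    #   basic    -> tools capped at 2-3 uses, only basic verification, no planning prompt
    #   standard -> ask_question_about_image hard-capped at 5, planning depends on model
    #   maximum  -> ask_question_about_image hard-capped at 10, no soft limits, planning depends on model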

    if agent_accuracy == 'quick':
        agent_tools_usage_hard_limits = {k: 1 for k in list_dir}
        agent_tools_usage_soft_limits = {k: 1 for k in list_dir}
        extra_user_prompt = """Do not verify your response, do not check generated plots or images using the ask_question_about_image tool."""
        initial_confidence_level = 1
        if autogen_use_planning_prompt is None:
            autogen_use_planning_prompt = False
    elif agent_accuracy == 'basic':
        agent_tools_usage_hard_limits = {k: 3 for k in list_dir}
        agent_tools_usage_soft_limits = {k: 2 for k in list_dir}
        extra_user_prompt = """Perform only basic level of verification and basic quality checks on your response.  Files you make and your response can be basic."""
        initial_confidence_level = 1
        if autogen_use_planning_prompt is None:
            autogen_use_planning_prompt = False
    elif agent_accuracy == 'standard':
        agent_tools_usage_hard_limits = dict(ask_question_about_image=5)
        agent_tools_usage_soft_limits = {k: 5 for k in list_dir}
        extra_user_prompt = ""
        initial_confidence_level = 0
        if autogen_use_planning_prompt is None:
            autogen_use_planning_prompt = get_autogen_use_planning_prompt(model)
    elif agent_accuracy == 'maximum':
        agent_tools_usage_hard_limits = dict(ask_question_about_image=10)
        agent_tools_usage_soft_limits = {}
        extra_user_prompt = ""
        initial_confidence_level = 0
        if autogen_use_planning_prompt is None:
            autogen_use_planning_prompt = get_autogen_use_planning_prompt(model)
    else:
        raise ValueError("Invalid agent_accuracy: %s" % agent_accuracy)

    # By default, assume existing agent chat history means we are continuing a task, not starting a new one.
    if agent_chat_history:
        autogen_use_planning_prompt = False

    if extra_user_prompt:
        query = f"""<extra_query_conditions>\n{extra_user_prompt}\n</extra_query_conditions>\n\n""" + query

    from openai_server.autogen_utils import get_code_executor
    if agent_venv_dir is None:
        # unique virtualenv directory per call
        venv_id = str(uuid.uuid4())
        agent_venv_dir = ".venv_%s" % venv_id

    executor = get_code_executor(
        autogen_run_code_in_docker=autogen_run_code_in_docker,
        autogen_timeout=autogen_timeout,
        agent_system_site_packages=agent_system_site_packages,
        autogen_code_restrictions_level=autogen_code_restrictions_level,
        agent_work_dir=agent_work_dir,
        agent_venv_dir=agent_venv_dir,
        agent_tools_usage_hard_limits=agent_tools_usage_hard_limits,
        agent_tools_usage_soft_limits=agent_tools_usage_soft_limits,
        max_stream_length=max_stream_length,
        max_memory_usage=max_memory_usage,
    )

    code_executor_kwargs = dict(
        llm_config=False,  # Turn off LLM for this agent.
        code_execution_config={"executor": executor},  # Use the local command line code executor.
        human_input_mode="NEVER",  # Always take human input for this agent for safety.
        # NOTE: no termination message, just triggered by executable code blocks present or not
        # is_termination_msg=terminate_message_func,
        max_consecutive_auto_reply=autogen_max_consecutive_auto_reply,
        # max_turns is the maximum number of code-execution turns; generally should match autogen_max_turns
        max_turns=autogen_max_turns,
        initial_confidence_level=initial_confidence_level,
    )

    from openai_server.autogen_utils import H2OConversableAgent
    code_executor_agent = H2OConversableAgent("code_executor_agent", **code_executor_kwargs)

    # FIXME:
    # Auto-pip install
    # Auto-return file list in each turn

    base_url = os.environ['H2OGPT_OPENAI_BASE_URL']  # must exist
    api_key = os.environ['H2OGPT_OPENAI_API_KEY']  # must exist
    if agent_verbose:
        print("base_url: %s" % base_url)
        print("max_tokens: %s" % max_new_tokens)

    system_message, internal_file_names, system_message_parts = \
        get_full_system_prompt(agent_code_writer_system_message,
                               agent_system_site_packages, system_prompt,
                               base_url,
                               api_key, model, text_context_list, image_file,
                               agent_work_dir, query, autogen_timeout)

    enable_caching = True

    def code_writer_terminate_func(msg):
        # If code_writer_agent sends a chatty answer with no code block (and no <FINISHED_ALL_TASKS>),
        # code_executor returns an empty string (there is nothing to execute).
        # Terminate the chat at that point, otherwise code_writer_agent would keep chatting.
        return isinstance(msg, dict) and msg.get('content', '') == ''

    code_writer_kwargs = dict(system_message=system_message,
                              llm_config={'timeout': autogen_timeout,
                                          'extra_body': dict(enable_caching=enable_caching,
                                                             client_metadata=client_metadata,
                                                             ),
                                          "config_list": [{"model": model,
                                                           "api_key": api_key,
                                                           "base_url": base_url,
                                                           "stream": stream_output,
                                                           'max_tokens': max_new_tokens,
                                                           'cache_seed': autogen_cache_seed,
                                                           }]
                                          },
                              code_execution_config=False,  # Turn off code execution for this agent.
                              human_input_mode="NEVER",
                              is_termination_msg=code_writer_terminate_func,
                              max_consecutive_auto_reply=autogen_max_consecutive_auto_reply,
                              )

    code_writer_agent = H2OConversableAgent("code_writer_agent", **code_writer_kwargs)

    planning_messages = []
    chat_result_planning = None
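    # Optional planning phase: run a short separate writer/executor exchange on a
    # planning prompt first, then inject its transcript into the main agents as
    # prior chat history so the main run starts from an agreed plan.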
    if autogen_use_planning_prompt:
        # setup planning agents
        code_writer_kwargs_planning = code_writer_kwargs.copy()
        # force quick termination: allow only one consecutive auto-reply
        # Note: max_turns and initial_confidence_level are only relevant for the code-execution agent
        code_writer_kwargs_update = dict(max_consecutive_auto_reply=1)
        # is_termination_msg=lambda x: True
        code_writer_kwargs_planning.update(code_writer_kwargs_update)
        code_writer_agent_planning = H2OConversableAgent("code_writer_agent", **code_writer_kwargs_planning)

        chat_kwargs = dict(recipient=code_writer_agent_planning,
                           max_turns=1,
                           message=planning_prompt(query),
                           cache=None,
                           silent=autogen_silent_exchange,
                           clear_history=False,
                           )
        code_executor_kwargs_planning = code_executor_kwargs.copy()
        code_executor_kwargs_planning.update(dict(
            max_turns=2,
            initial_confidence_level=1,
        ))
        code_executor_agent_planning = H2OConversableAgent("code_executor_agent", **code_executor_kwargs_planning)

        chat_result_planning = code_executor_agent_planning.initiate_chat(**chat_kwargs)

        # transfer planning result to main agents
        if hasattr(chat_result_planning, 'chat_history') and chat_result_planning.chat_history:
            planning_messages = chat_result_planning.chat_history
            for message in planning_messages:
                if 'content' in message:
                    message['content'] = message['content'].replace('<FINISHED_ALL_TASKS>', '').replace('ENDOFTURN', '')
                if 'role' in message and message['role'] == 'assistant':
                    # replace the assistant's content with the final planning prompt
                    message['content'] = planning_final_prompt(query)

    # apply chat history
    if chat_conversation or planning_messages or agent_chat_history:
        chat_messages = []

        # some high-level chat history
        if chat_conversation:
            chat_messages.extend(structure_to_messages(None, None, chat_conversation, None))

        # prepend planning messages
        chat_messages.extend(planning_messages)

        # actual internal agent chat history
        if agent_chat_history:
            chat_messages.extend(agent_chat_history)

        # apply
        for message in chat_messages:
            if message['role'] == 'user':
                code_writer_agent.send(message['content'], code_executor_agent, request_reply=False, silent=True)
            elif message['role'] == 'assistant':
                code_executor_agent.send(message['content'], code_writer_agent, request_reply=False, silent=True)

    chat_kwargs = dict(recipient=code_writer_agent,
                       max_turns=autogen_max_turns,
                       message=query,
                       cache=None,
                       silent=autogen_silent_exchange,
                       clear_history=False,
                       )
    if autogen_cache_seed:
        from autogen import Cache
        # Use DiskCache as cache
        cache_root_path = "./autogen_cache"
        os.makedirs(cache_root_path, exist_ok=True)
        with Cache.disk(cache_seed=autogen_cache_seed, cache_path_root=cache_root_path) as cache:
            chat_kwargs.update(dict(cache=cache))
            chat_result = code_executor_agent.initiate_chat(**chat_kwargs)
    else:
        chat_result = code_executor_agent.initiate_chat(**chat_kwargs)

    if client_metadata:
        print("END 2AGENT: client_metadata: %s" % client_metadata, flush=True)
    ret_dict = get_ret_dict_and_handle_files(chat_result,
                                             chat_result_planning,
                                             model,
                                             agent_work_dir, agent_verbose, internal_file_names, authorization,
                                             autogen_run_code_in_docker, autogen_stop_docker_executor, executor,
                                             agent_venv_dir, agent_code_writer_system_message,
                                             agent_system_site_packages,
                                             system_message_parts,
                                             autogen_code_restrictions_level, autogen_silent_exchange,
                                             agent_accuracy,
                                             client_metadata=client_metadata)
    if client_metadata:
        print("END FILES FOR 2AGENT: client_metadata: %s" % client_metadata, flush=True)

    return ret_dict
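

if __name__ == "__main__":
    # Minimal usage sketch (not a production entrypoint). Assumes the
    # H2OGPT_OPENAI_BASE_URL and H2OGPT_OPENAI_API_KEY environment variables are
    # set, and that the model name and authorization token below are
    # placeholders to replace with real values.
    example_result = run_autogen_2agent(
        query="Plot y = x**2 for x in [0, 10] and save it as parabola.png.",
        visible_models="gpt-4o",  # placeholder model name
        stream_output=False,
        max_new_tokens=2048,
        authorization="sk-placeholder",  # placeholder token
        agent_type="autogen_2agent",
        agent_accuracy="quick",
    )
    print(example_result)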