File size: 10,106 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import inspect
import json
import os
import subprocess
import sys
import argparse
import logging
import typing
import uuid
from multiprocessing import Process
from threading import Thread
from typing import Union

import uvicorn
from fastapi import FastAPI

# Make this file's directory importable so sibling modules (e.g. `log`) resolve
# whether the package is run as a script or imported from elsewhere.
if os.path.dirname(os.path.abspath(__file__)) not in sys.path:
    sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Also allow `openai_server.<module>` style imports when launched from the repo root.
sys.path.append('openai_server')


def run_server(host: str = '0.0.0.0',
               port: int = 5000,
               ssl_certfile: str = None,
               ssl_keyfile: str = None,
               gradio_prefix: str = None,
               gradio_host: str = None,
               gradio_port: str = None,
               h2ogpt_key: str = None,
               auth: Union[typing.List[typing.Tuple[str, str]], str] = None,
               auth_access: str = 'open',
               guest_name: str = '',
               # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
               workers: int = 1,
               app: Union[str, FastAPI] = None,
               is_openai_server: bool = True,
               is_agent_server: bool = False,
               openai_port: int = None,
               agent_server: bool = False,
               openai_server: bool = False,
               multiple_workers_gunicorn: bool = False,
               main_kwargs: str = "",  # json.dumped dict
               verbose=False,
               ):
    """Configure environment and launch one API server (OpenAI, Agent, or Function).

    Either runs uvicorn in-process (single worker, or `app` given as an object)
    or spawns gunicorn as a subprocess for multi-worker mode.

    :param host: interface to bind the server to.
    :param port: port to bind the server to.
    :param ssl_certfile: optional TLS certificate path (H2OGPT_OPENAI_CERT_PATH overrides).
    :param ssl_keyfile: optional TLS key path (H2OGPT_OPENAI_KEY_PATH overrides).
    :param gradio_prefix: scheme used to reach the gradio server ('http' default).
    :param gradio_host: host of the gradio server ('localhost' default).
    :param gradio_port: port of the gradio server ('7860' default).
    :param h2ogpt_key: h2oGPT key; the string 'None' is treated as no key.
    :param auth: gradio auth spec, passed through via GRADIO_AUTH env.
    :param auth_access: gradio auth access mode.
    :param guest_name: gradio guest user name.
    :param workers: worker count; 0 means auto-size (capped at 16).
    :param app: FastAPI app object, or import string (required for gunicorn mode).
    :param is_openai_server: this instance serves the OpenAI-compatible API.
    :param is_agent_server: this instance serves the Agent API.
    :param openai_port: advertised OpenAI port; defaults to `port`.
    :param agent_server: whether an agent server is also being launched.
    :param openai_server: whether an openai server is also being launched.
    :param multiple_workers_gunicorn: use gunicorn subprocess for multi-worker mode.
    :param main_kwargs: json.dumps'd dict passed to the Function server via env.
    :param verbose: print environment and locals for debugging.
    """
    if workers == 0:
        # os.cpu_count() can return None on exotic platforms; fall back to 1
        # so the arithmetic never raises TypeError.
        workers = min(16, (os.cpu_count() or 1) * 2 + 1)
    assert app is not None

    if openai_port is None:
        openai_port = port

    # is_agent_server is racy, so started this in process instead of thread nominally, or use gunicorn
    if is_agent_server:
        name = 'Agent'
        os.environ['is_agent_server'] = '1'
    else:
        name = 'OpenAI' if is_openai_server else 'Function'
        os.environ['is_agent_server'] = '0'

    # Note: These envs are risky for race given thread is launching for all 3 servers
    os.environ['GRADIO_PREFIX'] = gradio_prefix or 'http'
    os.environ['GRADIO_SERVER_HOST'] = gradio_host or 'localhost'
    os.environ['GRADIO_SERVER_PORT'] = gradio_port or '7860'
    if h2ogpt_key == 'None':
        h2ogpt_key = None
    os.environ['GRADIO_H2OGPT_H2OGPT_KEY'] = h2ogpt_key or ''  # don't use H2OGPT_H2OGPT_KEY, mixes things up
    # use h2ogpt_key if no server api key, so OpenAI inherits key by default if any keys set and enforced via API for h2oGPT
    # but OpenAI key cannot be '', so dummy value is EMPTY and if EMPTY we ignore the key in authorization
    server_api_key = os.getenv('H2OGPT_OPENAI_API_KEY', os.environ['GRADIO_H2OGPT_H2OGPT_KEY']) or 'EMPTY'
    os.environ['H2OGPT_OPENAI_API_KEY'] = server_api_key

    os.environ['GRADIO_AUTH'] = str(auth)
    os.environ['GRADIO_AUTH_ACCESS'] = auth_access
    os.environ['GRADIO_GUEST_NAME'] = guest_name

    os.environ['H2OGPT_OPENAI_PORT'] = str(openai_port)  # so can know the port
    os.environ['H2OGPT_OPENAI_HOST'] = str(host)  # so can know the host
    ssl_certfile = os.getenv('H2OGPT_OPENAI_CERT_PATH', ssl_certfile)
    ssl_keyfile = os.getenv('H2OGPT_OPENAI_KEY_PATH', ssl_keyfile)
    prefix = 'https' if ssl_keyfile and ssl_certfile else 'http'
    os.environ['H2OGPT_OPENAI_BASE_URL'] = f'{prefix}://{host}:{openai_port}/v1'

    if verbose:
        print('ENVs')
        print(dict(os.environ))
        print('LOCALS')
        print(locals())
    else:
        print("verbose disabled")

    try:
        from openai_server.log import logger
    except ModuleNotFoundError:
        # fall back to flat import when run from inside the package directory
        from log import logger
    logger.info(f'{name} API URL: {prefix}://{host}:{port}')
    logger.info(f'{name} API key: {server_api_key}')

    logging.getLogger("uvicorn.error").propagate = False

    if name == 'Function':
        # to pass args through so app can run gen setup
        os.environ['H2OGPT_MAIN_KWARGS'] = main_kwargs

    if not isinstance(app, str):
        # uvicorn ignores `workers` unless app is an import string anyway
        workers = None

    if multiple_workers_gunicorn:
        os.environ['multiple_workers_gunicorn'] = 'True'

        assert isinstance(app, str), "app must be string for gunicorn multi-worker mode."
        print(f"Multi-worker {name} Proxy gunicorn: {workers}")
        # Build gunicorn command
        command = [
            'gunicorn',
            '-w', str(workers),
            '-k', 'uvicorn.workers.UvicornWorker',
            '--timeout', '60',
            '-b', f"{host}:{port}",
        ]
        if ssl_certfile:
            command.extend(['--certfile', ssl_certfile])
        if ssl_keyfile:
            command.extend(['--keyfile', ssl_keyfile])
        command.append('openai_server.' + app)  # This should be a string like 'server:app'

        file_path = os.getenv('H2OGPT_OPENAI_LOG_PATH', 'openai_logs')
        if not os.path.exists(file_path):
            try:
                os.makedirs(file_path, exist_ok=True)
            except FileExistsError:
                # for races among workers
                pass
        file_prefix = "gunicorn" + '_' + name + '_' + str(uuid.uuid4()) + '_'
        file_stdout = os.path.join(file_path, file_prefix + 'stdout.log')
        file_stderr = os.path.join(file_path, file_prefix + 'stderr.log')
        f_stdout = open(file_stdout, 'wt')
        f_stderr = open(file_stderr, 'wt')
        process = subprocess.Popen(command, stdout=f_stdout, stderr=f_stderr)
        wait = False  # debug toggle: block on the gunicorn child if flipped
        if wait:
            process.communicate()
        # child inherited the handles; close the parent's copies to avoid an fd leak
        f_stdout.close()
        f_stderr.close()
    else:
        uvicorn.run(app, host=host, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile,
                    workers=workers,
                    )


def run(wait=True, **kwargs):
    """Launch a server (OpenAI, Agent, or Function) per `run_server`'s kwargs.

    Decides between: a multi-worker uvicorn subprocess, gunicorn (delegated to
    run_server), running uvicorn in the current thread (blocking), or a new
    daemon thread/process (non-blocking).

    :param wait: block until the server exits; False launches in background.
    :param kwargs: forwarded to run_server; must include 'is_openai_server' and 'workers'.
    """
    assert 'is_openai_server' in kwargs
    if kwargs.get('is_agent_server', False):
        name = 'Agent'
        # if openai server, then launch this as process instead of thread to avoid races with env vars
        as_thread = not kwargs.get('openai_server', False)
    elif kwargs['is_openai_server']:
        name = 'OpenAI'
        # if agent server, then launch this as process instead of thread to avoid races with env vars
        as_thread = not kwargs.get('agent_server', False)
    else:
        name = 'Function'
        # still launch function server as thread since no race for any envs
        as_thread = True

    # has to stay as thread to avoid forking thread issues for gradio
    # just deal with race via sleep
    as_thread = True

    if kwargs.get('verbose', False):
        print(kwargs)

    # workers == 0 means auto-size, so it also takes the multi-worker path
    if kwargs['workers'] > 1 or kwargs['workers'] == 0:
        if not kwargs['multiple_workers_gunicorn']:
            # popen now, so launch uvicorn with string app
            if kwargs.get('verbose', False):
                print(f"Multi-worker {name} Proxy uvicorn: {kwargs['workers']}")
            # avoid CUDA forking
            command = ['python', 'openai_server/server_start.py']
            # Convert the kwargs to command line arguments
            for key, value in kwargs.items():
                command.append(f'--{key}')  # Assume keys are formatted as expected for the script
                command.append(str(value))  # Convert all values to strings to be safe

            file_prefix = "popen" + '_' + name + '_' + str(uuid.uuid4()) + '_'
            file_stdout = file_prefix + 'stdout.log'
            file_stderr = file_prefix + 'stderr.log'
            f_stdout = open(file_stdout, 'wt')
            f_stderr = open(file_stderr, 'wt')
            process = subprocess.Popen(command, stdout=f_stdout, stderr=f_stderr)
            if wait:
                process.communicate()
            # child inherited the handles; close the parent's copies to avoid an fd leak
            f_stdout.close()
            f_stderr.close()
        else:
            # will launch gunicorn in popen inside run_server
            run_server(**kwargs)
    elif wait:
        kwargs['multiple_workers_gunicorn'] = False  # force uvicorn since not using multiple workers
        # launch uvicorn in this thread/process
        if kwargs.get('verbose', False):
            print(f"Single-worker {name} Proxy uvicorn in this thread: {kwargs['workers']}")
        run_server(**kwargs)
    else:
        kwargs['multiple_workers_gunicorn'] = False  # force uvicorn since not using multiple workers
        # launch uvicorn in this process in new thread
        if as_thread:
            if kwargs.get('verbose', False):
                print(f"Single-worker {name} Proxy uvicorn in new thread: {kwargs['workers']}")
            Thread(target=run_server, kwargs=kwargs, daemon=True).start()
        else:
            # currently unreachable (as_thread forced True above); kept for the
            # process-based launch path should the gradio forking issue be resolved
            if kwargs.get('verbose', False):
                print(f"Single-worker {name} Proxy uvicorn in new process: {kwargs['workers']}")
            Process(target=run_server, kwargs=kwargs).start()


def argv_to_kwargs(argv=None):
    """Parse command-line arguments into a kwargs dict for run_server.

    The argparse options are generated by introspecting run_server's signature,
    so new parameters become CLI flags automatically. int and bool defaults get
    typed parsing; everything else is treated as a string (None defaults become '').

    :param argv: full argv list (argv[0] is skipped); None means use sys.argv.
    :return: dict of parsed argument name -> value.
    """
    parser = argparse.ArgumentParser(description='Convert command line arguments to kwargs.')

    # Inspect the run_server function to get its arguments and defaults
    sig = inspect.signature(run_server)
    for name, param in sig.parameters.items():
        # Determine if the parameter has a default value.
        # Parameter.empty is a sentinel, so identity comparison is the correct check.
        if param.default is inspect.Parameter.empty:
            # Parameter without a default value (treat it as required positional argument)
            parser.add_argument(f'--{name}')
        else:
            # Parameter with a default value (treat it as optional argument)
            # Note: type(x) is int is False for bools, so bool defaults fall through correctly
            if type(param.default) is int:  # Check if the default value is an integer
                parser.add_argument(f'--{name}', type=int, default=param.default)
            elif type(param.default) is bool:  # Add support for boolean values
                parser.add_argument(f'--{name}', type=lambda x: (str(x).lower() in ['true', '1', 'yes']),
                                    default=param.default)
            else:  # Treat as string by default
                parser.add_argument(f'--{name}', type=str, default=param.default if param.default is not None else '')

    # Parse the command line arguments
    args = parser.parse_args(argv[1:] if argv else None)

    # Convert parsed arguments to a dictionary
    kwargs = vars(args)
    return kwargs


if __name__ == '__main__':
    # Script entry point: turn CLI flags into kwargs and run the server directly.
    run_server(**argv_to_kwargs(sys.argv))