import ast
import copy
import functools
import json
import os
import tempfile
import time
import traceback
import uuid

import filelock

from enums import LangChainMode, LangChainAction, no_model_str, LangChainTypes, langchain_modes_intrinsic, \
    DocumentSubset, unknown_prompt_type, my_db_state0, selection_docs_state0, requests_state0, roles_state0, noneset, \
    images_num_max_dict, image_batch_image_prompt0, image_batch_final_prompt0, images_limit_max_new_tokens, \
    images_limit_max_new_tokens_list
from model_utils import model_lock_to_state
from tts_utils import combine_audios
from utils import _save_generate_tokens, clear_torch_cache, remove, save_generate_output, str_to_list, \
    get_accordion_named, check_input_type, download_image, deepcopy_by_pickle_object
from db_utils import length_db1
from evaluate_params import input_args_list, eval_func_param_names, key_overrides, in_model_state_and_evaluate
from vision.utils_vision import process_file_list


def evaluate_nochat(*args1, default_kwargs1=None, str_api=False, plain_api=False, verifier=False, kwargs={},
                    my_db_state1=None,
                    selection_docs_state1=None,
                    requests_state1=None,
                    roles_state1=None,
                    model_states=[],
                    **kwargs1):
    is_public = kwargs1.get('is_public', False)
    verbose = kwargs1.get('verbose', False)

    if my_db_state1 is None:
        if 'my_db_state0' in kwargs1 and kwargs1['my_db_state0'] is not None:
            my_db_state1 = kwargs1['my_db_state0']
        else:
            my_db_state1 = copy.deepcopy(my_db_state0)
    if selection_docs_state1 is None:
        if 'selection_docs_state0' in kwargs1 and kwargs1['selection_docs_state0'] is not None:
            selection_docs_state1 = kwargs1['selection_docs_state0']
        else:
            selection_docs_state1 = copy.deepcopy(selection_docs_state0)
    if requests_state1 is None:
        if 'requests_state0' in kwargs1 and kwargs1['requests_state0'] is not None:
            requests_state1 = kwargs1['requests_state0']
        else:
            requests_state1 = copy.deepcopy(requests_state0)
    if roles_state1 is None:
        if 'roles_state0' in kwargs1 and kwargs1['roles_state0'] is not None:
            roles_state1 = kwargs1['roles_state0']
        else:
            roles_state1 = copy.deepcopy(roles_state0)
    kwargs_eval_pop_keys = ['selection_docs_state0', 'requests_state0', 'roles_state0']
    for k in kwargs_eval_pop_keys:
        if k in kwargs1:
            kwargs1.pop(k)

    ###########################################
    # fill args_list with states
    args_list = list(args1)
    if str_api:
        if plain_api:
            if not verifier:
                # i.e. not fresh model, tells evaluate to use model_state0
                args_list.insert(0, kwargs['model_state_none'].copy())
            else:
                args_list.insert(0, kwargs['verifier_model_state0'].copy())
            args_list.insert(1, my_db_state1.copy())
            args_list.insert(2, selection_docs_state1.copy())
            args_list.insert(3, requests_state1.copy())
            args_list.insert(4, roles_state1.copy())
        user_kwargs = args_list[len(input_args_list)]
        assert isinstance(user_kwargs, str)
        user_kwargs = ast.literal_eval(user_kwargs)
    else:
        assert not plain_api
        user_kwargs = {k: v for k, v in zip(eval_func_param_names, args_list[len(input_args_list):])}

    ###########################################
    # control kwargs1 for evaluate
    if 'answer_with_sources' not in user_kwargs:
        kwargs1['answer_with_sources'] = -1  # just text chunk, not URL etc.
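    # The remaining source-display defaults below follow the same pattern: if
    # the API caller does not ask for them, keep responses minimal (plain text
    # chunks, no accordions or links appended).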
    if 'sources_show_text_in_accordion' not in user_kwargs:
        kwargs1['sources_show_text_in_accordion'] = False
    if 'append_sources_to_chat' not in user_kwargs:
        kwargs1['append_sources_to_chat'] = False
    if 'append_sources_to_answer' not in user_kwargs:
        kwargs1['append_sources_to_answer'] = False
    if 'show_link_in_sources' not in user_kwargs:
        kwargs1['show_link_in_sources'] = False
    # kwargs1['top_k_docs_max_show'] = 30

    ###########################################
    # modify some user_kwargs
    # only used for submit_nochat_api
    user_kwargs['chat'] = False
    if 'stream_output' not in user_kwargs:
        user_kwargs['stream_output'] = False
    if plain_api:
        user_kwargs['stream_output'] = False
    if 'langchain_mode' not in user_kwargs:
        # if user doesn't specify, then assume disabled, not use default
        if LangChainMode.LLM.value in kwargs['langchain_modes']:
            user_kwargs['langchain_mode'] = LangChainMode.LLM.value
        elif len(kwargs['langchain_modes']) >= 1:
            user_kwargs['langchain_mode'] = kwargs['langchain_modes'][0]
        else:
            # disabled should always be allowed
            user_kwargs['langchain_mode'] = LangChainMode.DISABLED.value
    if 'langchain_action' not in user_kwargs:
        user_kwargs['langchain_action'] = LangChainAction.QUERY.value
    if 'langchain_agents' not in user_kwargs:
        user_kwargs['langchain_agents'] = []
    # be flexible
    if 'instruction' in user_kwargs and 'instruction_nochat' not in user_kwargs:
        user_kwargs['instruction_nochat'] = user_kwargs['instruction']
    if 'iinput' in user_kwargs and 'iinput_nochat' not in user_kwargs:
        user_kwargs['iinput_nochat'] = user_kwargs['iinput']
    if 'visible_models' not in user_kwargs:
        if kwargs['visible_models']:
            if isinstance(kwargs['visible_models'], int):
                user_kwargs['visible_models'] = [kwargs['visible_models']]
            elif isinstance(kwargs['visible_models'], list):
                # only take first one
                user_kwargs['visible_models'] = [kwargs['visible_models'][0]]
            else:
                user_kwargs['visible_models'] = [0]
        else:
            # if no user version or default version, then just take first
            user_kwargs['visible_models'] = [0]
    if 'visible_vision_models' not in user_kwargs or user_kwargs['visible_vision_models'] is None:
        # don't assume None, which will trigger default_kwargs
        # the None case is never really directly useful
        user_kwargs['visible_vision_models'] = 'auto'

    if 'h2ogpt_key' not in user_kwargs:
        user_kwargs['h2ogpt_key'] = None
    if 'system_prompt' in user_kwargs and user_kwargs['system_prompt'] is None:
        # avoid worrying about below default_kwargs -> args_list that checks if None
        user_kwargs['system_prompt'] = 'None'
    # by default don't do TTS unless specifically requested
    if 'chatbot_role' not in user_kwargs:
        user_kwargs['chatbot_role'] = 'None'
    if 'speaker' not in user_kwargs:
        user_kwargs['speaker'] = 'None'

    set1 = set(list(default_kwargs1.keys()))
    set2 = set(eval_func_param_names)
    assert set1 == set2, "Set diff: %s %s: %s" % (set1, set2, set1.symmetric_difference(set2))
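    # (the assert above guards the zip/indexing used throughout this module:
    # every evaluate() parameter must have a startup default and vice versa)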
    ###########################################
    # correct ordering.  Note some things may not be in default_kwargs, so can't be default of user_kwargs.get()
    model_state1 = args_list[0]
    my_db_state1 = args_list[1]
    selection_docs_state1 = args_list[2]
    requests_state1 = args_list[3]
    roles_state1 = args_list[4]

    args_list = [user_kwargs[k] if k in user_kwargs and user_kwargs[k] is not None else default_kwargs1[k] for k in
                 eval_func_param_names]
    assert len(args_list) == len(eval_func_param_names)

    ###########################################
    # select model
    model_lock_client = args_list[eval_func_param_names.index('model_lock')]
    if model_lock_client:
        # because cache, if has local model state, then stays in memory
        # kwargs should be fixed and unchanging, and user should be careful if mutating model_lock_client
        model_state1 = model_lock_to_state(model_lock_client, cache_model_state=True, **kwargs)
    elif len(model_states) >= 1:
        visible_models1 = args_list[eval_func_param_names.index('visible_models')]
        model_active_choice1 = visible_models_to_model_choice(visible_models1, model_states, api=True)
        model_state1 = model_states[model_active_choice1 % len(model_states)]
    for key in key_overrides:
        if user_kwargs.get(key) is None and model_state1.get(key) is not None:
            args_list[eval_func_param_names.index(key)] = model_state1[key]
    if isinstance(model_state1, dict) and \
            'tokenizer' in model_state1 and \
            hasattr(model_state1['tokenizer'], 'model_max_length'):
        # ensure listen to limit, with some buffer
        # buffer = 50
        buffer = 0
        args_list[eval_func_param_names.index('max_new_tokens')] = min(
            args_list[eval_func_param_names.index('max_new_tokens')],
            model_state1['tokenizer'].model_max_length - buffer)

    ###########################################
    # override overall visible_models and h2ogpt_key if have model_specific one
    # NOTE: only applicable if len(model_states) > 1 at moment
    # else controlled by evaluate()
    if 'visible_models' in model_state1 and model_state1['visible_models'] is not None:
        assert isinstance(model_state1['visible_models'], (int, str, list, tuple))
        which_model = visible_models_to_model_choice(model_state1['visible_models'], model_states)
        args_list[eval_func_param_names.index('visible_models')] = which_model
    if 'visible_vision_models' in model_state1 and model_state1['visible_vision_models'] is not None:
        assert isinstance(model_state1['visible_vision_models'], (int, str, list, tuple))
        which_model = visible_models_to_model_choice(model_state1['visible_vision_models'], model_states)
        args_list[eval_func_param_names.index('visible_vision_models')] = which_model
    if 'h2ogpt_key' in model_state1 and model_state1['h2ogpt_key'] is not None:
        # remote server key if present
        # i.e. may be '' and used to override overall local key
        assert isinstance(model_state1['h2ogpt_key'], str)
        args_list[eval_func_param_names.index('h2ogpt_key')] = model_state1['h2ogpt_key']

    ###########################################
    # final full bot() like input for prep_bot etc.
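    # Assumed layout (from input_args_list): eval parameters first, then the
    # five session states, then the chat history, matching what prep_bot()
    # unpacks from the tail of its inputs.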
    instruction_nochat1 = args_list[eval_func_param_names.index('instruction_nochat')] or \
                          args_list[eval_func_param_names.index('instruction')]
    args_list[eval_func_param_names.index('instruction_nochat')] = \
        args_list[eval_func_param_names.index('instruction')] = \
        instruction_nochat1
    history = [[instruction_nochat1, None]]
    # NOTE: Set requests_state1 to None, so don't allow UI-like access, in case modify state via API
    requests_state1_bot = None
    args_list_bot = args_list + [model_state1, my_db_state1, selection_docs_state1, requests_state1_bot,
                                 roles_state1] + [history]

    # at this point like bot() as input
    history, fun1, langchain_mode1, db1, requests_state1, \
        valid_key, h2ogpt_key1, \
        max_time1, stream_output1, \
        chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, langchain_action1, \
        image_files_to_delete = \
        prep_bot(*args_list_bot, kwargs_eval=kwargs1, plain_api=plain_api, kwargs=kwargs, verbose=verbose)

    save_dict = dict()
    ret = {'error': "No response", 'sources': [], 'sources_str': '', 'prompt_raw': instruction_nochat1,
           'llm_answers': []}
    ret_old = ''
    history_str_old = ''
    error_old = ''
    audios = []  # in case not streaming, since audio is always streaming, need to accumulate for when yield
    last_yield = None
    res_dict = {}
    try:
        tgen0 = time.time()
        for res in get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1,
                                langchain_action1, langchain_mode1,
                                kwargs=kwargs, api=True, verbose=verbose):
            history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio1 = res
            res_dict = {}
            res_dict['response'] = history[-1][1] or ''
            res_dict['error'] = error
            res_dict['sources'] = sources
            res_dict['sources_str'] = sources_str
            res_dict['prompt_raw'] = prompt_raw
            res_dict['llm_answers'] = llm_answers
            res_dict['save_dict'] = save_dict
            res_dict['audio'] = audio1

            error = res_dict.get('error', '')
            sources = res_dict.get('sources', [])
            save_dict = res_dict.get('save_dict', {})

            # update save_dict
            save_dict['error'] = error
            save_dict['sources'] = sources
            save_dict['valid_key'] = valid_key
            save_dict['h2ogpt_key'] = h2ogpt_key1
            # below works for both list and string for any reasonable string of image
            # that's been byte encoded with b' to start or as file name
            image_file_check = args_list[eval_func_param_names.index('image_file')]
            save_dict['image_file_present'] = len(image_file_check) if \
                isinstance(image_file_check, (str, list, tuple)) else 0
            text_context_list_check = args_list[eval_func_param_names.index('text_context_list')]
            save_dict['text_context_list_present'] = len(text_context_list_check) if \
                isinstance(text_context_list_check, (list, tuple)) else 0
            if str_api and plain_api:
                save_dict['which_api'] = 'str_plain_api'
            elif str_api:
                save_dict['which_api'] = 'str_api'
            elif plain_api:
                save_dict['which_api'] = 'plain_api'
            else:
                save_dict['which_api'] = 'nochat_api'
            if 'extra_dict' not in save_dict:
                save_dict['extra_dict'] = {}
            if requests_state1:
                save_dict['extra_dict'].update(requests_state1)
            else:
                save_dict['extra_dict'].update(dict(username='NO_REQUEST'))

            if is_public:
                # don't want to share actual endpoints
                if 'save_dict' in res_dict and isinstance(res_dict['save_dict'], dict):
                    res_dict['save_dict'].pop('inference_server', None)
                    if 'extra_dict' in res_dict['save_dict'] and isinstance(res_dict['save_dict']['extra_dict'],
                                                                            dict):
                        res_dict['save_dict']['extra_dict'].pop('inference_server', None)

            # get response
            if str_api:
                # full return of dict, except constant items that can be read-off at end
                res_dict_yield = res_dict.copy()
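                # (constant fields are blanked per-iteration below and only
                # delivered in full by the final yield after the loop)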
                # do not stream: ['save_dict', 'prompt_raw', 'sources', 'sources_str', 'response_no_refs']
                only_stream = ['response', 'llm_answers', 'audio']
                for key in res_dict:
                    if key not in only_stream:
                        if isinstance(res_dict[key], str):
                            res_dict_yield[key] = ''
                        elif isinstance(res_dict[key], list):
                            res_dict_yield[key] = []
                        elif isinstance(res_dict[key], dict):
                            res_dict_yield[key] = {}
                        else:
                            print("Unhandled pop: %s" % key)
                            res_dict_yield.pop(key)
                ret = res_dict_yield
            elif kwargs['langchain_mode'] == 'Disabled':
                ret = fix_text_for_gradio(res_dict['response'], fix_latex_dollars=False, fix_angle_brackets=False)
            else:
                ret = '<br>' + fix_text_for_gradio(res_dict['response'], fix_latex_dollars=False,
                                                   fix_angle_brackets=False)

            do_yield = False
            could_yield = ret != ret_old
            if kwargs['gradio_api_use_same_stream_limits']:
                history_str = str(ret['response'] if isinstance(ret, dict) else str(ret))
                delta_history = abs(len(history_str) - len(str(history_str_old)))
                # even if enough data, don't yield if has been less than min_seconds
                enough_data = delta_history > kwargs['gradio_ui_stream_chunk_size'] or (error != error_old)
                beyond_min_time = last_yield is None or \
                                  last_yield is not None and \
                                  (time.time() - last_yield) > kwargs['gradio_ui_stream_chunk_min_seconds']
                do_yield |= enough_data and beyond_min_time
                # yield even if new data not enough if been long enough and have at least something to yield
                enough_time = last_yield is None or \
                              last_yield is not None and \
                              (time.time() - last_yield) > kwargs['gradio_ui_stream_chunk_seconds']
                do_yield |= enough_time and could_yield
                # DEBUG: print("do_yield: %s : %s %s %s" % (do_yield, enough_data, beyond_min_time, enough_time), flush=True)
            else:
                do_yield = could_yield

            if stream_output1 and do_yield:
                last_yield = time.time()
                # yield as it goes, else need to wait since predict only returns first yield
                if isinstance(ret, dict):
                    ret_old = ret.copy()  # copy normal one first
                    from tts_utils import combine_audios
                    ret['audio'] = combine_audios(audios, audio=audio1, sr=24000 if chatbot_role1 else 16000,
                                                  expect_bytes=kwargs['return_as_byte'], verbose=verbose)
                    audios = []  # reset accumulation
                    yield ret
                else:
                    ret_old = ret
                    yield ret
                # just last response, not actually full history like bot() and all_bot() but that's all that changes
                # we can ignore other dict entries as consequence of changes to main stream in 100% of current cases
                # even if sources added last after full response done, final yield still yields left over
                history_str_old = str(ret_old['response'] if isinstance(ret_old, dict) else str(ret_old))
            else:
                # collect unstreamed audios
                audios.append(res_dict['audio'])

            if time.time() - tgen0 > max_time1 + 10:  # don't use actual, so inner has chance to complete
                msg = "Took too long evaluate_nochat: %s" % (time.time() - tgen0)
                if str_api:
                    res_dict['save_dict']['extra_dict']['timeout'] = time.time() - tgen0
                    res_dict['save_dict']['error'] = msg
                if verbose:
                    print(msg, flush=True)
                break

        # yield if anything left over as can happen
        # return back last ret
        if str_api:
            res_dict['save_dict']['extra_dict'] = _save_generate_tokens(res_dict.get('response', ''),
                                                                        res_dict.get('save_dict', {}).get(
                                                                            'extra_dict', {}))
            ret = res_dict.copy()
        if isinstance(ret, dict):
            from tts_utils import combine_audios
            ret['audio'] = combine_audios(audios, audio=None,
                                          expect_bytes=kwargs['return_as_byte'])
        yield ret
    except Exception as e:
        ex = traceback.format_exc()
        if verbose:
            print("Error in evaluate_nochat: %s" % ex, flush=True)
        if str_api:
            ret = {'error': str(e), 'error_ex': str(ex), 'sources': [], 'sources_str': '', 'prompt_raw': '',
                   'llm_answers': []}
            yield ret
        raise
    finally:
        clear_torch_cache(allow_skip=True)
        db1s = my_db_state1
        clear_embeddings(user_kwargs['langchain_mode'], kwargs['db_type'], db1s, kwargs['dbs'])
        for image_file1 in image_files_to_delete:
            if image_file1 and os.path.isfile(image_file1):
                remove(image_file1)

    save_dict['save_dir'] = kwargs['save_dir']
    save_generate_output(**save_dict)


def visible_models_to_model_choice(visible_models1, model_states1, api=False):
    if isinstance(visible_models1, list):
        assert len(
            visible_models1) >= 1, "Invalid visible_models1=%s, can only be single entry" % visible_models1
        # just take first
        model_active_choice1 = visible_models1[0]
    elif isinstance(visible_models1, (str, int)):
        model_active_choice1 = visible_models1
    else:
        assert isinstance(visible_models1, type(None)), "Invalid visible_models1=%s" % visible_models1
        model_active_choice1 = visible_models1
    if model_active_choice1 is not None:
        if isinstance(model_active_choice1, str):
            display_model_list = [x['display_name'] for x in model_states1]
            if model_active_choice1 in display_model_list:
                model_active_choice1 = display_model_list.index(model_active_choice1)
            else:
                # NOTE: Could raise, but sometimes raising in certain places fails too hard and requires UI restart
                if api:
                    raise ValueError(
                        "Invalid model %s, valid models are: %s" % (model_active_choice1, display_model_list))
                model_active_choice1 = 0
    else:
        model_active_choice1 = 0
    return model_active_choice1


def clear_embeddings(langchain_mode1, db_type, db1s, dbs=None):
    # clear any use of embedding that sits on GPU, else keeps accumulating GPU usage even if clear torch cache
    if db_type in ['chroma', 'chroma_old'] and langchain_mode1 not in ['LLM', 'Disabled', None, '']:
        from gpt_langchain import clear_embedding, length_db1
        if dbs is not None:
            db = dbs.get(langchain_mode1)
            if db is not None and not isinstance(db, str):
                clear_embedding(db)
        if db1s is not None and langchain_mode1 in db1s:
            db1 = db1s[langchain_mode1]
            if len(db1) == length_db1():
                clear_embedding(db1[0])


def fix_text_for_gradio(text, fix_new_lines=False, fix_latex_dollars=True, fix_angle_brackets=True):
    if isinstance(text, tuple):
        # images, audio, etc.
        return text
    if not isinstance(text, str):
        # e.g. list for extraction
        text = str(text)

    if fix_latex_dollars:
        ts = text.split('```')
        for parti, part in enumerate(ts):
            inside = parti % 2 == 1
            if not inside:
                ts[parti] = ts[parti].replace('$', '﹩')
        text = '```'.join(ts)

    if fix_new_lines:
        # let Gradio handle code, since got improved recently
        ## FIXME: below conflicts with Gradio, but need to see if can handle multiple \n\n\n etc. properly as is.
        # ensure good visually, else markdown ignores multiple \n
        # handle code blocks
        ts = text.split('```')
        for parti, part in enumerate(ts):
            inside = parti % 2 == 1
            if not inside:
                ts[parti] = ts[parti].replace('\n', '<br>')
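        # (splitting on ``` keeps code fences untouched: odd indices are inside
        # fences, even indices are prose that is safe to rewrite)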
        text = '```'.join(ts)

    if fix_angle_brackets:
        # handle code blocks
        ts = text.split('```')
        for parti, part in enumerate(ts):
            inside = parti % 2 == 1
            if not inside:
                if '<a href' not in ts[parti] and \
                        '<br>' not in ts[parti]:
                    # try to avoid html best one can
                    ts[parti] = ts[parti].replace('<', '\<').replace('>', '\>')
        text = '```'.join(ts)
    return text


def get_images_num_max(model_choice, fun_args, visible_vision_models, do_batching, cli_images_num_max):
    # priority (assumed): CLI default < model-specific < API request
    images_num_max1 = None
    if cli_images_num_max is not None:
        images_num_max1 = cli_images_num_max
    if model_choice['images_num_max'] is not None:
        images_num_max1 = model_choice['images_num_max']
    images_num_max_api = fun_args[len(input_args_list) + eval_func_param_names.index('images_num_max')]
    if images_num_max_api is not None:
        images_num_max1 = images_num_max_api
    if isinstance(images_num_max1, float):
        images_num_max1 = int(images_num_max1)
    if images_num_max1 is None:
        images_num_max1 = images_num_max_dict.get(visible_vision_models)

    if images_num_max1 == -1:
        # treat as if didn't set, but we will just change behavior
        do_batching = True
        images_num_max1 = None
    elif images_num_max1 is not None and images_num_max1 < -1:
        # super expert control over auto-batching
        do_batching = True
        images_num_max1 = -images_num_max1 - 1

    # may be None now, set from model-specific model_lock or dict as final choice
    if images_num_max1 is None or images_num_max1 <= -1:
        images_num_max1 = model_choice.get('images_num_max', images_num_max1)
    if images_num_max1 is None or images_num_max1 <= -1:
        # in case not coming from api
        if model_choice.get('is_actually_vision_model'):
            images_num_max1 = images_num_max_dict.get(visible_vision_models, 1)
            if images_num_max1 == -1:
                # mean never set actual value, revert to 1
                images_num_max1 = 1
        else:
            images_num_max1 = images_num_max_dict.get(visible_vision_models, 0)
            if images_num_max1 == -1:
                # mean never set actual value, revert to 0
                images_num_max1 = 0
    if images_num_max1 < -1:
        images_num_max1 = -images_num_max1 - 1
        do_batching = True
    assert images_num_max1 != -1, "Should not be -1 here"
    if images_num_max1 is None:
        # no target, so just default of no vision
        images_num_max1 = 0
    return images_num_max1, do_batching


def get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, langchain_action1,
                 langchain_mode1, kwargs={}, api=False, verbose=False):
    if fun1 is None:
        yield from _get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1,
                                 langchain_action1, kwargs=kwargs, api=api, verbose=verbose)
        return

    image_files = fun1.args[len(input_args_list) + eval_func_param_names.index('image_file')]
    if image_files is None:
        image_files = []
    else:
        image_files = image_files.copy()

    import pyexiv2
    meta_data_images = []
    for image_files1 in image_files:
        try:
            with pyexiv2.Image(image_files1) as img:
                metadata = img.read_exif()
        except RuntimeError as e:
            if 'unknown image type' in str(e):
                metadata = {}
            else:
                raise
        if metadata is None:
            metadata = {}
        meta_data_images.append(metadata)

    fun1_args_list = list(fun1.args)
    chosen_model_state = fun1.args[input_args_list.index('model_state')]
    base_model = chosen_model_state.get('base_model')
    display_name = chosen_model_state.get('display_name')

    visible_vision_models = ''
    if kwargs['visible_vision_models']:
        # if in UI, 'auto' is default, but CLI has another default, so use that if set
        visible_vision_models = kwargs['visible_vision_models']
    if chosen_model_state['is_actually_vision_model']:
        visible_vision_models = chosen_model_state['display_name']
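    # Resolution order here (assumed): CLI/UI default, then the chosen model
    # itself if it is a vision model, then any explicit per-request override,
    # validated below against the known vision display names.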
    # by here these are just single names, not integers or list
    # args_list is not just from API, but also uses default_kwargs from CLI if not None but user_args is None or ''
    visible_vision_models1 = fun1_args_list[
        len(input_args_list) + eval_func_param_names.index('visible_vision_models')]
    if visible_vision_models1:
        if isinstance(visible_vision_models1, list):
            visible_vision_models1 = visible_vision_models1[0]
        if visible_vision_models1 != 'auto' and visible_vision_models1 in kwargs['all_possible_vision_display_names']:
            # e.g. CLI might have had InternVL but model lock only Haiku, filter that out here
            visible_vision_models = visible_vision_models1
    if not visible_vision_models:
        visible_vision_models = ''
    if isinstance(visible_vision_models, list):
        visible_vision_models = visible_vision_models[0]

    force_batching = False
    images_num_max, force_batching = get_images_num_max(chosen_model_state, fun1.args, visible_vision_models,
                                                        force_batching, kwargs['images_num_max'])

    do_batching = force_batching or len(image_files) > images_num_max or \
                  visible_vision_models != display_name and \
                  display_name not in kwargs['all_possible_vision_display_names']
    do_batching &= visible_vision_models != ''
    do_batching &= len(image_files) > 0

    # choose batching model
    if do_batching and visible_vision_models:
        model_states1 = kwargs['model_states']
        model_batch_choice1 = visible_models_to_model_choice(visible_vision_models, model_states1, api=api)
        model_batch_choice = model_states1[model_batch_choice1 % len(model_states1)]
        images_num_max_batch, do_batching = get_images_num_max(model_batch_choice, fun1.args, visible_vision_models,
                                                               do_batching, kwargs['images_num_max'])
    else:
        model_batch_choice = None
        images_num_max_batch = images_num_max
    batch_display_name = model_batch_choice.get('display_name') if model_batch_choice is not None else display_name

    do_batching &= images_num_max_batch not in [0, None]  # not 0 or None, maybe some unknown model, don't do batching

    if not do_batching:
        yield from _get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1,
                                 langchain_action1, kwargs=kwargs, api=api, verbose=verbose)
        return
    else:
        instruction = fun1_args_list[len(input_args_list) + eval_func_param_names.index('instruction')]
        instruction_nochat = fun1_args_list[len(input_args_list) + eval_func_param_names.index('instruction_nochat')]
        instruction = instruction or instruction_nochat or ""
        prompt_summary = fun1_args_list[len(input_args_list) + eval_func_param_names.index('prompt_summary')]
        if prompt_summary is None:
            prompt_summary = kwargs['prompt_summary'] or ''
        image_batch_image_prompt = fun1_args_list[len(input_args_list) + eval_func_param_names.index(
            'image_batch_image_prompt')] or kwargs['image_batch_image_prompt'] or image_batch_image_prompt0
        image_batch_final_prompt = fun1_args_list[len(input_args_list) + eval_func_param_names.index(
            'image_batch_final_prompt')] or kwargs['image_batch_final_prompt'] or image_batch_final_prompt0

        # inject system prompt late, since if early then might not listen to it and generally high priority instructions
        system_prompt = fun1_args_list[len(input_args_list) + eval_func_param_names.index('system_prompt')]
        if system_prompt not in [None, 'None', 'auto']:
            system_prompt_xml = f"""\n<system_prompt>\n{system_prompt}\n</system_prompt>\n""" if system_prompt else ''
        else:
            system_prompt_xml = ''

        if langchain_action1 == LangChainAction.QUERY.value:
            instruction_batch = image_batch_image_prompt + system_prompt_xml + instruction
            instruction_final = image_batch_final_prompt + system_prompt_xml + instruction
            prompt_summary_batch = prompt_summary
            prompt_summary_final = prompt_summary
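        # for summarize/extract below, the image prompts wrap prompt_summary
        # instead of the instruction, since map/reduce applies prompt_summary
        # to each chunk rather than to the user instruction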
        elif langchain_action1 == LangChainAction.SUMMARIZE_MAP.value:
            instruction_batch = instruction
            instruction_final = instruction
            prompt_summary_batch = image_batch_image_prompt + system_prompt_xml + prompt_summary
            prompt_summary_final = image_batch_final_prompt + system_prompt_xml + prompt_summary
        else:
            instruction_batch = instruction
            instruction_final = instruction
            prompt_summary_batch = prompt_summary
            prompt_summary_final = prompt_summary

        batch_output_tokens = 0
        batch_time = 0
        batch_input_tokens = 0
        batch_tokenspersec = 0

        batch_results = []
        text_context_list = fun1_args_list[len(input_args_list) + eval_func_param_names.index('text_context_list')]
        text_context_list = str_to_list(text_context_list)
        text_context_list_copy = copy.deepcopy(text_context_list)  # copy before mutating it

        fun1_args_list_copy = fun1_args_list.copy()
        # sync all args with model
        for k, v in model_batch_choice.items():
            if k in eval_func_param_names and k in in_model_state_and_evaluate and v is not None:
                fun1_args_list_copy[len(input_args_list) + eval_func_param_names.index(k)] = v

        for batch in range(0, len(image_files), images_num_max_batch):
            fun1_args_list2 = fun1_args_list_copy.copy()
            # then handle images in batches
            images_batch = image_files[batch:batch + images_num_max_batch]
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('image_file')] = images_batch
            # disable batching if gradio to gradio, back to auto based upon batch size we sent
            # Can't pass None, default_kwargs will override, so pass actual value instead
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('images_num_max')] = len(images_batch)

            batch_size = len(fun1_args_list2[len(input_args_list) + eval_func_param_names.index('image_file')])

            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('instruction')] = instruction_batch
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('prompt_summary')] = prompt_summary_batch

            # unlikely extended image description possible or required
            if batch_display_name in images_limit_max_new_tokens_list:
                max_new_tokens = fun1_args_list2[len(input_args_list) + eval_func_param_names.index('max_new_tokens')]
                fun1_args_list2[len(input_args_list) + eval_func_param_names.index('max_new_tokens')] = min(
                    images_limit_max_new_tokens, max_new_tokens)

            # don't include context list, just do image only
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('text_context_list')] = []
            # intermediate vision results for batching nominally should be normal, let final model do json or others
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('response_format')] = 'text'
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('guided_json')] = None
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('guided_regex')] = None
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('guided_grammar')] = None
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('guided_choice')] = None
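            # Batching sketch: each image batch is queried in isolation with
            # the batch prompt; the per-batch answers are collected and handed
            # to the final model as context after this loop.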
            # no docs from DB, just image. Don't switch langchain_mode.
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('document_subset')] = []
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('text_context_list')] = []
            # don't cause batching inside
            fun1_args_list2[
                len(input_args_list) + eval_func_param_names.index('visible_vision_models')] = visible_vision_models
            if model_batch_choice:
                # override for batch model
                fun1_args_list2[0] = model_batch_choice
                fun1_args_list2[
                    len(input_args_list) + eval_func_param_names.index('visible_models')] = visible_vision_models

            history1 = deepcopy_by_pickle_object(history)  # FIXME: is this ok? What if byte images?
            if not history1:
                history1 = [['', '']]
            history1[-1][0] = instruction_batch
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index('chat_conversation')] = history1
            # but don't change what user sees for instruction
            history1 = deepcopy_by_pickle_object(history)
            history2 = deepcopy_by_pickle_object(history)

            fun2 = functools.partial(fun1.func, *tuple(fun1_args_list2), **fun1.keywords)

            text = ''
            prompt_raw_saved = ''
            save_dict1_saved = {}
            error_saved = ''
            history_saved = []
            sources_saved = []
            sources_str_saved = ''
            llm_answers_saved = {}
            image_batch_stream = fun1_args_list2[
                len(input_args_list) + eval_func_param_names.index('image_batch_stream')]
            if image_batch_stream is None:
                image_batch_stream = kwargs['image_batch_stream']
            if not image_batch_stream and not api:
                if not history2:
                    history2 = [['', '']]
                if len(image_files) > images_num_max_batch:
                    history2[-1][1] = '%s querying image %s/%s' % (
                        visible_vision_models, 1 + batch, len(image_files))
                else:
                    history2[-1][1] = '%s querying image(s)' % visible_vision_models
                audio3 = b''  # don't yield audio if not streaming batches
                yield history2, '', [], '', '', [], {}, audio3
            t0_batch = time.time()
            for response in _get_response(fun2, history1, chatbot_role1, speaker1, tts_language1, roles_state1,
                                          tts_speed1, langchain_action1, kwargs=kwargs, api=api, verbose=verbose):
                if image_batch_stream:
                    yield response
                history1, error1, sources1, sources_str1, prompt_raw1, llm_answers1, save_dict1, audio2 = response
                prompt_raw_saved = prompt_raw1
                save_dict1_saved = save_dict1
                error_saved = error1
                history_saved = history1
                sources_saved = sources1
                sources_str_saved = sources_str1
                llm_answers_saved = llm_answers1
                text = history1[-1][1] or '' if history1 else ''

            batch_input_tokens += save_dict1_saved['extra_dict'].get('num_prompt_tokens', 0)
            save_dict1_saved['extra_dict'] = _save_generate_tokens(text, save_dict1_saved['extra_dict'])
            ntokens1 = save_dict1_saved['extra_dict'].get('ntokens', 0)
            batch_output_tokens += ntokens1
            batch_time += (time.time() - t0_batch)
            tokens_per_sec1 = save_dict1_saved['extra_dict'].get('tokens_persecond', 0)
            batch_tokenspersec += tokens_per_sec1

            meta_data = ''
            for meta_data_image in meta_data_images[batch:batch + images_num_max_batch]:
                if not meta_data_image:
                    continue
                meta_data += '\n'.join(
                    [f"""<{key}>{value}</{key}>\n""" for key, value in meta_data_image.items()]).strip() + '\n'
            response_final = f'\n\nImage {batch}\n\n{meta_data}\n\n{text}\n\n'
            batch_results.append(dict(image_ids=list(range(batch, batch + images_num_max_batch)),
                                      response=text,
                                      response_final=response_final,
                                      prompt_raw=prompt_raw_saved,
                                      save_dict=save_dict1_saved,
                                      error=error_saved,
                                      history=history_saved,
                                      sources=sources_saved,
                                      sources_str=sources_str_saved,
                                      llm_answers=llm_answers_saved,
                                      ))

        # last response with no images
        responses = [x['response_final'] for x in batch_results]
        batch_tokens_persecond = batch_output_tokens / batch_time if batch_time > 0 else 0
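        # Final pass: re-ask with no images attached, feeding the per-batch
        # responses back in via the instruction or text_context_list.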
        history1 = deepcopy_by_pickle_object(history)  # FIXME: is this ok? What if byte images?
        fun1_args_list2 = fun1_args_list.copy()
        # sync all args with model
        for k, v in chosen_model_state.items():
            if k in eval_func_param_names and k in in_model_state_and_evaluate and v is not None:
                fun1_args_list2[len(input_args_list) + eval_func_param_names.index(k)] = v
        fun1_args_list2[len(input_args_list) + eval_func_param_names.index('image_file')] = []
        if not history1:
            history1 = [['', '']]
        history1[-1][0] = fun1_args_list2[
            len(input_args_list) + eval_func_param_names.index('instruction')] = instruction_final
        fun1_args_list2[len(input_args_list) + eval_func_param_names.index('chat_conversation')] = history1
        # but don't change what user sees for instruction
        history1 = deepcopy_by_pickle_object(history)

        fun1_args_list2[len(input_args_list) + eval_func_param_names.index('prompt_summary')] = prompt_summary_final

        if langchain_action1 == LangChainAction.QUERY.value:
            instruction = fun1_args_list2[len(input_args_list) + eval_func_param_names.index('instruction')]
            if langchain_mode1 == LangChainMode.LLM.value and instruction:
                # pre-append to context directly
                fun1_args_list2[
                    len(input_args_list) + eval_func_param_names.index('instruction')] = '\n\n'.join(
                    responses) + instruction
            else:
                # pre-append to ensure images used, since first is highest priority for text_context_list
                fun1_args_list2[len(input_args_list) + eval_func_param_names.index(
                    'text_context_list')] = responses + text_context_list_copy
        else:
            # for summary/extract, put at end, so if part of single call similar to Query in order for best_near_prompt
            fun1_args_list2[len(input_args_list) + eval_func_param_names.index(
                'text_context_list')] = text_context_list_copy + responses

        fun2 = functools.partial(fun1.func, *tuple(fun1_args_list2), **fun1.keywords)

        for response in _get_response(fun2, history1, chatbot_role1, speaker1, tts_language1, roles_state1,
                                      tts_speed1, langchain_action1, kwargs=kwargs, api=api, verbose=verbose):
            response_list = list(response)
            save_dict1 = response_list[6]
            if 'extra_dict' in save_dict1:
                if 'num_prompt_tokens' in save_dict1['extra_dict']:
                    save_dict1['extra_dict']['batch_vision_visible_model'] = batch_display_name
                    save_dict1['extra_dict']['batch_num_prompt_tokens'] = batch_input_tokens
                    save_dict1['extra_dict']['batch_ntokens'] = batch_output_tokens
                    save_dict1['extra_dict']['batch_tokens_persecond'] = batch_tokens_persecond
                    if batch_display_name == display_name:
                        save_dict1['extra_dict']['num_prompt_tokens'] += batch_input_tokens
                        # get ntokens so can add to it
                        history1new = response_list[0]
                        if history1new and len(history1new) > 0 and len(history1new[0]) == 2 and history1new[-1][1]:
                            save_dict1['extra_dict'] = _save_generate_tokens(history1new[-1][1],
                                                                             save_dict1['extra_dict'])
                            save_dict1['extra_dict']['ntokens'] += batch_output_tokens
                save_dict1['extra_dict']['batch_results'] = batch_results
            response_list[6] = save_dict1
            yield tuple(response_list)
        return


def _get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, langchain_action1,
                  kwargs={}, api=False, verbose=False):
    """
    bot that consumes history for user input
    instruction (from input_list) itself is not consumed by bot
    :return:
    """
    error = ''
    sources = []
    save_dict = dict()
    output_no_refs = ''
    sources_str = ''
    prompt_raw = ''
    llm_answers = {}

    audio0, audio1, no_audio, generate_speech_func_func = \
        prepare_audio(chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, langchain_action1,
                      kwargs=kwargs, verbose=verbose)
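    # fun1 is the functools.partial built by prep_bot(); if prep_bot bailed out
    # (e.g. no model or empty instruction), there is nothing to run, so yield
    # the unchanged state once.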
    if not fun1:
        yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio1
        return

    try:
        for output_fun in fun1():
            output = output_fun['response']
            output_no_refs = output_fun['response_no_refs']
            sources = output_fun['sources']  # FIXME: can show sources in separate text box etc.
            sources_iter = []  # don't yield full prompt_raw every iteration, just at end
            sources_str = output_fun['sources_str']
            sources_str_iter = ''  # don't yield full prompt_raw every iteration, just at end
            prompt_raw = output_fun['prompt_raw']
            prompt_raw_iter = ''  # don't yield full prompt_raw every iteration, just at end
            llm_answers = output_fun['llm_answers']
            save_dict = output_fun.get('save_dict', {})
            save_dict_iter = {}
            # ensure good visually, else markdown ignores multiple \n
            bot_message = fix_text_for_gradio(output, fix_latex_dollars=not api, fix_angle_brackets=not api)
            history[-1][1] = bot_message

            if generate_speech_func_func is not None:
                while True:
                    audio1, sentence, sentence_state = generate_speech_func_func(output_no_refs, is_final=False)
                    if audio0 is not None:
                        yield history, error, sources_iter, sources_str_iter, prompt_raw_iter, llm_answers, \
                            save_dict_iter, audio0
                        audio0 = None
                    yield history, error, sources_iter, sources_str_iter, prompt_raw_iter, llm_answers, \
                        save_dict_iter, audio1
                    if not sentence:
                        # while True to handle case when streaming is fast enough that see multiple sentences in single go
                        break
            else:
                yield history, error, sources_iter, sources_str_iter, prompt_raw_iter, llm_answers, \
                    save_dict_iter, audio0

        if generate_speech_func_func:
            # print("final %s %s" % (history[-1][1] is None, audio1 is None), flush=True)
            audio1, sentence, sentence_state = generate_speech_func_func(output_no_refs, is_final=True)
            if audio0 is not None:
                yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio0
        else:
            audio1 = None
        # print("final2 %s %s" % (history[-1][1] is None, audio1 is None), flush=True)
        yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio1
    except StopIteration:
        # print("STOP ITERATION", flush=True)
        yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, no_audio
        raise
    except RuntimeError as e:
        if "generator raised StopIteration" in str(e):
            # assume last entry was bad, undo
            history.pop()
            yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, no_audio
        else:
            if history and len(history) > 0 and len(history[0]) > 1 and history[-1][1] is None:
                history[-1][1] = ''
            yield history, str(e), sources, sources_str, prompt_raw, llm_answers, save_dict, no_audio
        raise
    except Exception as e:
        # put error into user input
        ex = "Exception: %s" % str(e)
        if history and len(history) > 0 and len(history[0]) > 1 and history[-1][1] is None:
            history[-1][1] = ''
        yield history, ex, sources, sources_str, prompt_raw, llm_answers, save_dict, no_audio
        raise
    finally:
        # clear_torch_cache()
        # don't clear torch cache here, too early and stalls generation if used for all_bot()
        pass
    return


def prepare_audio(chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, langchain_action1, kwargs={},
                  verbose=False):
    assert kwargs
    from tts_sentence_parsing import init_sentence_state
    sentence_state = init_sentence_state()
    if langchain_action1 in [LangChainAction.EXTRACT.value]:
        # don't do audio for extraction in any case
        generate_speech_func_func = None
        audio0 = None
        audio1 = None
        no_audio = None
    elif kwargs['tts_model'].startswith('microsoft') and speaker1 not in [None, "None"]:
        audio1 = None
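        # Microsoft TTS path (assumed SpeechT5-style): stream at 16 kHz with a
        # per-speaker embedding; the Coqui path below streams 24 kHz latents.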
        from tts import get_speaker_embedding
        speaker_embedding = get_speaker_embedding(speaker1, kwargs['model_tts'].device)
        # audio0 = 16000, np.array([]).astype(np.int16)
        from tts_utils import prepare_speech, get_no_audio
        sr = 16000
        audio0 = prepare_speech(sr=sr)
        no_audio = get_no_audio(sr=sr)
        generate_speech_func_func = functools.partial(kwargs['generate_speech_func'],
                                                      speaker=speaker1,
                                                      speaker_embedding=speaker_embedding,
                                                      sentence_state=sentence_state,
                                                      return_as_byte=kwargs['return_as_byte'],
                                                      sr=sr,
                                                      tts_speed=tts_speed1,
                                                      verbose=verbose)
    elif kwargs['tts_model'].startswith('tts_models/') and chatbot_role1 not in [None, "None"]:
        audio1 = None
        from tts_utils import prepare_speech, get_no_audio
        from tts_coqui import get_latent
        sr = 24000
        audio0 = prepare_speech(sr=sr)
        no_audio = get_no_audio(sr=sr)
        latent = get_latent(roles_state1[chatbot_role1], model=kwargs['model_xtt'])
        generate_speech_func_func = functools.partial(kwargs['generate_speech_func'],
                                                      latent=latent,
                                                      language=tts_language1,
                                                      sentence_state=sentence_state,
                                                      return_as_byte=kwargs['return_as_byte'],
                                                      sr=sr,
                                                      tts_speed=tts_speed1,
                                                      verbose=verbose)
    else:
        generate_speech_func_func = None
        audio0 = None
        audio1 = None
        no_audio = None
    return audio0, audio1, no_audio, generate_speech_func_func


def prep_bot(*args, retry=False, which_model=0, kwargs_eval={}, plain_api=False, kwargs={}, verbose=False):
    """
    :param args:
    :param retry:
    :param which_model: identifies which model if doing model_lock
        API only called for which_model=0, default for inputs_list, but rest should ignore inputs_list
    :return: last element is True if should run bot, False if should just yield history
    """
    assert kwargs
    isize = len(input_args_list) + 1  # states + chat history
    # don't deepcopy, can contain model itself
    # NOTE: Update plain_api in evaluate_nochat too
    args_list = list(args).copy()
    model_state1 = args_list[-isize]
    my_db_state1 = args_list[-isize + 1]
    selection_docs_state1 = args_list[-isize + 2]
    requests_state1 = args_list[-isize + 3]
    roles_state1 = args_list[-isize + 4]
    history = args_list[-1]
    if not history:
        history = []
    # NOTE: For these, could check if None, then automatically use CLI values, but too complex behavior
    prompt_type1 = args_list[eval_func_param_names.index('prompt_type')]
    if prompt_type1 == no_model_str:
        # deal with gradio dropdown
        prompt_type1 = args_list[eval_func_param_names.index('prompt_type')] = None
    prompt_dict1 = args_list[eval_func_param_names.index('prompt_dict')]
    max_time1 = args_list[eval_func_param_names.index('max_time')]
    stream_output1 = args_list[eval_func_param_names.index('stream_output')]
    langchain_mode1 = args_list[eval_func_param_names.index('langchain_mode')]
    langchain_action1 = args_list[eval_func_param_names.index('langchain_action')]
    document_subset1 = args_list[eval_func_param_names.index('document_subset')]
    h2ogpt_key1 = args_list[eval_func_param_names.index('h2ogpt_key')]
    chat_conversation1 = args_list[eval_func_param_names.index('chat_conversation')]
    valid_key = is_valid_key(kwargs['enforce_h2ogpt_api_key'],
                             kwargs['enforce_h2ogpt_ui_key'],
                             kwargs['h2ogpt_api_keys'], h2ogpt_key1,
                             requests_state1=requests_state1)
    chatbot_role1 = args_list[eval_func_param_names.index('chatbot_role')]
    speaker1 = args_list[eval_func_param_names.index('speaker')]
    tts_language1 = args_list[eval_func_param_names.index('tts_language')]
    tts_speed1 = args_list[eval_func_param_names.index('tts_speed')]

    dummy_return = history, None, langchain_mode1, my_db_state1, requests_state1, \
        valid_key, h2ogpt_key1, \
        max_time1, stream_output1, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, \
        langchain_action1, []
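    # dummy_return mirrors the normal return shape with fun1=None, signaling
    # "nothing to run", so callers can unpack it unconditionally.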
    if not plain_api and (model_state1['model'] is None or model_state1['model'] == no_model_str):
        # plain_api has no state, let evaluate() handle switch
        return dummy_return

    args_list = args_list[:-isize]  # only keep rest needed for evaluate()
    if not history:
        if verbose:
            print("No history", flush=True)
        return dummy_return

    instruction1 = history[-1][0]
    if retry and history:
        # if retry, pop history and move onto bot stuff
        history = get_llm_history(history)
        instruction1 = history[-1][0] if history and history[-1] and len(history[-1]) == 2 else None
        if history and history[-1]:
            history[-1][1] = None
        if not instruction1:
            return dummy_return
    elif not instruction1:
        if not allow_empty_instruction(langchain_mode1, document_subset1, langchain_action1):
            # if not retrying, then reject empty query
            return dummy_return
    elif len(history) > 0 and history[-1][1] not in [None, '']:
        # reject submit button if already filled and not retrying
        # None when not filling with '' to keep client happy
        return dummy_return

    from gen import evaluate, evaluate_fake
    evaluate_local = evaluate if valid_key else functools.partial(evaluate_fake,
                                                                  langchain_action=langchain_action1)

    # shouldn't have to specify in API prompt_type if CLI launched model, so prefer global CLI one if have it
    prompt_type1, prompt_dict1 = update_prompt(prompt_type1, prompt_dict1, model_state1,
                                               which_model=which_model, **kwargs)
    # apply back to args_list for evaluate()
    args_list[eval_func_param_names.index('prompt_type')] = prompt_type1
    args_list[eval_func_param_names.index('prompt_dict')] = prompt_dict1
    context1 = args_list[eval_func_param_names.index('context')]

    chat_conversation1 = merge_chat_conversation_history(chat_conversation1, history)
    args_list[eval_func_param_names.index('chat_conversation')] = chat_conversation1

    if 'visible_models' in model_state1 and model_state1['visible_models'] is not None:
        assert isinstance(model_state1['visible_models'], (int, str))
        args_list[eval_func_param_names.index('visible_models')] = model_state1['visible_models']
    if 'visible_vision_models' in model_state1 and model_state1['visible_vision_models'] is not None:
        assert isinstance(model_state1['visible_vision_models'], (int, str))
        args_list[eval_func_param_names.index('visible_vision_models')] = model_state1['visible_vision_models']

    if 'h2ogpt_key' in model_state1 and model_state1['h2ogpt_key'] is not None:
        # i.e. may be '' and used to override overall local key
        assert isinstance(model_state1['h2ogpt_key'], str)
        args_list[eval_func_param_names.index('h2ogpt_key')] = model_state1['h2ogpt_key']
    elif not args_list[eval_func_param_names.index('h2ogpt_key')]:
        # now that checked if key was valid or not, now can inject default key in case gradio inference server
        # only do if key not already set by user
        args_list[eval_func_param_names.index('h2ogpt_key')] = kwargs['h2ogpt_key']

    ###########################################
    # deal with image files
    image_files = args_list[eval_func_param_names.index('image_file')]
    if isinstance(image_files, str):
        image_files = [image_files]
    if image_files is None:
        image_files = []
    video_files = args_list[eval_func_param_names.index('video_file')]
    if isinstance(video_files, str):
        video_files = [video_files]
    if video_files is None:
        video_files = []
    # NOTE: Once done with gradio, image_file and video_file are all in same list
    image_files.extend(video_files)

    image_files_to_delete = []
    b2imgs = []
    for img_file_one in image_files:
        str_type = check_input_type(img_file_one)
        if str_type == 'unknown':
            continue

        img_file_path = os.path.join(tempfile.gettempdir(), 'image_file_%s' % str(uuid.uuid4()))
        if str_type == 'url':
            img_file_one = download_image(img_file_one, img_file_path)
            # only delete if was made by us
            image_files_to_delete.append(img_file_one)
        elif str_type == 'base64':
            from vision.utils_vision import base64_to_img
            img_file_one = base64_to_img(img_file_one, img_file_path)
            # only delete if was made by us
            image_files_to_delete.append(img_file_one)
        else:
            # str_type='file' or 'youtube' or video (can be cached)
            pass
        if img_file_one is not None:
            b2imgs.append(img_file_one)
    # always just make list
    args_list[eval_func_param_names.index('image_file')] = b2imgs

    ###########################################
    # deal with videos in image list
    images_file_path = os.path.join(tempfile.gettempdir(), 'image_path_%s' % str(uuid.uuid4()))
    # don't try to convert resolution here, do later as images
    image_files = args_list[eval_func_param_names.index('image_file')]
    image_resolution = args_list[eval_func_param_names.index('image_resolution')]
    image_format = args_list[eval_func_param_names.index('image_format')]
    video_frame_period = args_list[eval_func_param_names.index('video_frame_period')]
    if video_frame_period is not None:
        video_frame_period = int(video_frame_period)
    extract_frames = args_list[eval_func_param_names.index('extract_frames')] or kwargs.get('extract_frames', 20)
    rotate_align_resize_image = args_list[eval_func_param_names.index('rotate_align_resize_image')] or kwargs.get(
        'rotate_align_resize_image', True)
    process_args = (image_files, images_file_path)
    process_kwargs = dict(resolution=image_resolution,
                          image_format=image_format,
                          rotate_align_resize_image=rotate_align_resize_image,
                          video_frame_period=video_frame_period,
                          extract_frames=extract_frames,
                          verbose=verbose)
    if image_files and kwargs['function_server']:
        from function_client import call_function_server
        image_files = call_function_server('0.0.0.0', kwargs['function_server_port'], 'process_file_list',
                                           process_args, process_kwargs,
                                           use_disk=True, use_pickle=True,
                                           function_api_key=kwargs['function_api_key'],
                                           verbose=verbose)
    else:
        image_files = process_file_list(*process_args, **process_kwargs)
    args_list[eval_func_param_names.index('image_file')] = image_files

    ###########################################
    # override original instruction with history from user
    args_list[0] = instruction1
    args_list[2] = context1
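    # (positions 0 and 2 are assumed to be 'instruction' and 'context' in
    # eval_func_param_names, per the evaluate_params ordering)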
    ###########################################
    # allow override of expert/user input for other parameters
    for k in eval_func_param_names:
        if k in in_model_state_and_evaluate:
            # already handled
            continue
        if k in model_state1 and model_state1[k] is not None:
            args_list[eval_func_param_names.index(k)] = model_state1[k]

    eval_args = (model_state1, my_db_state1, selection_docs_state1, requests_state1, roles_state1)
    assert len(eval_args) == len(input_args_list)
    fun1 = functools.partial(evaluate_local, *eval_args, *tuple(args_list), **kwargs_eval)

    return history, fun1, langchain_mode1, my_db_state1, requests_state1, \
        valid_key, h2ogpt_key1, \
        max_time1, stream_output1, \
        chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, \
        langchain_action1, image_files_to_delete


def choose_exc(x, is_public=True):
    # don't expose ports etc. to exceptions window
    if is_public:
        return x  # "Endpoint unavailable or failed"
    else:
        return x


def bot(*args, retry=False, kwargs_evaluate={}, kwargs={}, db_type=None, dbs=None, verbose=False):
    history, fun1, langchain_mode1, db1, requests_state1, \
        valid_key, h2ogpt_key1, \
        max_time1, stream_output1, \
        chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1, \
        langchain_action1, \
        image_files_to_delete = prep_bot(*args, retry=retry, kwargs_eval=kwargs_evaluate, kwargs=kwargs,
                                         verbose=verbose)
    save_dict = dict()
    error = ''
    error_with_str = ''
    sources = []
    history_str_old = ''
    error_old = ''
    sources_str = None
    from tts_utils import get_no_audio
    no_audio = get_no_audio()
    audios = []  # in case not streaming, since audio is always streaming, need to accumulate for when yield
    last_yield = None
    try:
        tgen0 = time.time()
        for res in get_response(fun1, history, chatbot_role1, speaker1, tts_language1, roles_state1, tts_speed1,
                                langchain_action1, langchain_mode1,
                                kwargs=kwargs, api=False, verbose=verbose,
                                ):
            do_yield = False
            history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio1 = res
            error_with_str = get_accordion_named(choose_exc(error), "Generate Error", font_size=2) \
                if error not in ['', None, 'None'] else ''
            # pass back to gradio only these, rest are consumed in this function
            history_str = str(history)
            could_yield = (
                    history_str != history_str_old or
                    error != error_old and
                    (error not in noneset or error_old not in noneset))
            if kwargs['gradio_ui_stream_chunk_size'] <= 0:
                do_yield |= could_yield
            else:
                delta_history = abs(len(history_str) - len(history_str_old))
                # even if enough data, don't yield if has been less than min_seconds
                enough_data = delta_history > kwargs['gradio_ui_stream_chunk_size'] or (error != error_old)
                beyond_min_time = last_yield is None or \
                                  last_yield is not None and \
                                  (time.time() - last_yield) > kwargs['gradio_ui_stream_chunk_min_seconds']
                do_yield |= enough_data and beyond_min_time
                # yield even if new data not enough if been long enough and have at least something to yield
                enough_time = last_yield is None or \
                              last_yield is not None and \
                              (time.time() - last_yield) > kwargs['gradio_ui_stream_chunk_seconds']
                do_yield |= enough_time and could_yield
                # DEBUG: print("do_yield: %s : %s %s %s %s" % (do_yield, delta_history, enough_data, beyond_min_time, enough_time), flush=True)
            if stream_output1 and do_yield:
                audio1 = combine_audios(audios, audio=audio1, sr=24000 if chatbot_role1 else 16000,
                                        expect_bytes=kwargs['return_as_byte'], verbose=verbose)
                audios = []  # reset accumulation

                yield history, error, audio1
                history_str_old = history_str
                error_old = error
                last_yield = time.time()
            else:
                audios.append(audio1)

            if time.time() - tgen0 > max_time1 + 10:  # don't use actual, so inner has chance to complete
                if verbose:
                    print("Took too long bot: %s" % (time.time() - tgen0), flush=True)
                break

        # yield if anything left over
        final_audio = combine_audios(audios, audio=no_audio,
                                     expect_bytes=kwargs['return_as_byte'], verbose=verbose)
        if error_with_str:
            if history and history[-1] and len(history[-1]) == 2 and error_with_str:
                if not history[-1][1]:
                    history[-1][1] = error_with_str
                else:
                    # separate bot if already text present
                    history.append((None, error_with_str))
        if kwargs['append_sources_to_chat'] and sources_str:
            history.append((None, sources_str))
        yield history, error, final_audio
    except BaseException as e:
        print("evaluate_nochat exception: %s: %s" % (str(e), str(args)), flush=True)
        raise
    finally:
        clear_torch_cache(allow_skip=True)
        clear_embeddings(langchain_mode1, db_type, db1, dbs)
        for image_file1 in image_files_to_delete:
            if os.path.isfile(image_file1):
                remove(image_file1)

    # save
    if 'extra_dict' not in save_dict:
        save_dict['extra_dict'] = {}
    save_dict['valid_key'] = valid_key
    save_dict['h2ogpt_key'] = h2ogpt_key1
    if requests_state1:
        save_dict['extra_dict'].update(requests_state1)
    else:
        save_dict['extra_dict'].update(dict(username='NO_REQUEST'))
    save_dict['error'] = error
    save_dict['sources'] = sources
    save_dict['which_api'] = 'bot'
    save_dict['save_dir'] = kwargs['save_dir']
    save_generate_output(**save_dict)


def is_from_ui(requests_state1):
    return isinstance(requests_state1, dict) and 'username' in requests_state1 and requests_state1['username']


def is_valid_key(enforce_h2ogpt_api_key, enforce_h2ogpt_ui_key, h2ogpt_api_keys, h2ogpt_key1, requests_state1=None):
    from_ui = is_from_ui(requests_state1)

    if from_ui and not enforce_h2ogpt_ui_key:
        # no token barrier
        return 'not enforced'
    elif not from_ui and not enforce_h2ogpt_api_key:
        # no token barrier
        return 'not enforced'
    else:
        valid_key = False
        if isinstance(h2ogpt_api_keys, list) and h2ogpt_key1 in h2ogpt_api_keys:
            # passed token barrier
            valid_key = True
        elif isinstance(h2ogpt_api_keys, str) and os.path.isfile(h2ogpt_api_keys):
            with filelock.FileLock(h2ogpt_api_keys + '.lock'):
                with open(h2ogpt_api_keys, 'rt') as f:
                    h2ogpt_api_keys = json.load(f)
            if h2ogpt_key1 in h2ogpt_api_keys:
                valid_key = True
        return valid_key


def get_one_key(h2ogpt_api_keys, enforce_h2ogpt_api_key):
    if not enforce_h2ogpt_api_key:
        # return None so OpenAI server has no keyed access if not enforcing API key on h2oGPT regardless if keys passed
        return None
    if isinstance(h2ogpt_api_keys, list) and h2ogpt_api_keys:
        return h2ogpt_api_keys[0]
    elif isinstance(h2ogpt_api_keys, str) and os.path.isfile(h2ogpt_api_keys):
        with filelock.FileLock(h2ogpt_api_keys + '.lock'):
            with open(h2ogpt_api_keys, 'rt') as f:
                h2ogpt_api_keys = json.load(f)
        if h2ogpt_api_keys:
            return h2ogpt_api_keys[0]


def get_model_max_length(model_state1, model_state0):
    if model_state1 and not isinstance(model_state1["tokenizer"], str):
        tokenizer = model_state1["tokenizer"]
    elif model_state0 and not isinstance(model_state0["tokenizer"], str):
        tokenizer = model_state0["tokenizer"]
    else:
        tokenizer = None
    if tokenizer is not None:
        return int(tokenizer.model_max_length)
    else:
        return 2000


def get_llm_history(history):
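    """Trim history so it ends at the most recent entry with a real user
    message; tail entries with a None user slot (sources, errors) drop off."""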
    # avoid None users used for sources, errors, etc.
    if history is None:
        history = []
    for ii in range(len(history) - 1, -1, -1):
        if history[ii] and history[ii][0] is not None:
            last_user_ii = ii
            history = history[:last_user_ii + 1]
            break
    return history


def gen1_fake(fun1, history):
    error = ''
    sources = []
    sources_str = ''
    prompt_raw = ''
    llm_answers = {}
    save_dict = dict()
    audio1 = None
    yield history, error, sources, sources_str, prompt_raw, llm_answers, save_dict, audio1
    return


def merge_chat_conversation_history(chat_conversation1, history):
    # chat_conversation and history ordered so largest index of list is most recent
    if chat_conversation1:
        chat_conversation1 = str_to_list(chat_conversation1)
        for conv1 in chat_conversation1:
            assert isinstance(conv1, (list, tuple))
            assert len(conv1) == 2

    if isinstance(history, list):
        # make copy so only local change
        if chat_conversation1:
            # so priority will be newest that comes from actual chat history from UI, then chat_conversation
            history = chat_conversation1 + history.copy()
    elif chat_conversation1:
        history = chat_conversation1
    else:
        history = []
    return history


def update_langchain_mode_paths(selection_docs_state1):
    dup = selection_docs_state1['langchain_mode_paths'].copy()
    for k, v in dup.items():
        if k not in selection_docs_state1['langchain_modes']:
            selection_docs_state1['langchain_mode_paths'].pop(k)
    for k in selection_docs_state1['langchain_modes']:
        if k not in selection_docs_state1['langchain_mode_types']:
            # if didn't specify shared, then assume scratch if didn't login or personal if logged in
            selection_docs_state1['langchain_mode_types'][k] = LangChainTypes.PERSONAL.value
    return selection_docs_state1


# Setup some gradio states for per-user dynamic state
def my_db_state_done(state):
    if isinstance(state, dict):
        for langchain_mode_db, db_state in state.items():
            scratch_data = state[langchain_mode_db]
            if langchain_mode_db in langchain_modes_intrinsic:
                if len(scratch_data) == length_db1() and hasattr(scratch_data[0], 'delete_collection') and \
                        scratch_data[1] == scratch_data[2]:
                    # scratch if not logged in
                    scratch_data[0].delete_collection()
                # try to free from memory
                scratch_data[0] = None
                del scratch_data[0]


def process_audio(file1, t1=0, t2=30):
    # use no more than 30 seconds
    from pydub import AudioSegment
    # in milliseconds
    t1 = t1 * 1000
    t2 = t2 * 1000
    newAudio = AudioSegment.from_wav(file1)[t1:t2]
    new_file = file1 + '.new.wav'
    newAudio.export(new_file, format="wav")
    return new_file


def allow_empty_instruction(langchain_mode1, document_subset1, langchain_action1):
    allow = False
    allow |= langchain_action1 not in [LangChainAction.QUERY.value,
                                       LangChainAction.IMAGE_QUERY.value,
                                       LangChainAction.IMAGE_CHANGE.value,
                                       LangChainAction.IMAGE_GENERATE.value,
                                       LangChainAction.IMAGE_STYLE.value,
                                       ]
    allow |= document_subset1 in [DocumentSubset.TopKSources.name]
    if langchain_mode1 in [LangChainMode.LLM.value]:
        allow = False
    return allow


def update_prompt(prompt_type1, prompt_dict1, model_state1, which_model=0, global_scope=False, **kwargs):
    assert kwargs
    if not prompt_type1 or which_model != 0:
        # keep prompt_type and prompt_dict in sync if possible
        prompt_type1 = kwargs.get('prompt_type', prompt_type1)
        prompt_dict1 = kwargs.get('prompt_dict', prompt_dict1)
        # prefer model specific prompt type instead of global one
        if not global_scope:
            if not prompt_type1 or which_model != 0:
                prompt_type1 = model_state1.get('prompt_type', prompt_type1)
                prompt_dict1 = model_state1.get('prompt_dict', prompt_dict1)

    if not prompt_dict1 or which_model != 0:
        # if still not defined, try to get
        prompt_dict1 = kwargs.get('prompt_dict', prompt_dict1)
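    # precedence (assumed): explicit caller value, then model_state, then CLI
    # kwargs, finally unknown_prompt_type if nothing is defined anywhere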
    if not global_scope:
        if not prompt_dict1 or which_model != 0:
            prompt_dict1 = model_state1.get('prompt_dict', prompt_dict1)
    if not global_scope and not prompt_type1:
        # if still not defined, use unknown
        prompt_type1 = unknown_prompt_type
    return prompt_type1, prompt_dict1


def get_fun_with_dict_str_plain(default_kwargs, kwargs, **kwargs_evaluate_nochat):
    fun_with_dict_str_plain = functools.partial(evaluate_nochat,
                                                default_kwargs1=default_kwargs,
                                                str_api=True,
                                                plain_api=True,
                                                kwargs=kwargs,
                                                **kwargs_evaluate_nochat,
                                                )
    return fun_with_dict_str_plain