Spaces:

phyloforfun
/

VoucherVision

Running

VoucherVision / vouchervision /VoucherVision_Config_Builder.py

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

ae215ea 10 months ago

raw

history blame

30.2 kB

	import os #, yaml, platform, traceback
	from vouchervision.LeafMachine2_Config_Builder import get_default_download_folder #, write_config_file
	# from vouchervision.general_utils import validate_dir, print_main_fail
	# from vouchervision.vouchervision_main import voucher_vision
	from vouchervision.general_utils import get_cfg_from_full_path

	def build_VV_config(loaded_cfg=None):
	if loaded_cfg is None:
	#############################################
	############ Set common defaults ############
	#############################################
	# Changing the values below will set new
	# default values each time you open the
	# VoucherVision user interface
	#############################################
	#############################################
	#############################################

	dir_home = os.path.dirname(os.path.dirname(__file__))
	run_name = 'test'
	# dir_images_local = 'D:/Dropbox/LM2_Env/Image_Datasets/GBIF_BroadSample_3SppPerFamily1'
	dir_images_local = os.path.join(dir_home,'demo','demo_images')

	# The default output location is the computer's "Downloads" folder
	# You can set dir_output directly by typing the folder path,
	# OR you can uncomment the line "dir_output = default_output_folder"
	# to have VoucherVision save to the Downloads folder by default
	default_output_folder = get_default_download_folder()
	dir_output = default_output_folder
	# dir_output = 'D:/D_Desktop/LM2'

	prefix_removal = '' #'MICH-V-'
	suffix_removal = ''
	catalog_numerical_only = False

	save_cropped_annotations = ['label','barcode']

	do_use_trOCR = False
	trOCR_model_path = "microsoft/trocr-large-handwritten"
	OCR_option = 'hand'
	OCR_option_llava = 'llava-v1.6-mistral-7b' # "llava-v1.6-mistral-7b", "llava-v1.6-34b", "llava-v1.6-vicuna-13b", "llava-v1.6-vicuna-7b",
	OCR_option_llava_bit = 'full' # full or 4bit
	double_OCR = False


	tool_GEO = True
	tool_WFO = True
	tool_wikipedia = True

	check_for_illegal_filenames = False

	LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
	prompt_version = 'SLTPvA_long.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
	use_LeafMachine2_collage_images = True # Use LeafMachine2 collage images
	do_create_OCR_helper_image = True

	batch_size = 500
	num_workers = 8

	skip_vertical = False
	pdf_conversion_dpi = 100

	path_domain_knowledge = '' #os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
	embeddings_database_name = '' #os.path.splitext(os.path.basename(path_domain_knowledge))[0]

	#############################################
	#############################################
	########## DO NOT EDIT BELOW HERE ###########
	#############################################
	#############################################
	return assemble_config(dir_home, run_name, dir_images_local,dir_output,
	prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
	path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	prompt_version, do_create_OCR_helper_image, do_use_trOCR, trOCR_model_path, OCR_option, OCR_option_llava,
	OCR_option_llava_bit, double_OCR, save_cropped_annotations,
	tool_GEO, tool_WFO, tool_wikipedia,
	check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)
	else:
	dir_home = os.path.dirname(os.path.dirname(__file__))
	run_name = loaded_cfg['leafmachine']['project']['run_name']
	dir_images_local = loaded_cfg['leafmachine']['project']['dir_images_local']

	default_output_folder = loaded_cfg['leafmachine']['project']['dir_output']
	dir_output = loaded_cfg['leafmachine']['project']['dir_output']

	prefix_removal = loaded_cfg['leafmachine']['project']['prefix_removal']
	suffix_removal = loaded_cfg['leafmachine']['project']['suffix_removal']
	catalog_numerical_only = loaded_cfg['leafmachine']['project']['catalog_numerical_only']

	do_use_trOCR = loaded_cfg['leafmachine']['project']['do_use_trOCR']
	trOCR_model_path = loaded_cfg['leafmachine']['project']['trOCR_model_path']
	OCR_option = loaded_cfg['leafmachine']['project']['OCR_option']
	OCR_option_llava = loaded_cfg['leafmachine']['project']['OCR_option_llava']
	OCR_option_llava_bit = loaded_cfg['leafmachine']['project']['OCR_option_llava_bit']
	double_OCR = loaded_cfg['leafmachine']['project']['double_OCR']

	tool_GEO = loaded_cfg['leafmachine']['project']['tool_GEO']
	tool_WFO = loaded_cfg['leafmachine']['project']['tool_WFO']
	tool_wikipedia = loaded_cfg['leafmachine']['project']['tool_wikipedia']

	pdf_conversion_dpi = loaded_cfg['leafmachine']['project']['pdf_conversion_dpi']

	LLM_version_user = loaded_cfg['leafmachine']['LLM_version']
	prompt_version = loaded_cfg['leafmachine']['project']['prompt_version']
	use_LeafMachine2_collage_images = loaded_cfg['leafmachine']['use_RGB_label_images']
	do_create_OCR_helper_image = loaded_cfg['leafmachine']['do_create_OCR_helper_image']

	batch_size = loaded_cfg['leafmachine']['project']['batch_size']
	num_workers = loaded_cfg['leafmachine']['project']['num_workers']

	path_domain_knowledge = loaded_cfg['leafmachine']['project']['path_to_domain_knowledge_xlsx']
	embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]

	save_cropped_annotations = loaded_cfg['leafmachine']['cropped_components']['save_cropped_annotations']
	check_for_illegal_filenames = loaded_cfg['leafmachine']['do']['check_for_illegal_filenames']
	skip_vertical = loaded_cfg['leafmachine']['do']['skip_vertical']

	return assemble_config(dir_home, run_name, dir_images_local,dir_output,
	prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
	path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	prompt_version, do_create_OCR_helper_image, do_use_trOCR, trOCR_model_path, OCR_option, OCR_option_llava,
	OCR_option_llava_bit, double_OCR, save_cropped_annotations,
	tool_GEO, tool_WFO, tool_wikipedia,
	check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)


	def assemble_config(dir_home, run_name, dir_images_local,dir_output,
	prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
	path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	prompt_version, do_create_OCR_helper_image_user, do_use_trOCR, trOCR_model_path, OCR_option, OCR_option_llava,
	OCR_option_llava_bit, double_OCR, save_cropped_annotations,
	tool_GEO, tool_WFO, tool_wikipedia,
	check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False):


	# Initialize the base structure
	config_data = {
	'leafmachine': {}
	}

	# Modular sections to be added to 'leafmachine'
	do_section = {
	'check_for_illegal_filenames': check_for_illegal_filenames,
	'check_for_corrupt_images_make_vertical': True,
	'skip_vertical': skip_vertical,
	}

	print_section = {
	'verbose': True,
	'optional_warnings': True
	}

	logging_section = {
	'log_level': None
	}


	project_section = {
	'dir_output': dir_output,
	'run_name': run_name,
	'image_location': 'local',
	'batch_size': batch_size,
	'num_workers': num_workers,
	'dir_images_local': dir_images_local,
	'continue_run_from_partial_xlsx': '',
	'prefix_removal': prefix_removal,
	'suffix_removal': suffix_removal,
	'catalog_numerical_only': catalog_numerical_only,
	'use_domain_knowledge': use_domain_knowledge,
	'embeddings_database_name': embeddings_database_name,
	'build_new_embeddings_database': False,
	'path_to_domain_knowledge_xlsx': path_domain_knowledge,
	'prompt_version': prompt_version,
	'delete_all_temps': False,
	'delete_temps_keep_VVE': False,
	'do_use_trOCR': do_use_trOCR,
	'trOCR_model_path': trOCR_model_path,
	'OCR_option': OCR_option,
	'OCR_option_llava': OCR_option_llava,
	'OCR_option_llava_bit': OCR_option_llava_bit,
	'double_OCR': double_OCR,
	'pdf_conversion_dpi': pdf_conversion_dpi,
	'tool_GEO': tool_GEO,
	'tool_WFO': tool_WFO,
	'tool_wikipedia': tool_wikipedia,
	}

	modules_section = {
	'specimen_crop': True
	}

	LLM_version = LLM_version_user
	use_RGB_label_images = use_LeafMachine2_collage_images # Use LeafMachine2 collage images
	do_create_OCR_helper_image = do_create_OCR_helper_image_user

	cropped_components_section = {
	'do_save_cropped_annotations': True,
	'save_cropped_annotations': save_cropped_annotations,
	'save_per_image': False,
	'save_per_annotation_class': True,
	'binarize_labels': False,
	'binarize_labels_skeletonize': False
	}

	data_section = {
	'save_json_rulers': False,
	'save_json_measurements': False,
	'save_individual_csv_files_rulers': False,
	'save_individual_csv_files_measurements': False,
	'save_individual_csv_files_landmarks': False,
	'save_individual_efd_files': False,
	'include_darwin_core_data_from_combined_file': False,
	'do_apply_conversion_factor': False
	}

	overlay_section = {
	'save_overlay_to_pdf': False,
	'save_overlay_to_jpgs': True,
	'overlay_dpi': 300, # Between 100 to 300
	'overlay_background_color': 'black', # Either 'white' or 'black'

	'show_archival_detections': True,
	'show_plant_detections': True,
	'show_segmentations': True,
	'show_landmarks': True,
	'ignore_archival_detections_classes': [],
	'ignore_plant_detections_classes': ['leaf_whole', 'specimen'], # Could also include 'leaf_partial' and others if needed
	'ignore_landmark_classes': [],

	'line_width_archival': 12, # Previous value given was 2
	'line_width_plant': 12, # Previous value given was 6
	'line_width_seg': 12, # 12 is specified as "thick"
	'line_width_efd': 12, # 3 is specified as "thick" but 12 is given here
	'alpha_transparency_archival': 0.3,
	'alpha_transparency_plant': 0,
	'alpha_transparency_seg_whole_leaf': 0.4,
	'alpha_transparency_seg_partial_leaf': 0.3
	}

	archival_component_detector_section = {
	'detector_type': 'Archival_Detector',
	'detector_version': 'PREP_final',
	'detector_iteration': 'PREP_final',
	'detector_weights': 'best.pt',
	'minimum_confidence_threshold': 0.5, # Default is 0.5
	'do_save_prediction_overlay_images': True,
	'ignore_objects_for_overlay': []
	}

	# Add the sections to the 'leafmachine' key
	config_data['leafmachine']['do'] = do_section
	config_data['leafmachine']['print'] = print_section
	config_data['leafmachine']['logging'] = logging_section
	config_data['leafmachine']['project'] = project_section
	config_data['leafmachine']['LLM_version'] = LLM_version
	config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
	config_data['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
	config_data['leafmachine']['cropped_components'] = cropped_components_section
	config_data['leafmachine']['modules'] = modules_section
	config_data['leafmachine']['data'] = data_section
	config_data['leafmachine']['overlay'] = overlay_section
	config_data['leafmachine']['archival_component_detector'] = archival_component_detector_section

	return config_data, dir_home

	# def build_api_tests(api):
	# dir_home = os.path.dirname(os.path.dirname(__file__))
	# path_to_configs = os.path.join(dir_home,'demo','demo_configs')

	# dir_home = os.path.dirname(os.path.dirname(__file__))
	# dir_images_local = os.path.join(dir_home,'demo','demo_images')
	# validate_dir(os.path.join(dir_home,'demo','demo_configs'))
	# path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
	# embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
	# prefix_removal = ''
	# suffix_removal = ''
	# catalog_numerical_only = False
	# batch_size = 500
	# do_create_OCR_helper_image = False


	# # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
	# # LLM_version_user = 'Azure GPT 4'

	# # ### Option 2: False of [False, True]
	# # use_LeafMachine2_collage_images = False

	# # ### Option 3: False of [False, True]
	# # use_domain_knowledge = True

	# test_results = {}
	# if api == 'openai':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
	# elif api == 'palm':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
	# elif api == 'azure_openai':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
	# else:
	# raise

	# ind = -1
	# ind_opt1 = -1
	# ind_opt2 = -1
	# ind_opt3 = -1

	# for opt1 in OPT1:
	# ind_opt1+= 1
	# for opt2 in OPT2:
	# ind_opt2 += 1
	# for opt3 in OPT3:
	# ind += 1
	# ind_opt3 += 1

	# LLM_version_user = opt1
	# use_LeafMachine2_collage_images = opt2
	# prompt_version = opt3

	# filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
	# run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"

	# dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
	# validate_dir(dir_output)

	# config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
	# prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
	# path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	# prompt_version,do_create_OCR_helper_image)

	# write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)

	# test_results[run_name] = False
	# ind_opt3 = -1
	# ind_opt2 = -1
	# ind_opt1 = -1

	# return dir_home, path_to_configs, test_results

	# def build_demo_tests(llm_version):
	# dir_home = os.path.dirname(os.path.dirname(__file__))
	# path_to_configs = os.path.join(dir_home,'demo','demo_configs')

	# dir_home = os.path.dirname(os.path.dirname(__file__))
	# dir_images_local = os.path.join(dir_home,'demo','demo_images')
	# validate_dir(os.path.join(dir_home,'demo','demo_configs'))
	# path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
	# embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
	# prefix_removal = ''
	# suffix_removal = ''
	# catalog_numerical_only = False
	# batch_size = 500
	# do_create_OCR_helper_image = False

	# # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
	# # LLM_version_user = 'Azure GPT 4'

	# # ### Option 2: False of [False, True]
	# # use_LeafMachine2_collage_images = False

	# # ### Option 3: False of [False, True]
	# # use_domain_knowledge = True

	# test_results = {}
	# if llm_version == 'gpt':
	# OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
	# elif llm_version == 'palm':
	# OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
	# else:
	# raise

	# ind = -1
	# ind_opt1 = -1
	# ind_opt2 = -1
	# ind_opt3 = -1

	# for opt1 in OPT1:
	# ind_opt1+= 1
	# for opt2 in OPT2:
	# ind_opt2 += 1
	# for opt3 in OPT3:
	# ind += 1
	# ind_opt3 += 1

	# LLM_version_user = opt1
	# use_LeafMachine2_collage_images = opt2
	# prompt_version = opt3

	# filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
	# run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"

	# dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
	# validate_dir(dir_output)


	# if llm_version == 'gpt':
	# if prompt_version in ['Version 1']:
	# config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
	# prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
	# path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	# prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
	# else:
	# config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
	# prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
	# path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	# prompt_version, do_create_OCR_helper_image)
	# elif llm_version == 'palm':
	# if prompt_version in ['Version 1 PaLM 2']:
	# config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
	# prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
	# path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	# prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
	# else:
	# config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
	# prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
	# path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
	# prompt_version, do_create_OCR_helper_image)


	# write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)

	# test_results[run_name] = False
	# ind_opt3 = -1
	# ind_opt2 = -1
	# ind_opt1 = -1

	# return dir_home, path_to_configs, test_results

	class TestOptionsGPT:
	OPT1 = ["gpt-4-1106-preview","GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5"]
	OPT2 = [False, True]
	OPT3 = ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]

	@classmethod
	def get_options(cls):
	return cls.OPT1, cls.OPT2, cls.OPT3
	@classmethod
	def get_length(cls):
	return 24

	class TestOptionsPalm:
	OPT1 = ["PaLM 2"]
	OPT2 = [False, True]
	OPT3 = ["Version 1 PaLM 2", "Version 1 PaLM 2 No Domain Knowledge", "Version 2 PaLM 2"]

	@classmethod
	def get_options(cls):
	return cls.OPT1, cls.OPT2, cls.OPT3
	@classmethod
	def get_length(cls):
	return 6

	class TestOptionsAPI_openai:
	OPT1 = ["GPT 3.5"]
	OPT2 = [False]
	OPT3 = ["Version 2"]

	@classmethod
	def get_options(cls):
	return cls.OPT1, cls.OPT2, cls.OPT3
	@classmethod
	def get_length(cls):
	return 24

	class TestOptionsAPI_azure_openai:
	OPT1 = ["Azure GPT 3.5"]
	OPT2 = [False]
	OPT3 = ["Version 2"]

	@classmethod
	def get_options(cls):
	return cls.OPT1, cls.OPT2, cls.OPT3
	@classmethod
	def get_length(cls):
	return 24

	class TestOptionsAPI_palm:
	OPT1 = ["PaLM 2"]
	OPT2 = [False]
	OPT3 = ["Version 2 PaLM 2"]

	@classmethod
	def get_options(cls):
	return cls.OPT1, cls.OPT2, cls.OPT3
	@classmethod
	def get_length(cls):
	return 6

	# def run_demo_tests_GPT(progress_report):
	# dir_home, path_to_configs, test_results = build_demo_tests('gpt')
	# progress_report.set_n_overall(len(test_results.items()))

	# JSON_results = {}

	# for ind, (cfg, result) in enumerate(test_results.items()):
	# OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()

	# test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
	# opt1_readable = OPT1[int(ind_opt1.split('-')[1])]

	# if opt1_readable in ["Azure GPT 4", "Azure GPT 3.5"]:
	# api_version = 'gpt-azure'
	# elif opt1_readable in ["GPT 4", "GPT 3.5"]:
	# api_version = 'gpt'
	# else:
	# raise

	# opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
	# opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
	# # Construct the human-readable test name
	# human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
	# get_n_overall = progress_report.get_n_overall()
	# progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
	# print_main_fail(f"Starting validation test: {human_readable_name}")
	# cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))

	# if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr'):
	# try:
	# last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, progress_report=progress_report, test_ind=int(test_ind))
	# test_results[cfg] = True
	# JSON_results[ind] = last_JSON_response
	# except Exception as e:
	# JSON_results[ind] = None
	# test_results[cfg] = False
	# print(f"An exception occurred: {e}")
	# traceback.print_exc() # This will print the full traceback
	# else:
	# fail_response = ''
	# if not check_API_key(dir_home, 'google-vision-ocr'):
	# fail_response += "No API key found for Google Vision OCR"
	# if not check_API_key(dir_home, api_version):
	# fail_response += f" + No API key found for {api_version}"
	# test_results[cfg] = False
	# JSON_results[ind] = fail_response
	# print(f"No API key found for {fail_response}")

	# return test_results, JSON_results

	# def run_demo_tests_Palm(progress_report):
	# api_version = 'palm'

	# dir_home, path_to_configs, test_results = build_demo_tests('palm')
	# progress_report.set_n_overall(len(test_results.items()))

	# JSON_results = {}

	# for ind, (cfg, result) in enumerate(test_results.items()):
	# OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
	# test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
	# opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
	# opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
	# opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
	# # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
	# # Construct the human-readable test name
	# human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
	# get_n_overall = progress_report.get_n_overall()
	# progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
	# print_main_fail(f"Starting validation test: {human_readable_name}")
	# cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))

	# if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr') :
	# try:
	# last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, path_custom_prompts=None, progress_report=progress_report, test_ind=int(test_ind))
	# test_results[cfg] = True
	# JSON_results[ind] = last_JSON_response
	# except Exception as e:
	# test_results[cfg] = False
	# JSON_results[ind] = None
	# print(f"An exception occurred: {e}")
	# traceback.print_exc() # This will print the full traceback
	# else:
	# fail_response = ''
	# if not check_API_key(dir_home, 'google-vision-ocr'):
	# fail_response += "No API key found for Google Vision OCR"
	# if not check_API_key(dir_home, api_version):
	# fail_response += f" + No API key found for {api_version}"
	# test_results[cfg] = False
	# JSON_results[ind] = fail_response
	# print(f"No API key found for {fail_response}")

	# return test_results, JSON_results

	# def run_api_tests(api):
	# try:
	# dir_home, path_to_configs, test_results = build_api_tests(api)

	# JSON_results = {}

	# for ind, (cfg, result) in enumerate(test_results.items()):
	# if api == 'openai':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
	# elif 'azure_openai':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
	# elif 'palm':
	# OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
	# test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
	# opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
	# opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
	# opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
	# # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
	# # Construct the human-readable test name
	# human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
	# print_main_fail(f"Starting validation test: {human_readable_name}")
	# cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))

	# if check_API_key(dir_home, api) and check_API_key(dir_home, 'google-vision-ocr') :
	# try:
	# last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, None,path_custom_prompts=None , cfg_test=None, progress_report=None, test_ind=int(test_ind))
	# test_results[cfg] = True
	# JSON_results[ind] = last_JSON_response
	# return True

	# except Exception as e:
	# print(e)
	# return False
	# else:
	# return False
	# except Exception as e:
	# print(e)
	# return False

	def has_API_key(val):
	if val != '':
	return True
	else:
	return False

	def check_if_usable(is_hf): ############################################################################################################## TODO fix
	if is_hf:
	return True ########### needs actual logic. borrow from another function to not repeat this
	else:
	dir_home = os.path.dirname(os.path.dirname(__file__))
	path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
	cfg_private = get_cfg_from_full_path(path_cfg_private)

	has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])

	has_key_azure_openai = has_API_key(cfg_private['openai_azure']['OPENAI_API_VERSION'])

	has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])

	has_key_MISTRAL = has_API_key(cfg_private['mistral']['MISTRAL_API_KEY'])


	if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_MISTRAL):
	return True
	else:
	return False

	# def check_API_key(dir_home, api_version):
	# dir_home = os.path.dirname(os.path.dirname(__file__))
	# path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
	# cfg_private = get_cfg_from_full_path(path_cfg_private)

	# has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])

	# has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])

	# # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])

	# has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])

	# if api_version in ['gpt','openai'] and has_key_openai:
	# return True
	# elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
	# return True
	# elif api_version == 'google-vision-ocr' and has_key_google_OCR:
	# return True
	# else:
	# return False