Spaces:
Running
Running
File size: 8,465 Bytes
87c3140 806953a 87c3140 524a99c 82e0572 7e12cb7 aedd7d9 87c3140 e91ac58 87c3140 26c9c07 87c3140 26c9c07 87c3140 26c9c07 e91ac58 87c3140 e91ac58 87c3140 e91ac58 87c3140 26c9c07 e91ac58 87c3140 e91ac58 93fd830 aedd7d9 9d06861 aedd7d9 9d06861 aedd7d9 524a99c aedd7d9 9d06861 87c3140 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
'''
VoucherVision - based on LeafMachine2 Processes
'''
import os, inspect, sys, shutil
from time import perf_counter
# currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
# parentdir = os.path.dirname(currentdir)
# sys.path.append(parentdir)
# sys.path.append(currentdir)
from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
from vouchervision.general_utils import save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, crop_detections_from_images_VV
from vouchervision.directory_structure_VV import Dir_Structure
from vouchervision.data_project import Project_Info
from vouchervision.LM2_logger import start_logging
from vouchervision.fetch_data import fetch_data
from vouchervision.utils_VoucherVision import VoucherVision, space_saver
from vouchervision.utils_hf import upload_to_drive
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind=None, is_hf=True, is_real_run=False):
    """Run the VoucherVision pipeline end to end and zip the run directory.

    Steps, in order: load (or accept) the config, build the output directory
    structure, start logging, fetch the required ML files, gather images,
    detect and crop archival components, process the specimen batch, record
    token usage, then zip the run directory and hand the zip to
    ``upload_to_drive``.

    Args:
        cfg_file_path: Config path forwarded to ``report_config``,
            ``load_config_file`` and ``fetch_data``.
        dir_home: Home directory forwarded to the helpers.
        path_custom_prompts: Forwarded to the ``VoucherVision`` constructor.
        cfg_test: Ready-made config. When ``None`` the config is loaded from
            ``cfg_file_path`` instead.
        progress_report: Object whose ``update_overall(str)`` is called here
            only when ``is_real_run`` is true; it is also forwarded to the
            detection and batch-processing steps.
        json_report: Forwarded to ``process_specimen_batch``.
        path_api_cost: Forwarded to ``save_token_info_as_csv``.
        test_ind: Not used by this function.
        is_hf: Forwarded to ``VoucherVision`` and ``upload_to_drive``; its
            truthiness is also passed as ``do_upload``.
        is_real_run: Gates the ``progress_report`` updates made here and is
            forwarded to the detection and batch-processing steps.

    Returns:
        dict with keys ``last_JSON_response``, ``final_WFO_record``,
        ``final_GEO_record``, ``total_cost``, ``n_failed_OCR``,
        ``n_failed_LLM_calls`` and ``zip_filepath``.

    Raises:
        AssertionError: If ``fetch_data`` reports the ML files are not ready.
    """
    t_overall = perf_counter()

    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')
    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test

    # Check to see if there are subdirs
    # Yes --> use the names of the subdirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # Dir structure
    if is_real_run:
        progress_report.update_overall("Creating Output Directory Structure")
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    if is_real_run:
        progress_report.update_overall("Fetching LeafMachine2 Files")
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    if not ready_to_use:
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered")

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs)  # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run, progress_report)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs, is_hf)
    n_images = len(Voucher_Vision.img_paths)
    last_JSON_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, json_report, is_real_run)

    total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images, dir_home, logger)

    t_overall_s = perf_counter()
    logger.name = 'Run Complete! :)'
    logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
    space_saver(cfg, Dirs, logger)

    if is_real_run:
        progress_report.update_overall("Run Complete!")

    Voucher_Vision.close_logger_handlers()

    # Create Hugging Face zip file from the run directory
    dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name)
    zip_filename = Dirs.run_name
    zip_filepath = make_zipfile(dir_to_zip, zip_filename)  # TODO Make this configurable

    # upload_to_drive is always called; do_upload simply mirrors is_hf.
    upload_to_drive(zip_filepath, zip_filename, is_hf, cfg_private=Voucher_Vision.cfg_private, do_upload=bool(is_hf))  # TODO Make this configurable

    return {
        'last_JSON_response': last_JSON_response,
        'final_WFO_record': final_WFO_record,
        'final_GEO_record': final_GEO_record,
        'total_cost': total_cost,
        'n_failed_OCR': Voucher_Vision.n_failed_OCR,
        'n_failed_LLM_calls': Voucher_Vision.n_failed_LLM_calls,
        'zip_filepath': zip_filepath,
    }
def make_zipfile(base_dir, output_filename):
    """Zip the contents of ``base_dir`` into ``<base_dir>/<output_filename>.zip``.

    The archive is built in a temporary directory and moved into ``base_dir``
    afterwards. Writing it directly inside the directory being archived would
    make the zip include a partial copy of itself.

    Args:
        base_dir: Directory whose contents are archived; also where the
            finished zip file is placed.
        output_filename: Archive name without the ``.zip`` extension.

    Returns:
        Full path of the created zip file.
    """
    import tempfile  # local import: only this helper needs it

    final_zip_path = os.path.join(base_dir, output_filename + '.zip')

    # Drop an archive left by an earlier call so it is not packed into the new one.
    if os.path.isfile(final_zip_path):
        os.remove(final_zip_path)

    with tempfile.TemporaryDirectory() as staging_dir:
        # Build the archive outside base_dir, then move it into place.
        staged_zip = shutil.make_archive(os.path.join(staging_dir, 'archive'), 'zip', base_dir)
        os.makedirs(os.path.dirname(final_zip_path) or os.curdir, exist_ok=True)
        shutil.move(staged_zip, final_zip_path)

    # Return the full path of the created zip file
    return final_zip_path
def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
    """Run the pipeline setup steps and then the OCR test on ``path_to_crop``.

    Performs the same preparation as ``voucher_vision`` (config, directory
    structure, logging, ML file check, image gathering, archival component
    detection, cropping) and finishes by calling
    ``VoucherVision.process_specimen_batch_OCR_test``.

    Args:
        cfg_file_path: Config path forwarded to ``report_config``,
            ``load_config_file`` and ``fetch_data``.
        dir_home: Home directory forwarded to the helpers.
        cfg_test: Ready-made config. When ``None`` the config is loaded from
            ``cfg_file_path`` instead.
        path_to_crop: Forwarded to ``process_specimen_batch_OCR_test``.

    Returns:
        None.

    Raises:
        AssertionError: If ``fetch_data`` reports the ML files are not ready.
    """
    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')
    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test

    # Check to see if there are subdirs
    # Yes --> use the names of the subdirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # Dir structure
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    if not ready_to_use:
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered")

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs)  # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    # NOTE(review): voucher_vision() also passes is_real_run and progress_report
    # to this helper; confirm those parameters have defaults.
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    # NOTE(review): voucher_vision() also passes is_hf to this constructor;
    # confirm that parameter has a default.
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs)

    # NOTE(review): the OCR test result is not returned to the caller;
    # confirm whether callers need it.
    Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop)
if __name__ == '__main__':
    # Script entry point: run voucher_vision() either on the demo config
    # (is_test = True) or with no config file and no preloaded config.
    is_test = False

    # Set LeafMachine2 dir (three directory levels above this file)
    dir_home = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    if is_test:
        cfg_file_path = os.path.join(dir_home, 'demo', 'demo.yaml')
        cfg_testing = load_config_file_testing(dir_home, cfg_file_path)
        project_cfg = cfg_testing['leafmachine']['project']
        # Each directory entry is indexed as two path parts that are joined onto dir_home.
        project_cfg['dir_images_local'] = os.path.join(dir_home, project_cfg['dir_images_local'][0], project_cfg['dir_images_local'][1])
        project_cfg['dir_output'] = os.path.join(dir_home, project_cfg['dir_output'][0], project_cfg['dir_output'][1])
    else:
        cfg_file_path = None
        cfg_testing = None

    # Keyword arguments keep each value bound to the intended parameter:
    # voucher_vision() requires path_custom_prompts, cfg_test, progress_report
    # and json_report, so the former four-positional call raised TypeError.
    # progress_report is only used directly by voucher_vision() when
    # is_real_run is true (default False).
    # NOTE(review): is_hf=False is chosen so a local script run does not
    # request an upload (is_hf drives do_upload); confirm this is intended.
    last_JSON_response = voucher_vision(
        cfg_file_path,
        dir_home,
        path_custom_prompts=None,
        cfg_test=cfg_testing,
        progress_report=None,
        json_report=None,
        is_hf=False,
    )