VoucherVision / vouchervision /directory_structure_VV.py
phyloforfun's picture
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
806953a
import os, pathlib #, sys, inspect
from dataclasses import dataclass
# currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
# parentdir = os.path.dirname(currentdir)
# sys.path.append(parentdir)
# sys.path.append(currentdir)
from vouchervision.general_utils import validate_dir, get_datetime
@dataclass
class Dir_Structure():
# Home
run_name: str = ''
dir_home: str = ''
dir_project: str = ''
# Processing dirs
path_archival_components: str = ''
path_config_file: str = ''
ruler_info: str = ''
ruler_overlay: str = ''
ruler_processed: str = ''
ruler_data: str = ''
ruler_class_overlay: str = ''
ruler_validation_summary: str = ''
ruler_validation: str = ''
save_per_image: str = ''
save_per_annotation_class: str = ''
binarize_labels: str = ''
# logging
path_log: str = ''
def __init__(self, cfg) -> None:
# Home
self.run_name = cfg['leafmachine']['project']['run_name']
self.dir_home = cfg['leafmachine']['project']['dir_output']
self.dir_project = os.path.join(self.dir_home,self.run_name)
validate_dir(self.dir_home)
self.__add_time_to_existing_project_dir()
validate_dir(self.dir_project)
# Processing dirs
self.path_archival_components = os.path.join(self.dir_project,'Archival_Components')
validate_dir(self.path_archival_components)
self.path_config_file = os.path.join(self.dir_project,'Config_File')
validate_dir(self.path_config_file)
self.path_cost = os.path.join(self.dir_project,'Cost')
validate_dir(self.path_cost)
# Logging
self.path_log = os.path.join(self.dir_project,'Logs')
validate_dir(self.path_log)
# self.custom_overlay_pdfs = os.path.join(self.dir_project,'Summary','Custom_Overlay_PDFs')
# self.custom_overlay_images = os.path.join(self.dir_project,'Summary','Custom_Overlay_Images')
###
# self.custom_overlay_pdfs = os.path.join(self.dir_project,'Summary','Custom_Overlay_PDFs')
# if cfg['leafmachine']['overlay']['save_overlay_to_pdf']:
# validate_dir(self.custom_overlay_pdfs)
# self.custom_overlay_images = os.path.join(self.dir_project,'Summary','Custom_Overlay_Images')
# if cfg['leafmachine']['overlay']['save_overlay_to_jpgs']:
# validate_dir(self.custom_overlay_images)
### Rulers
# self.ruler_info = os.path.join(self.dir_project,'Archival_Components','Ruler_Info')
# self.ruler_validation_summary = os.path.join(self.dir_project,'Archival_Components','Ruler_Info', 'Ruler_Validation_Summary')
# self.ruler_validation = os.path.join(self.dir_project,'Archival_Components','Ruler_Info', 'Ruler_Validation')
# self.ruler_processed = os.path.join(self.dir_project,'Archival_Components','Ruler_Info', 'Ruler_Processed')
# validate_dir(self.ruler_info)
validate_dir(os.path.join(self.path_archival_components, 'JSON'))
validate_dir(os.path.join(self.path_archival_components, 'labels'))
### Data
self.transcription = os.path.join(self.dir_project,'Transcription')
validate_dir(self.transcription)
self.transcription_ind = os.path.join(self.dir_project,'Transcription','Individual')
validate_dir(self.transcription_ind)
# self.transcription_ind_helper = os.path.join(self.dir_project,'Transcription','Individual_Helper_Content')
# validate_dir(self.transcription_ind_helper)
self.transcription_ind_OCR = os.path.join(self.dir_project,'Transcription','Individual_OCR')
validate_dir(self.transcription_ind_OCR)
self.transcription_ind_OCR_bounds = os.path.join(self.dir_project,'Transcription','Individual_OCR_Bounds')
validate_dir(self.transcription_ind_OCR_bounds)
self.transcription_ind_OCR_helper = os.path.join(self.dir_project,'Transcription','Individual_OCR_Helper')
validate_dir(self.transcription_ind_OCR_helper)
self.transcription_ind_wiki = os.path.join(self.dir_project,'Transcription','Individual_Wikipedia')
validate_dir(self.transcription_ind_wiki)
self.transcription_ind_prompt = os.path.join(self.dir_project,'Transcription','Individual_Prompt')
validate_dir(self.transcription_ind_prompt)
self.transcription_prompt = os.path.join(self.dir_project,'Transcription','Prompt_Template')
validate_dir(self.transcription_prompt)
self.save_original = os.path.join(self.dir_project,'Original_Images')
validate_dir(self.save_original)
self.save_per_image = os.path.join(self.dir_project,'Cropped_Images', 'By_Image')
self.save_per_annotation_class = os.path.join(self.dir_project,'Cropped_Images', 'By_Class')
self.save_per_annotation_class = os.path.join(self.dir_project,'Cropped_Images', 'By_Class')
if cfg['leafmachine']['cropped_components']['save_per_image']:
validate_dir(self.save_per_image)
if cfg['leafmachine']['cropped_components']['save_per_annotation_class']:
validate_dir(self.save_per_annotation_class)
if cfg['leafmachine']['cropped_components']['binarize_labels']:
validate_dir(self.save_per_annotation_class)
# self.binarize_labels = os.path.join(self.dir_project,'Cropped_Images', 'By_Class','label_binary')
# validate_dir(self.binarize_labels)
def __add_time_to_existing_project_dir(self) -> None:
path = pathlib.Path(self.dir_project)
if path.exists():
now = get_datetime()
path = path.with_name(path.name + "_" + now)
self.run_name = path.name
path.mkdir()
self.dir_project = path
else:
path.mkdir()
self.dir_project = path