Spaces:
Running
Running
File size: 3,439 Bytes
bdf9096 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import glob
import json
import os
import pandas as pd
import re
import shutil
from collections import defaultdict
from pathlib import Path, PurePath
def select_instance(source_dir, log_path, destination=os.path.join("output","generated","instance_selection")):
    """Copy the first file matching ``log_path`` under ``source_dir`` into ``destination``.

    The copied file is renamed to "<parent-dir>_<file-name>" so instances coming
    from different sub-directories do not collide in the flat destination folder.

    Parameters
    ----------
    source_dir : str
        Directory the glob pattern is resolved against.
    log_path : str
        Glob pattern, relative to ``source_dir``, selecting the file to copy.
    destination : str, optional
        Folder the selected file is copied into; created if missing.

    Returns
    -------
    tuple[str, int]
        The destination folder and the number of files it now contains.
    """
    os.makedirs(destination, exist_ok=True)
    matches = glob.glob(os.path.join(source_dir, log_path))
    if matches:
        source_path = matches[0]
        # PurePath.parts (instead of the original Unix-only rsplit("/")) keeps
        # the "<parent>_<name>" renaming correct on any platform.
        name_parts = PurePath(source_path).parts[-2:]
        destination_path = os.path.join(destination, "_".join(name_parts))
        shutil.copyfile(source_path, destination_path)
    else:
        # Best effort: report the (properly joined) pattern and keep going
        # rather than aborting the whole run.
        print(f"ERROR: No files found for {os.path.join(source_dir, log_path)}. Continuing.")
    return destination, len(os.listdir(destination))
def read_csvs(input_path, ref_feature):
    """Load every CSV under ``input_path`` whose file name contains the
    abbreviation of ``ref_feature`` as a '_'-separated token.

    Returns a dict mapping the non-reference feature column name to its
    DataFrame (missing keys default to an empty DataFrame).
    """
    frames = defaultdict(pd.DataFrame)
    ref_short_name = get_keys_abbreviation([ref_feature])
    for csv_path in glob.glob(f'{input_path}*.csv'):
        stem_tokens = csv_path[:-4].split(os.sep)[-1].split("_")
        if ref_short_name not in stem_tokens:
            continue
        frame = pd.read_csv(csv_path, delimiter=",")
        # If the first column is 'task', rotate it to the end and rename it 'log'.
        if frame.columns[0] == 'task':
            frame = frame.reindex(columns=[frame.columns[1], frame.columns[2], frame.columns[0]])
            frame.rename(columns={"task":"log"}, inplace=True)
        # Key on whichever leading column is NOT the reference feature.
        key = frame.columns[1] if frame.columns[0] == ref_feature else frame.columns[0]
        frames[key] = frame
    return frames
def sort_files(data):
    """Return ``data`` sorted alphanumerically (natural sort).

    Digit runs inside each string compare as integers, so e.g. "file2"
    sorts before "file10"; non-digit parts compare case-insensitively.
    """
    # Named functions instead of assigned lambdas (PEP 8 E731).
    def convert(text):
        return int(text) if text.isdigit() else text.lower()

    def alphanum_key(key):
        # re.split with a capturing group keeps the digit runs in the result.
        return [convert(chunk) for chunk in re.split("([0-9]+)", key)]

    return sorted(data, key=alphanum_key)
def get_keys_abbreviation(obj_keys):
    """Abbreviate each key to the initial of every '_'-separated word plus
    all digits, then join the per-key abbreviations with '_'.

    e.g. ["ratio_top_1"] -> "rt1".
    """
    def shorten(full_key):
        # First character of each word is kept, as is every digit anywhere.
        return ''.join(
            ch
            for word in full_key.split("_")
            for pos, ch in enumerate(word)
            if pos == 0 or ch.isdigit()
        )

    return '_'.join(shorten(key) for key in obj_keys)
def get_output_key_value_location(obj, output_path, identifier):
    """Build (and create) a folder encoding the objective keys, and return a
    file path (without extension) encoding the objective values.

    Folder: <output_path>/<key-count>_<abbreviated-keys>
    File:   <identifier>_<values rounded to 4 decimals, '_'-joined, dots dropped>
    """
    ordered = dict(sorted(obj.items()))
    keys = list(ordered.keys())
    folder_path = os.path.join(output_path, f"{len(keys)}_{get_keys_abbreviation(keys)}")
    # Round to 4 decimals and strip the dots so the values are file-name safe.
    rounded = [round(value, 4) for value in ordered.values()]
    encoded_values = '_'.join(str(value) for value in rounded).replace('.', '')
    os.makedirs(folder_path, exist_ok=True)
    return os.path.join(folder_path, f"{identifier}_{encoded_values}")
def dump_features_json(features: dict, output_path, identifier, objectives=None, content_type="features"):
    """Serialize ``features`` as JSON under a '<root>/<content_type>/...' tree.

    The first component of ``output_path`` is taken as the root and the
    ``content_type`` folder is inserted directly beneath it. With
    ``objectives`` given, the file name additionally encodes the objective
    keys/values via ``get_output_key_value_location``; otherwise the file is
    simply ``<identifier>.json``.
    """
    parts = PurePath(output_path).parts
    feature_dir = os.path.join(parts[0], content_type, *parts[1:])
    if objectives is None:
        json_path = os.path.join(feature_dir, identifier) + ".json"
    else:
        json_path = get_output_key_value_location(objectives, feature_dir, identifier) + ".json"
    os.makedirs(os.path.split(json_path)[0], exist_ok=True)
    # default=int coerces non-JSON-serializable values to int — presumably
    # numpy scalars; confirm against callers.
    with open(json_path, 'w') as fp:
        json.dump(features, fp, default=int)
    print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
|