欧卫
'add_app_files'
58627fa
import re
import os
import ujson
def get_parts(directory):
    """Enumerate the integer-named ``.pt`` part files in ``directory``.

    A file counts as a part when its name is ``<int>.pt``. Other files that
    merely end in ``.pt`` (for example ``0.residuals.pt``) are ignored instead
    of crashing the integer conversion.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A tuple ``(parts, parts_paths, samples_paths)`` where ``parts`` is the
        sorted list of integer part indices, ``parts_paths`` the matching
        ``<directory>/<index>.pt`` paths and ``samples_paths`` the matching
        ``<directory>/<index>.sample`` paths. The sample paths are only
        derived from the indices; their existence is not checked.

    Raises:
        AssertionError: If the indices found are not exactly ``0..n-1``.
    """
    extension = '.pt'

    parts = []
    for filename in os.listdir(directory):
        if not filename.endswith(extension):
            continue
        try:
            parts.append(int(filename[:-len(extension)]))
        except ValueError:
            # Not an "<int>.pt" file (e.g. "0.residuals.pt"); not a part.
            continue

    # Integer-sortedness matters: "10.pt" must come after "2.pt".
    parts.sort()

    # Raised explicitly (rather than via `assert`) so the check survives
    # `python -O`, while keeping the exception type callers already see.
    if parts != list(range(len(parts))):
        raise AssertionError(parts)

    parts_paths = [os.path.join(directory, '{}{}'.format(part, extension)) for part in parts]
    samples_paths = [os.path.join(directory, '{}.sample'.format(part)) for part in parts]

    return parts, parts_paths, samples_paths
def load_doclens(directory, flatten=True):
    """Load the ``doclens.<N>.json`` files found in ``directory``.

    Files are read in ascending order of their integer index ``N``. Only
    names that match ``doclens.<digits>.json`` exactly are considered, so
    leftovers such as ``doclens.3.json.bak`` are ignored.

    Args:
        directory: Path of the directory to scan.
        flatten: When true (the default), concatenate the per-file contents
            into one flat list. When false, return one entry per file.

    Returns:
        The flat list of all items (``flatten=True``) or the list of
        per-file JSON payloads (``flatten=False``).

    Raises:
        ValueError: If the resulting list is empty (no matching files, or
            with ``flatten=True`` every file was empty).
    """
    # Map integer chunk index -> filename so chunks can be ordered numerically.
    chunk_files = {}
    for filename in os.listdir(directory):
        match = re.fullmatch(r"doclens\.(\d+)\.json", filename)
        if match is not None:
            chunk_files[int(match.group(1))] = filename

    all_doclens = []
    for index in sorted(chunk_files):
        # Context manager closes each handle promptly.
        with open(os.path.join(directory, chunk_files[index])) as f:
            all_doclens.append(ujson.load(f))

    if flatten:
        all_doclens = [x for sub_doclens in all_doclens for x in sub_doclens]

    if len(all_doclens) == 0:
        raise ValueError("Could not load doclens")

    return all_doclens
def get_deltas(directory):
    """Enumerate the integer-named ``.residuals.pt`` files in ``directory``.

    A file counts when its name is ``<int>.residuals.pt``. Files with the
    same suffix but a non-integer stem are ignored instead of crashing the
    integer conversion.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A tuple ``(parts, parts_paths)`` where ``parts`` is the sorted list of
        integer indices and ``parts_paths`` the matching
        ``<directory>/<index>.residuals.pt`` paths.

    Raises:
        AssertionError: If the indices found are not exactly ``0..n-1``.
    """
    extension = '.residuals.pt'

    parts = []
    for filename in os.listdir(directory):
        if not filename.endswith(extension):
            continue
        try:
            parts.append(int(filename[:-len(extension)]))
        except ValueError:
            # Stem is not an integer; not one of the numbered files.
            continue

    # Integer-sortedness matters: index 10 must come after index 2.
    parts.sort()

    # Raised explicitly (rather than via `assert`) so the check survives
    # `python -O`, while keeping the exception type callers already see.
    if parts != list(range(len(parts))):
        raise AssertionError(parts)

    parts_paths = [os.path.join(directory, '{}{}'.format(part, extension)) for part in parts]

    return parts, parts_paths
# def load_compression_data(level, path):
# with open(path, "r") as f:
# for line in f:
# line = line.split(',')
# bits = int(line[0])
# if bits == level:
# return [float(v) for v in line[1:]]
# raise ValueError(f"No data found for {level}-bit compression")