Doa-doa's picture
Upload folder using huggingface_hub
72268ee
import logging
import os
import re
import tarfile
from errno import ELOOP
from tempfile import NamedTemporaryFile
from . import streaming, utils
from .exceptions import CaseInsensitiveFileSystemError, InvalidArchiveError
from .interface import AbstractBaseFormat
# Module-level logger.  Named after the module (``__name__``) rather than the
# source file path so it takes part in the package's logger hierarchy and
# inherits handlers/levels configured on parent loggers.
LOG = logging.getLogger(__name__)
def _sort_file_order(prefix, files):
    """Return *files* as a list ordered for archiving.

    Ordering rules, applied as a ``(bucket, size)`` sort key:

    * entries under ``info/`` come first (bucket ``0``);
    * every other entry is bucketed by file type so that similar files end
      up next to each other: shared libraries (``.so``, ``.dylib`` and
      versioned names such as ``libfoo.so.1.2``) share one bucket, other
      files are bucketed by extension, and extension-less files by their
      directory name;
    * within a bucket, entries are ordered by increasing size, with empty
      files treated as if they were 100000 bytes.

    Buckets are derived from a CRC-32 of the text instead of the built-in
    ``hash()``: string hashes are salted per interpreter process, which made
    the resulting member order differ from one run to the next.

    Args:
        prefix: Directory that the entries of *files* are relative to.
        files: Iterable of relative file paths.

    Returns:
        A new list containing the same paths in archive order.

    Raises:
        OSError: If an entry cannot be ``lstat``-ed under *prefix*.
    """
    # Local import keeps this function self-contained; zlib is stdlib.
    import zlib

    info_slash = "info" + os.path.sep
    # ".so" / ".dylib", optionally followed by numeric version components.
    shared_lib = re.compile(r"(?:\.dylib|\.so)(?:\.\d[^.]*)*$")

    def bucket(text):
        # Stable across processes, always >= 1 so "info/" (0) stays first.
        digest = zlib.crc32(text.encode("utf-8", "surrogateescape"))
        return 1 + digest % (10**8)

    def order(f):
        # We don't care about empty files, so push them back via 100000.
        fsize = os.lstat(os.path.join(prefix, f)).st_size or 100000
        if f.startswith(info_slash):
            return 0, fsize
        name = os.path.basename(f)
        if shared_lib.search(name):
            # Treat every shared-library flavour as ".so".
            ext = ".so"
        else:
            ext = os.path.splitext(name)[1]
        if ext:
            return bucket(ext), fsize
        # Files without extensions are grouped by their directory.
        return bucket(os.path.dirname(f)), fsize

    return sorted(files, key=order)
def _create_no_libarchive(fullpath, files):
    """Write *files* into a new bzip2-compressed tar archive at *fullpath*.

    Uses only the standard-library ``tarfile`` module.  Each path in *files*
    is handed to ``TarFile.add`` unchanged, with ``utils.anonymize_tarinfo``
    installed as the add-filter, so relative paths are resolved against the
    current working directory at call time.

    Args:
        fullpath: Destination path of the archive to create.
        files: Iterable of paths to add, in the order they should be stored.
    """
    with tarfile.open(fullpath, mode="w:bz2") as archive:
        for member in files:
            archive.add(member, filter=utils.anonymize_tarinfo)
def create_compressed_tarball(
    prefix, files, tmpdir, basename, ext, compression_filter, filter_opts=""
):
    """Create ``<tmpdir>/<basename><ext>`` from *files* found under *prefix*.

    Files are added in order of a) in info directory, b) increasing size, so
    small manifest or json files can be accessed without decompressing
    possibly large binary or data files.

    Args:
        prefix: Directory that the entries of *files* are relative to.
        files: Iterable of file paths relative to *prefix*.
        tmpdir: Directory in which the archive is created.
        basename: Archive file name without extension.
        ext: Extension appended verbatim to *basename* (e.g. ``".tar.bz2"``).
        compression_filter: Accepted for interface compatibility; not used
            by this function.
        filter_opts: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The absolute path of the archive that was written.
    """
    fullpath = os.path.join(tmpdir, basename) + ext
    if not os.path.isabs(fullpath):
        # The archive is written while inside ``utils.tmp_chdir(prefix)``
        # (members are added by their prefix-relative names), so a relative
        # output path would otherwise be resolved against ``prefix`` and the
        # returned path would not point at the file for the caller.  Anchor it
        # to the caller's working directory first.
        fullpath = os.path.normpath(os.path.join(os.getcwd(), fullpath))

    files = _sort_file_order(prefix, files)
    with utils.tmp_chdir(prefix):
        _create_no_libarchive(fullpath, files)
    return fullpath
class CondaTarBZ2(AbstractBaseFormat):
    """Package format handler for ``.tar.bz2`` archives."""

    @staticmethod
    def supported(fn):
        """Return True when the file name *fn* ends with ``.tar.bz2``."""
        return fn.endswith(".tar.bz2")

    @staticmethod
    def extract(fn, dest_dir, **kw):
        """Extract the archive *fn* into *dest_dir*, creating it if needed.

        Args:
            fn: Path to the archive; a relative path is resolved against the
                current working directory before extraction.
            dest_dir: Target directory.
            **kw: Accepted for interface compatibility; ignored here.
        """
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(dest_dir, exist_ok=True)
        if not os.path.isabs(fn):
            fn = os.path.normpath(os.path.join(os.getcwd(), fn))
        streaming._extract(str(fn), str(dest_dir), components=["pkg"])

    @staticmethod
    def create(prefix, file_list, out_fn, out_folder=None, **kw):
        """Create a ``.tar.bz2`` archive of *file_list* taken from *prefix*.

        Args:
            prefix: Directory that the entries of *file_list* are relative to.
            file_list: Iterable of file paths relative to *prefix*.
            out_fn: Output file name.  If absolute, its directory is used as
                the output folder and *out_folder* is ignored.
            out_folder: Directory to write into.  Defaults to the working
                directory at call time.
            **kw: Accepted for interface compatibility; ignored here.

        Returns:
            The path of the created archive as returned by
            ``create_compressed_tarball``.
        """
        if os.path.isabs(out_fn):
            out_folder = os.path.dirname(out_fn)
        elif out_folder is None:
            # Resolve the default per call; a default of ``os.getcwd()`` in
            # the signature would be frozen at import time.
            out_folder = os.getcwd()

        suffix = ".tar.bz2"
        base = os.path.basename(out_fn)
        # Strip only a trailing suffix, never an occurrence inside the name.
        if base.endswith(suffix):
            base = base[: -len(suffix)]

        return create_compressed_tarball(
            prefix,
            file_list,
            out_folder,
            base,
            suffix,
            "bzip2",
        )

    @staticmethod
    def get_pkg_details(in_file):
        """Return the size and checksums of the file *in_file*.

        Returns:
            A dict with keys ``"size"`` (bytes, from ``os.stat``), ``"md5"``
            and ``"sha256"`` (as produced by ``utils.checksums``).
        """
        size = os.stat(in_file).st_size
        md5, sha256 = utils.checksums(in_file, ("md5", "sha256"))
        return {"size": size, "md5": md5, "sha256": sha256}