Spaces:
Building
Building
# Copyright 2024 The etils Authors. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""`os.path` API backend.""" | |
from __future__ import annotations | |
import abc | |
import contextlib | |
import functools | |
import glob as glob_lib | |
import os | |
import pathlib | |
import shutil | |
import stat as stat_lib | |
import typing | |
from typing import Callable, Iterator, NoReturn, Optional, Union | |
from etils.epath import stat_utils | |
from etils.epath.typing import PathLike # pylint: disable=g-importing-member | |
if typing.TYPE_CHECKING: | |
import fsspec | |
class Backend(abc.ABC):
  """Abstract interface implemented by every filesystem backend.

  Each method mirrors the `os` / `os.path` / `shutil` function of the same
  name. All methods are abstract: a concrete backend has to override every
  one of them before it can be instantiated.
  """

  @abc.abstractmethod
  def open(
      self,
      path: PathLike,
      mode: str,
  ) -> typing.IO[Union[str, bytes]]:
    """`open`. Encoding should be utf-8."""
    raise NotImplementedError

  @abc.abstractmethod
  def exists(self, path: PathLike) -> bool:
    """Returns whether `path` exists."""
    raise NotImplementedError

  @abc.abstractmethod
  def isdir(self, path: PathLike) -> bool:
    """Returns whether `path` is a directory."""
    raise NotImplementedError

  @abc.abstractmethod
  def listdir(self, path: PathLike) -> list[str]:
    """Returns the names of the entries contained in `path`."""
    raise NotImplementedError

  @abc.abstractmethod
  def glob(self, path: PathLike) -> list[str]:
    """Returns the paths matching the glob pattern `path`."""
    raise NotImplementedError

  @abc.abstractmethod
  def walk(
      self,
      top: PathLike,
      *,
      top_down: bool = True,
      on_error: Callable[[OSError], object] | None = None,
  ) -> Iterator[tuple[PathLike, list[str], list[str]]]:
    """Yields `(dirpath, dirnames, filenames)` for the tree rooted at `top`."""
    raise NotImplementedError

  @abc.abstractmethod
  def makedirs(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates `path` (`mode=None` means the backend default)."""
    raise NotImplementedError

  @abc.abstractmethod
  def mkdir(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates the single directory `path`."""
    raise NotImplementedError

  @abc.abstractmethod
  def rmtree(self, path: PathLike) -> None:
    """Removes `path` recursively."""
    raise NotImplementedError

  @abc.abstractmethod
  def remove(self, path: PathLike) -> None:
    """Removes `path`."""
    raise NotImplementedError

  @abc.abstractmethod
  def rename(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`."""
    raise NotImplementedError

  @abc.abstractmethod
  def replace(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`, overwriting `dst`."""
    raise NotImplementedError

  @abc.abstractmethod
  def copy(self, path: PathLike, dst: PathLike, overwrite: bool) -> None:
    """Copies `path` to `dst`; `overwrite` allows replacing `dst`."""
    raise NotImplementedError

  @abc.abstractmethod
  def stat(self, path: PathLike) -> stat_utils.StatResult:
    """Returns the `StatResult` describing `path`."""
    raise NotImplementedError
class _OsPathBackend(Backend):
  """`os.path` backend: local filesystem through the standard library."""

  def open(
      self,
      path: PathLike,
      mode: str,
  ) -> typing.IO[Union[str, bytes]]:
    """Opens `path`; text modes are always decoded as utf-8."""
    # `encoding` is rejected by the builtin `open` for binary modes.
    encoding = None if 'b' in mode else 'utf-8'
    return open(path, mode, encoding=encoding)

  def exists(self, path: PathLike) -> bool:
    """Returns `os.path.exists(path)`."""
    return os.path.exists(path)

  def isdir(self, path: PathLike) -> bool:
    """Returns `os.path.isdir(path)`."""
    return os.path.isdir(path)

  def listdir(self, path: PathLike) -> list[str]:
    """Lists the entries of `path`, skipping names ending with `~`."""
    # GFile filter backup files per default.
    return [p for p in os.listdir(path) if not p.endswith('~')]

  def glob(self, path: PathLike) -> list[str]:
    """Returns the paths matching the glob pattern `path`."""
    # `glob.glob` only handles `str` / `bytes` patterns, so `os.PathLike`
    # objects (e.g. `pathlib.Path`) have to be converted first.
    return glob_lib.glob(os.fspath(path))

  def walk(
      self,
      top: PathLike,
      *,
      top_down: bool = True,
      on_error: Callable[[OSError], object] | None = None,
  ) -> Iterator[tuple[PathLike, list[str], list[str]]]:
    """Yields `(dirpath, dirnames, filenames)` for the tree rooted at `top`."""
    if hasattr(pathlib.Path, 'walk'):  # Python 3.12
      yield from pathlib.Path(top).walk(top_down=top_down, on_error=on_error)
    else:  # Backward compatibility
      # Note that `os.walk` is inconsistent for `symlinks` (always marked as
      # filenames), but should be fine.
      yield from os.walk(top, topdown=top_down, onerror=on_error)

  def makedirs(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates `path` and missing parents (`mode` defaults to `0o777`)."""
    mode = 0o777 if mode is None else mode
    os.makedirs(path, exist_ok=exist_ok, mode=mode)

  def mkdir(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates the directory `path` (`mode` defaults to `0o777`).

    Raises:
      FileExistsError: If `path` exists and is not a directory, or if it is
        a directory and `exist_ok` is False.
    """
    mode = 0o777 if mode is None else mode
    try:
      os.mkdir(path, mode=mode)
    except FileExistsError:
      # Only an already existing *directory* may be silently accepted;
      # an existing file is always an error.
      if not (exist_ok and self.isdir(path)):
        raise

  def rmtree(self, path: PathLike) -> None:
    """Removes `path` recursively; a plain file is removed as well."""
    try:
      shutil.rmtree(path)
    except NotADirectoryError:
      self.remove(path)

  def remove(self, path: PathLike) -> None:
    """Removes the file `path`, or the directory `path` through `os.rmdir`."""
    try:
      os.remove(path)
    except IsADirectoryError:
      os.rmdir(path)
    except PermissionError:
      # On Mac, `PermissionError` is raised instead of `IsADirectoryError`
      if self.isdir(path):
        os.rmdir(path)
      else:
        raise

  def rename(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`.

    Raises:
      FileExistsError: If `dst` already exists.
    """
    if self.exists(dst):
      raise FileExistsError(
          f'Cannot rename {path}. Destination {dst} already exists.'
      )
    os.rename(path, dst)

  def replace(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`, overwriting an existing file.

    Raises:
      IsADirectoryError: If `dst` is a directory.
    """
    if self.isdir(dst):
      raise IsADirectoryError(f'Cannot overwrite: {dst} is a directory')
    os.replace(path, dst)

  def copy(self, path: PathLike, dst: PathLike, overwrite: bool) -> None:
    """Copies the content of the file `path` to `dst`.

    Raises:
      FileExistsError: If `dst` exists and `overwrite` is False.
    """
    if not overwrite and self.exists(dst):
      raise FileExistsError(f'{dst} already exists. Cannot copy {path}.')
    shutil.copyfile(path, dst)

  def stat(self, path: PathLike) -> stat_utils.StatResult:
    """Returns the `StatResult` of `path`.

    `owner` and `group` are `None` on Windows, and also when the uid / gid
    of the file has no entry in the user / group database.
    """
    st = os.stat(path)
    owner = None
    group = None
    if os.name != 'nt':
      import grp  # pylint: disable=g-import-not-at-top
      import pwd  # pylint: disable=g-import-not-at-top

      # `getpwuid` / `getgrgid` raise `KeyError` for ids without a database
      # entry (e.g. arbitrary uids inside containers). The names are
      # optional metadata, so a missing entry should not make `stat` fail.
      with contextlib.suppress(KeyError):
        owner = pwd.getpwuid(st.st_uid).pw_name
      with contextlib.suppress(KeyError):
        group = grp.getgrgid(st.st_gid).gr_name

    return stat_utils.StatResult(
        is_directory=stat_lib.S_ISDIR(st.st_mode),
        length=st.st_size,
        mtime=int(st.st_mtime),
        owner=owner,
        group=group,
        mode=st.st_mode,
    )
class _TfBackend(Backend):
  """TensorFlow backend, implemented on top of `tf.io.gfile`."""

  @functools.cached_property
  def tf(self):
    """The lazily imported `tensorflow` module.

    Has to be a property: the rest of the class accesses it as an attribute
    (`self.tf.errors`, `self.tf.io`).

    Raises:
      ImportError: If TensorFlow is not installed.
    """
    try:
      import tensorflow  # pylint: disable=g-import-not-at-top  # pytype: disable=import-error
    except ImportError as e:
      raise ImportError(
          f'{e}. To use epath.Path with gs://, TensorFlow should be installed.'
      ) from None
    return tensorflow

  @property
  def gfile(self):
    """The `tf.io.gfile` module."""
    return self.tf.io.gfile

  @contextlib.contextmanager
  def open(
      self,
      path: PathLike,
      mode: str,
  ) -> Iterator[typing.IO[Union[str, bytes]]]:
    """Context manager yielding a `GFile` opened on `path`.

    Raises:
      FileNotFoundError: If TF raises `NotFoundError` while the file is used.
    """
    with self.gfile.GFile(path, mode) as f:  # pytype: disable=bad-return-type
      try:
        yield f
      except self.tf.errors.NotFoundError as e:
        raise FileNotFoundError(e) from None

  def exists(self, path: PathLike) -> bool:
    """Returns `tf.io.gfile.exists(path)`."""
    return self.gfile.exists(path)

  def isdir(self, path: PathLike) -> bool:
    """Returns `tf.io.gfile.isdir(path)`."""
    return self.gfile.isdir(path)

  def listdir(self, path: PathLike) -> list[str]:
    """Returns `tf.io.gfile.listdir(path)`."""
    return self.gfile.listdir(path)

  def glob(self, path: PathLike) -> list[str]:
    """Returns `tf.io.gfile.glob(path)`."""
    return self.gfile.glob(path)

  def walk(
      self,
      top: PathLike,
      *,
      top_down: bool = True,
      on_error: Callable[[OSError], object] | None = None,
  ) -> Iterator[tuple[PathLike, list[str], list[str]]]:
    """Yields the items of `tf.io.gfile.walk` for the tree rooted at `top`."""
    yield from self.gfile.walk(top, topdown=top_down, onerror=on_error)

  def makedirs(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates `path` with `tf.io.gfile.makedirs`.

    Raises:
      NotImplementedError: If `mode` is neither `None` nor `0o777`.
      FileExistsError: If `path` exists and `exist_ok` is False, or if TF
        reports that a component is not a directory.
      OSError: For any other `FailedPreconditionError` raised by TF.
    """
    mode = 0o777 if mode is None else mode
    if mode != 0o777:
      # tf.io.gfile do not support setting `mode=`
      raise NotImplementedError(
          'makedirs with custom `mode=` not supported for tf.io.gfile backend.'
          ' Please open an issue.'
      )
    # TF do not have a `exist_ok=` kwargs, so have to first check existence.
    # This has performance impact but can be disabled with `exist_ok=True`.
    if not exist_ok and self.exists(path):
      raise FileExistsError(f'{path} already exists.')
    try:
      self.gfile.makedirs(path)
    except self.tf.errors.FailedPreconditionError as e:
      if 'not a directory' in str(e):
        raise FileExistsError(str(e)) from None
      else:
        raise OSError(str(e)) from None

  def mkdir(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates the directory `path` with `tf.io.gfile.mkdir`.

    Raises:
      NotImplementedError: If `mode` is neither `None` nor `0o777`.
      FileExistsError: If `path` exists and `exist_ok` is False, or if
        `path` is not a directory once the call returned.
      FileNotFoundError: If TF raises `NotFoundError`.
    """
    mode = 0o777 if mode is None else mode
    if mode != 0o777:
      # tf.io.gfile do not support setting `mode=`
      raise NotImplementedError(
          'mkdir with custom `mode=` not supported for tf.io.gfile backend.'
          ' Please open an issue.'
      )
    if not exist_ok and self.exists(path):
      raise FileExistsError(f'{path} already exists.')
    try:
      self.gfile.mkdir(path)
    except self.tf.errors.NotFoundError as e:
      raise FileNotFoundError(str(e)) from None
    else:
      if not self.isdir(path):  # TF do not raises error for files
        raise FileExistsError(f'Cannot create dir. {path} is not a directory')

  def rmtree(self, path: PathLike) -> None:
    """Removes `path` recursively.

    Raises:
      FileNotFoundError: If TF raises `NotFoundError`.
    """
    try:
      self.gfile.rmtree(path)
    except self.tf.errors.NotFoundError as e:
      raise FileNotFoundError(str(e)) from None

  def remove(self, path: PathLike) -> None:
    """Removes `path`.

    Raises:
      OSError: If TF raises `FailedPreconditionError`.
      FileNotFoundError: If TF raises `NotFoundError`.
    """
    try:
      self.gfile.remove(path)
    except self.tf.errors.FailedPreconditionError as e:  # Dir not empty
      raise OSError(str(e)) from None
    except self.tf.errors.NotFoundError as e:
      raise FileNotFoundError(str(e)) from None

  def rename(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`; TF errors are mapped to `OSError` types."""
    try:
      self.gfile.rename(path, dst)
    except self.tf.errors.OpError as e:
      self._reraise_error(e)

  def replace(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst` with `overwrite=True`."""
    try:
      self.gfile.rename(path, dst, overwrite=True)
    except self.tf.errors.OpError as e:
      self._reraise_error(e)

  def copy(self, path: PathLike, dst: PathLike, overwrite: bool) -> None:
    """Copies `path` to `dst`.

    Raises:
      IsADirectoryError: If `overwrite` is True and `dst` is a directory.
    """
    if overwrite and self.isdir(dst):  # For consistency with rename, replace
      raise IsADirectoryError(
          f'Cannot copy {path}. Destination {dst} is a directory'
      )
    try:
      self.gfile.copy(path, dst, overwrite=overwrite)
    except self.tf.errors.OpError as e:
      self._reraise_error(e)

  def _reraise_error(self, e) -> NoReturn:
    """Reraise the TF error as the matching builtin `OSError` subclass.

    Has to be called from inside an `except` block: errors without a
    mapping are re-raised unchanged with a bare `raise`.

    Args:
      e: The TF error currently being handled.
    """
    e_msg = str(e)
    e_msg_lower = e_msg.lower()
    errors = self.tf.errors
    if isinstance(e, errors.FailedPreconditionError):
      if 'not a directory' in e_msg_lower:
        raise NotADirectoryError(e_msg) from None
      if 'is a directory' in e_msg_lower:
        raise IsADirectoryError(e_msg) from None
      raise OSError(e_msg) from None
    if isinstance(e, errors.AlreadyExistsError):
      if 'is a directory' in e_msg_lower:
        raise IsADirectoryError(e_msg) from None
      raise FileExistsError(e_msg) from None
    if isinstance(e, errors.NotFoundError):
      raise FileNotFoundError(e_msg) from None
    raise  # pylint: disable=misplaced-bare-raise

  def stat(self, path: PathLike) -> stat_utils.StatResult:
    """Returns the `StatResult` built from `tf.io.gfile.stat(path)`."""
    st = self.gfile.stat(path)
    return stat_utils.StatResult(
        is_directory=st.is_directory,
        length=st.length,
        mtime=st.mtime_nsec // 1_000_000_000,  # Nanoseconds -> seconds.
        owner=None,  # Not available.
        group=None,  # Not available.
        mode=None,
    )
class _FileSystemSpecBackend(Backend):
  """FileSystemSpec backend entirely relying on fsspec."""

  @functools.lru_cache(maxsize=None)  # pylint: disable=method-cache-max-size-none
  def _get_filesystem(self, name: str) -> fsspec.AbstractFileSystem:
    """Returns the fsspec filesystem registered as `name` (cached).

    Raises:
      ImportError: If fsspec is not installed.
    """
    try:
      import fsspec  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      raise ImportError(
          'To use epath.Path with gs://, fsspec should be installed.'
      ) from e
    return fsspec.filesystem(name)

  def fs(self, path: PathLike) -> fsspec.AbstractFileSystem:
    """Returns the proper fsspec filesystem: GCS, S3, Azure or file."""
    path = os.fspath(path)
    if path.startswith('gs://'):
      return self._get_filesystem('gcs')
    elif path.startswith('s3://'):
      return self._get_filesystem('s3')
    elif path.startswith('az://'):
      return self._get_filesystem('az')
    else:
      return self._get_filesystem('file')

  def open(self, path: PathLike, mode: str) -> typing.IO[Union[str, bytes]]:
    """Opens `path`; text modes are decoded as utf-8."""
    if 'b' in mode:
      return self.fs(path).open(path, mode=mode)
    # fsspec forwards `encoding` to the `io.TextIOWrapper` it creates for
    # text modes. Without it, the locale default encoding would be used.
    return self.fs(path).open(path, mode=mode, encoding='utf-8')

  def exists(self, path: PathLike) -> bool:
    """Returns whether `path` exists on its filesystem."""
    return self.fs(path).exists(path)

  def isdir(self, path: PathLike) -> bool:
    """Returns whether `path` is a directory on its filesystem."""
    return self.fs(path).isdir(path)

  def listdir(self, path: PathLike) -> list[str]:
    """Lists the base names of the entries of `path`, skipping `*~` ones."""
    paths = self.fs(path).listdir(path, detail=False)
    return [os.path.basename(p) for p in paths if not p.endswith('~')]

  def glob(self, path: PathLike) -> list[str]:
    """Returns the glob matches, each prefixed with the protocol of `path`."""
    protocol = _get_protocol(path)
    return [protocol + p for p in self.fs(path).glob(path)]

  def walk(
      self,
      top: PathLike,
      *,
      top_down: bool = True,
      on_error: Callable[[OSError], object] | None = None,
  ) -> Iterator[tuple[PathLike, list[str], list[str]]]:
    """Yields the items of the fsspec `walk` for the tree rooted at `top`."""
    if on_error is None:
      on_error = 'omit'  # default behavior for pathlib.Path.walk
    yield from self.fs(top).walk(  # pytype: disable=bad-return-type
        top,
        topdown=top_down,
        on_error=on_error,
        max_depth=None,
    )

  def makedirs(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates `path` through the fsspec `makedirs`.

    Raises:
      NotImplementedError: If `mode` is neither `None` nor `0o777`.
    """
    mode = 0o777 if mode is None else mode
    if mode != 0o777:
      # FileSystemSpec backend do not support setting `mode=`
      raise NotImplementedError(
          'makedirs with custom `mode=` not supported for FileSystemSpec'
          ' backend. Please open an issue.'
      )
    return self.fs(path).makedirs(path, exist_ok=exist_ok)

  def mkdir(
      self,
      path: PathLike,
      *,
      exist_ok: bool = False,
      mode: Optional[int] = None,
  ) -> None:
    """Creates the directory `path` without creating its parents.

    Raises:
      NotImplementedError: If `mode` is neither `None` nor `0o777`.
      FileExistsError: If `path` exists, unless it is a directory and
        `exist_ok` is True.
    """
    mode = 0o777 if mode is None else mode
    if mode != 0o777:
      # FileSystemSpec backend do not support setting `mode=`
      raise NotImplementedError(
          'mkdir with custom `mode=` not supported for FileSystemSpec backend.'
          ' Please open an issue.'
      )
    try:
      return self.fs(path).mkdir(path, create_parents=False)
    except FileExistsError:
      if exist_ok and self.isdir(path):
        return
      raise FileExistsError(
          f'The operation failed because the specified {path=} already exists.'
      ) from None

  def rmtree(self, path: PathLike) -> None:
    """Removes `path` recursively."""
    return self.fs(path).rm(path, recursive=True)

  def remove(self, path: PathLike) -> None:
    """Removes `path`, falling back to `rmdir` when `rm` rejects it."""
    try:
      return self.fs(path).rm(path, recursive=False)
    except (IsADirectoryError, ValueError):
      return self.fs(path).rmdir(path)

  def rename(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`.

    Raises:
      FileExistsError: If `dst` already exists.
      FileNotFoundError: If the parent folder of `dst` does not exist.
    """
    # `dst` is known not to exist after this check, so no further
    # file-vs-directory validation of the destination is needed.
    if self.exists(dst):
      raise FileExistsError(f'{dst} already exists. Cannot rename {path}.')
    # Check that `dst` is in an existing folder
    dst_folder = os.path.dirname(dst)
    if not self.exists(dst_folder):
      raise FileNotFoundError(f'folder {dst_folder} does not exist')
    # Stringify paths, because PosixPaths do not implement len:
    path, dst = os.fspath(path), os.fspath(dst)
    return self.fs(path).rename(path, dst, recursive=True)

  def replace(self, path: PathLike, dst: PathLike) -> None:
    """Renames `path` to `dst`, overwriting an existing file.

    Raises:
      IsADirectoryError: If `dst` is a directory.
      FileNotFoundError: If `path` or the parent folder of `dst` is missing.
      NotADirectoryError: If `path` is a directory and `dst` exists.
    """
    if self.isdir(dst):
      raise IsADirectoryError(f'Cannot overwrite: {dst} is a directory')
    if not self.exists(path):
      raise FileNotFoundError(f'Cannot replace: path {path} does not exist')
    if self.isdir(path) and self.exists(dst):
      raise NotADirectoryError(
          f'Cannot replace a directory {path} by a file {dst}'
      )
    # Check that `dst` is in an existing folder
    dst_folder = os.path.dirname(dst)
    if not self.exists(dst_folder):
      raise FileNotFoundError(f'folder {dst_folder} does not exist')
    # Stringify paths, because PosixPaths do not implement len:
    path, dst = os.fspath(path), os.fspath(dst)
    return self.fs(path).rename(path, dst, recursive=True)

  def copy(self, path: PathLike, dst: PathLike, overwrite: bool) -> None:
    """Copies the file `path` to `dst`, streaming it chunk by chunk.

    Raises:
      FileExistsError: If `dst` exists and `overwrite` is False.
      IsADirectoryError: If `path` or `dst` is a directory.
    """
    if not overwrite and self.exists(dst):
      raise FileExistsError(f'{dst} already exists. Cannot copy {path}.')
    is_dir_dst = self.isdir(dst)
    if self.isdir(path) and not is_dir_dst:
      raise IsADirectoryError(
          f'Cannot copy to {dst}. Path {path} is a directory'
      )
    if overwrite and is_dir_dst:
      raise IsADirectoryError(
          f'Cannot overwrite {path}. Destination {dst} is a directory'
      )
    # Stringify paths, because PosixPaths do not implement len:
    path, dst = os.fspath(path), os.fspath(dst)
    chunksize = 1024 * 1024  # 1 MB
    # Source and destination may live on different filesystems, so the
    # bytes are streamed through the two file objects.
    with self.open(path, mode='rb') as src, self.open(dst, mode='wb') as out:
      shutil.copyfileobj(src, out, chunksize)

  def stat(self, path: PathLike) -> stat_utils.StatResult:
    """Returns the `StatResult` built from the fsspec `info` of `path`.

    Fields missing from `info` are `None`, except `mtime` which is `0`.
    """
    info = self.fs(path).info(path)
    mtime = int(info.get('mtime', 0.0))
    return stat_utils.StatResult(
        is_directory=info.get('type') == 'directory',
        length=info.get('size'),
        mtime=mtime,
        owner=info.get('owner'),
        group=info.get('group'),
        mode=info.get('mode'),
    )
def _get_protocol(path: PathLike) -> str:
  """Returns the `xxx://` prefix of `path`, or `''` when it has none."""
  scheme, separator, _ = os.fspath(path).partition('://')
  # `separator` is empty when `://` does not appear in the path.
  return scheme + separator if separator else ''
# Module-level backend instances, one per implementation.
os_backend = _OsPathBackend()
tf_backend = _TfBackend()
fsspec_backend = _FileSystemSpecBackend()