PTWZ's picture
Upload folder using huggingface_hub
f5f3483 verified
# Copyright 2024 The etils Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utils to handle resources."""
from __future__ import annotations
import itertools
import pathlib
import posixpath
import sys
import types
import typing
from typing import Union
import zipfile
from etils.epath import abstract_path
from etils.epath import register
from etils.epath.typing import PathLike # pylint: disable=g-importing-member
import importlib_resources
@register.register_path_cls
class ResourcePath(zipfile.Path):
"""Wrapper around `zipfile.Path` compatible with `os.PathLike`.
Note: Calling `os.fspath` on the path will extract the file so should be
discouraged.
"""
def __fspath__(self) -> str:
"""Path string for `os.path.join`, `open`,...
compatibility.
Note: Calling `os.fspath` on the path extract the file, so should be
discouraged. Prefer using `read_bytes`,... This only works for files,
not directories.
Returns:
the extracted path string.
"""
raise NotImplementedError('zipapp not supported. Please send us a PR.')
# zipfile.Path do not define `__eq__` nor `__hash__`. See:
# https://discuss.python.org/t/missing-zipfile-path-eq-and-zipfile-path-hash/16519
def __eq__(self, other) -> bool:
return (
type(self) == type(other) # pylint: disable=unidiomatic-typecheck
and self.root == other.root # pytype: disable=attribute-error
and self.at == other.at # pytype: disable=attribute-error
)
def __hash__(self) -> int:
return hash((self.root, self.at)) # pytype: disable=attribute-error
if sys.version_info < (3, 10):
# Required due to: https://bugs.python.org/issue42043
def _next(self, at) -> 'ResourcePath': # pylint: disable=g-wrong-blank-lines
return type(self)(self.root, at) # pytype: disable=attribute-error
# Before 3.10, joinpath only accept a single arg
def joinpath(self, *other):
"""Overwrite `joinpath` to be consistent with `pathlib.Path`."""
next_ = posixpath.join(self.at, *other) # pytype: disable=attribute-error
return self._next(self.root.resolve_dir(next_)) # pytype: disable=attribute-error
if sys.version_info < (3, 11):
@property
def suffix(self):
return pathlib.Path(self.at).suffix or self.filename.suffix # pytype: disable=attribute-error
def resource_path(package: Union[str, types.ModuleType]) -> abstract_path.Path:
"""Returns read-only root directory path of the module.
Used to access module resource files.
Usage:
```python
path = epath.resource_path('tensorflow_datasets') / 'README.md'
content = path.read_text()
```
This is compatible with everything, including zipapp (`.par`).
Resource files should be in the `data=` of the `py_library(` (when using
bazel).
To write to your project (e.g. automatically update your code), read-only
resource paths can be converted to read-write paths with
`epath.to_write_path(path)`.
Args:
package: Module or module name.
Returns:
The read-only path to the root module directory
"""
try:
path = importlib_resources.files(package) # pytype: disable=module-attr
except AttributeError:
is_adhoc = True
else:
if isinstance(
path, importlib_resources._adapters.CompatibilityFiles.SpecPath # pylint: disable=protected-access
):
is_adhoc = True
else:
is_adhoc = False
if is_adhoc:
# TODO(b/260333695): `importlib_resources` fail with adhoc imports
# When module are imported with adhoc, `importlib_resources.files` returns
# a non-path object, so convert manually.
# Note this is not the true path (`/google_src/` vs
# `/export/.../server/ml_notebook.runfiles`), but should be equivalent.
path = pathlib.Path(sys.modules[package].__file__)
if path.name == '__init__.py':
path = path.parent
# pylint: disable=undefined-variable
if isinstance(path, pathlib.Path):
# TODO(etils): To ensure compatibility with zipfile.Path, we should ensure
# that the returned `pathlib.Path` isn't missused. More specifically:
# * `os.fspath` should only be called on files (not directories)
# * `str(path)` should be forbidden (only `__format__` allowed).
# In practice, it is trickier to do as `__fspath__` and `__str__` are
# called internally.
# Convert to `GPath` for consistency and compatibility with `MockFs`.
return abstract_path.Path(path)
elif isinstance(path, zipfile.Path):
path = ResourcePath(path.root, path.at)
return typing.cast(abstract_path.Path, path)
elif isinstance(path, importlib_resources.abc.Traversable):
# Is seems like `importlib_resources.files` can return additional types,
# like `MultiplexedPath`.
# Fallback to avoid failure, however those objects might not implement
# `__fspath__`, so might fail later.
return typing.cast(abstract_path.Path, path)
else:
raise TypeError(f'Unknown resource path: {type(path)}: {path}')
# pylint: enable=undefined-variable
def to_write_path(path: abstract_path.Path) -> abstract_path.Path:
"""Cast the `epath.resource_path` to a read-write Path."""
return path