import datetime
import io
import logging
import os
import os.path as osp
import shutil
import stat
import tempfile

from fsspec import AbstractFileSystem
from fsspec.compression import compr
from fsspec.core import get_compression
from fsspec.utils import isfilelike, stringify_path

logger = logging.getLogger("fsspec.local")


class LocalFileSystem(AbstractFileSystem):
    """Interface to files on local storage

    Parameters
    ----------
    auto_mkdir: bool
        Whether, when opening a file, the directory containing it should
        be created (if it doesn't already exist). This is assumed by pyarrow
        code.
    """

    root_marker = "/"
    protocol = "file", "local"
    local_file = True

    def __init__(self, auto_mkdir=False, **kwargs):
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir

    @property
    def fsid(self):
        """Identifier of this filesystem; always ``"local"``."""
        return "local"

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create the directory ``path``.

        Raises ``FileExistsError`` if ``path`` already exists. When
        ``create_parents`` is true, missing parent directories are created as
        well; otherwise ``kwargs`` are forwarded to ``os.mkdir``.
        """
        path = self._strip_protocol(path)
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` and any missing parents via ``os.makedirs``."""
        path = self._strip_protocol(path)
        os.makedirs(path, exist_ok=exist_ok)

    def rmdir(self, path):
        """Remove the directory ``path`` via ``os.rmdir``."""
        path = self._strip_protocol(path)
        os.rmdir(path)

    def ls(self, path, detail=False, **kwargs):
        """List ``path``.

        For a directory, every entry found by ``os.scandir`` is reported; for
        anything else the path itself is the single entry. Returns the info
        dicts when ``detail`` is true, otherwise only their ``"name"`` values.
        """
        path = self._strip_protocol(path)
        info = self.info(path)
        if info["type"] == "directory":
            with os.scandir(path) as it:
                infos = [self.info(f) for f in it]
        else:
            infos = [info]

        if not detail:
            return [i["name"] for i in infos]
        return infos

    def info(self, path, **kwargs):
        """Return a dict of stat-derived details for ``path``.

        ``path`` may be a string/path-like or an ``os.DirEntry`` (as produced
        by ``os.scandir``). The result holds ``name``, ``size``, ``type``
        (``"directory"``, ``"file"`` or ``"other"``), ``created``, ``islink``
        and the ``mode``/``uid``/``gid``/``mtime``/``ino``/``nlink`` stat
        fields. For symlinks, ``destination`` is added and ``size`` is that of
        the link target (``0`` if the target cannot be stat'ed).
        """
        if isinstance(path, os.DirEntry):
            # scandir DirEntry
            out = path.stat(follow_symlinks=False)
            link = path.is_symlink()
            if path.is_dir(follow_symlinks=False):
                t = "directory"
            elif path.is_file(follow_symlinks=False):
                t = "file"
            else:
                t = "other"
            path = self._strip_protocol(path.path)
        else:
            # str or path-like
            path = self._strip_protocol(path)
            out = os.stat(path, follow_symlinks=False)
            link = stat.S_ISLNK(out.st_mode)
            if link:
                out = os.stat(path, follow_symlinks=True)
            if stat.S_ISDIR(out.st_mode):
                t = "directory"
            elif stat.S_ISREG(out.st_mode):
                t = "file"
            else:
                t = "other"
        result = {
            "name": path,
            "size": out.st_size,
            "type": t,
            "created": out.st_ctime,
            "islink": link,
        }
        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
            result[field] = getattr(out, f"st_{field}")
        if result["islink"]:
            result["destination"] = os.readlink(path)
            try:
                out2 = os.stat(path, follow_symlinks=True)
                result["size"] = out2.st_size
            except OSError:
                result["size"] = 0
        return result

    def lexists(self, path, **kwargs):
        """Return whether ``path`` exists, without following a final symlink.

        The path is normalised with ``_strip_protocol`` first, so ``file://``
        and ``local://`` URLs and path-like objects are accepted, as in the
        other methods of this class.
        """
        return osp.lexists(self._strip_protocol(path))

    def cp_file(self, path1, path2, **kwargs):
        """Copy the single path ``path1`` to ``path2``.

        A file is copied with ``shutil.copyfile``; for a directory only the
        target directory is created. Raises ``FileNotFoundError`` if ``path1``
        is neither. With ``auto_mkdir`` set, the parent of ``path2`` is
        created first.
        """
        path1 = self._strip_protocol(path1)
        path2 = self._strip_protocol(path2)
        if self.auto_mkdir:
            self.makedirs(self._parent(path2), exist_ok=True)
        if self.isfile(path1):
            shutil.copyfile(path1, path2)
        elif self.isdir(path1):
            self.mkdirs(path2, exist_ok=True)
        else:
            raise FileNotFoundError(path1)

    def isfile(self, path):
        """Return whether ``path`` is a regular file (``os.path.isfile``)."""
        path = self._strip_protocol(path)
        return os.path.isfile(path)

    def isdir(self, path):
        """Return whether ``path`` is a directory (``os.path.isdir``)."""
        path = self._strip_protocol(path)
        return os.path.isdir(path)

    def get_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` into ``path2``.

        ``path2`` may be a file-like object, in which case the bytes of
        ``path1`` are streamed into it; otherwise this is ``cp_file``.
        ``callback`` is accepted but not used here.
        """
        if isfilelike(path2):
            # Normalise the source just as cp_file does, so that
            # protocol-prefixed paths work on this branch too.
            path1 = self._strip_protocol(path1)
            with open(path1, "rb") as f:
                shutil.copyfileobj(f, path2)
        else:
            return self.cp_file(path1, path2, **kwargs)

    def put_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` to ``path2``; identical to ``cp_file`` here."""
        return self.cp_file(path1, path2, **kwargs)

    def mv(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2`` using ``shutil.move``."""
        path1 = self._strip_protocol(path1)
        path2 = self._strip_protocol(path2)
        shutil.move(path1, path2)

    def link(self, src, dst, **kwargs):
        """Create a hard link ``dst`` pointing at ``src`` (``os.link``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.link(src, dst, **kwargs)

    def symlink(self, src, dst, **kwargs):
        """Create a symbolic link ``dst`` pointing at ``src`` (``os.symlink``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.symlink(src, dst, **kwargs)

    def islink(self, path) -> bool:
        """Return whether ``path`` is a symbolic link."""
        return os.path.islink(self._strip_protocol(path))

    def rm_file(self, path):
        """Delete the single file ``path`` via ``os.remove``."""
        os.remove(self._strip_protocol(path))

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete one path or a list of paths.

        Directories are removed with ``shutil.rmtree`` and require
        ``recursive=True``; removing the current working directory is refused.
        Both conditions raise ``ValueError``. ``maxdepth`` is accepted but not
        used by this implementation.
        """
        if not isinstance(path, list):
            path = [path]

        for p in path:
            p = self._strip_protocol(p)
            if self.isdir(p):
                if not recursive:
                    raise ValueError("Cannot delete directory, set recursive=True")
                if osp.abspath(p) == os.getcwd():
                    raise ValueError("Cannot delete current working directory")
                shutil.rmtree(p)
            else:
                os.remove(p)

    def unstrip_protocol(self, name):
        """Return ``name`` as a ``file://`` URL."""
        name = self._strip_protocol(name)  # normalise for local/win/...
        return f"file://{name}"

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """Return a ``LocalFileOpener`` for ``path``.

        With ``auto_mkdir`` set and a ``"w"`` mode, the parent directory is
        created first. ``block_size`` is accepted but not used here.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        return LocalFileOpener(path, mode, fs=self, **kwargs)

    def touch(self, path, truncate=True, **kwargs):
        """Create ``path`` if missing, otherwise update its timestamps.

        When ``truncate`` is true the file is emptied afterwards.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir:
            self.makedirs(self._parent(path), exist_ok=True)
        if self.exists(path):
            os.utime(path, None)
        else:
            open(path, "a").close()
        if truncate:
            os.truncate(path, 0)

    def created(self, path):
        """Return the ``created`` timestamp of ``path`` as an aware UTC datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(
            info["created"], tz=datetime.timezone.utc
        )

    def modified(self, path):
        """Return the ``mtime`` of ``path`` as an aware UTC datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)

    @classmethod
    def _parent(cls, path):
        """Return the parent directory of ``path`` in normalised posix form."""
        path = cls._strip_protocol(path)
        if os.sep == "/":
            # posix native
            return path.rsplit("/", 1)[0] or "/"
        else:
            # NT
            path_ = path.rsplit("/", 1)[0]
            if len(path_) <= 3:
                if path_[1:2] == ":":
                    # nt root (something like c:/)
                    return path_[0] + ":/"
            # More cases may be required here
            return path_

    @classmethod
    def _strip_protocol(cls, path):
        """Normalise ``path`` to an absolute, forward-slash path.

        A leading ``file://``, ``file:``, ``local://`` or ``local:`` is
        removed, the remainder is passed through ``make_path_posix`` and
        trailing slashes are dropped (the root is kept as ``root_marker``).
        """
        path = stringify_path(path)
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        elif path.startswith("local://"):
            path = path[8:]
        elif path.startswith("local:"):
            path = path[6:]

        path = make_path_posix(path)
        if os.sep != "/":
            # This code-path is a stripped down version of
            # > drive, path = ntpath.splitdrive(path)
            if path[1:2] == ":":
                # Absolute drive-letter path, e.g. X:\Windows
                # Relative path with drive, e.g. X:Windows
                drive, path = path[:2], path[2:]
            elif path[:2] == "//":
                # UNC drives, e.g. \\server\share or \\?\UNC\server\share
                # Device drives, e.g. \\.\device or \\?\device
                if (index1 := path.find("/", 2)) == -1 or (
                    index2 := path.find("/", index1 + 1)
                ) == -1:
                    drive, path = path, ""
                else:
                    drive, path = path[:index2], path[index2:]
            else:
                # Relative path, e.g. Windows
                drive = ""

            path = path.rstrip("/") or cls.root_marker
            return drive + path

        else:
            return path.rstrip("/") or cls.root_marker

    def _isfilestore(self):
        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
        # the original motivation. But we are a posix-like file system.
        # See https://github.com/dask/dask/issues/5526
        return True

    def chmod(self, path, mode):
        """Change the permission bits of ``path`` to ``mode`` (``os.chmod``).

        The path is normalised with ``_strip_protocol`` like every other
        method of this class, so protocol-prefixed paths are accepted.
        """
        path = self._strip_protocol(path)
        return os.chmod(path, mode)


def make_path_posix(path):
    """Make path generic and absolute for current OS

    Lists, sets and tuples are converted element-wise and returned as the
    same container type. Raises ``TypeError`` if ``path`` cannot be turned
    into a string by ``stringify_path``.
    """
    if not isinstance(path, str):
        if isinstance(path, (list, set, tuple)):
            return type(path)(make_path_posix(p) for p in path)
        else:
            path = stringify_path(path)
            if not isinstance(path, str):
                raise TypeError(f"could not convert {path!r} to string")
    if os.sep == "/":
        # Native posix
        if path.startswith("/"):
            # most common fast case for posix
            return path
        elif path.startswith("~"):
            return osp.expanduser(path)
        elif path.startswith("./"):
            path = path[2:]
        elif path == ".":
            path = ""
        return f"{os.getcwd()}/{path}"
    else:
        # NT handling
        if path[0:1] == "/" and path[2:3] == ":":
            # path is like "/c:/local/path"
            path = path[1:]
        if path[1:2] == ":":
            # windows full path like "C:\\local\\path"
            if len(path) <= 3:
                # nt root (something like c:/)
                return path[0] + ":/"
            path = path.replace("\\", "/")
            return path
        elif path[0:1] == "~":
            return make_path_posix(osp.expanduser(path))
        elif path.startswith(("\\\\", "//")):
            # windows UNC/DFS-style paths
            return "//" + path[2:].replace("\\", "/")
        elif path.startswith(("\\", "/")):
            # windows relative path with root
            path = path.replace("\\", "/")
            return f"{osp.splitdrive(os.getcwd())[0]}{path}"
        else:
            path = path.replace("\\", "/")
            if path.startswith("./"):
                path = path[2:]
            elif path == ".":
                path = ""
            return f"{make_path_posix(os.getcwd())}/{path}"


def trailing_sep(path):
    """Return True if the path ends with a path separator.

    A forward slash is always considered a path separator, even on Operating
    Systems that normally use a backslash.
    """
    # TODO: if all incoming paths were posix-compliant then separator would
    # always be a forward slash, simplifying this function.
    # See https://github.com/fsspec/filesystem_spec/pull/1250
    return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))


class LocalFileOpener(io.IOBase):
    """File-like wrapper around a local file opened with builtin ``open``.

    Parameters
    ----------
    path: str
        Location of the file.
    mode: str
        Mode passed to ``open``.
    autocommit: bool
        If false and ``mode`` contains ``"w"``, data is written to a
        temporary file which only replaces ``path`` on ``commit()``.
    fs: LocalFileSystem or None
        Filesystem instance that created this object; stored as ``self.fs``.
    compression: str or None
        Passed with ``path`` to ``get_compression``; when that yields a
        codec name, the file is wrapped with the matching entry of ``compr``.
    """

    def __init__(
        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
    ):
        logger.debug("open file: %s", path)
        self.path = path
        self.mode = mode
        self.fs = fs
        self.f = None
        self.autocommit = autocommit
        self.compression = get_compression(path, compression)
        self.blocksize = io.DEFAULT_BUFFER_SIZE
        self._open()

    def _open(self):
        """(Re)open the underlying file if it is not currently open."""
        if self.f is None or self.f.closed:
            if self.autocommit or "w" not in self.mode:
                self.f = open(self.path, mode=self.mode)
                if self.compression:
                    compress = compr[self.compression]
                    self.f = compress(self.f, mode=self.mode)
            else:
                # TODO: check if path is writable?
                i, name = tempfile.mkstemp()
                os.close(i)  # we want normal open and normal buffered file
                self.temp = name
                self.f = open(name, mode=self.mode)
            if "w" not in self.mode:
                # Record the size by seeking to the end, then rewind.
                self.size = self.f.seek(0, 2)
                self.f.seek(0)
                self.f.size = self.size

    def _fetch_range(self, start, end):
        """Return the bytes in ``[start, end)``; read modes only."""
        # probably only used by cached FS
        if "r" not in self.mode:
            raise ValueError
        self._open()
        self.f.seek(start)
        return self.f.read(end - start)

    def __setstate__(self, state):
        """Restore from pickled state, reopening read-mode files at ``loc``."""
        self.f = None
        loc = state.pop("loc", None)
        self.__dict__.update(state)
        if "r" in state["mode"]:
            self.f = None
            self._open()
            self.f.seek(loc)

    def __getstate__(self):
        """Return picklable state.

        The open file handle is dropped; read-mode files remember their
        current position as ``loc``. Raises ``ValueError`` for a write-mode
        file that is still open.
        """
        d = self.__dict__.copy()
        d.pop("f")
        if "r" in self.mode:
            d["loc"] = self.f.tell()
        else:
            if not self.f.closed:
                raise ValueError("Cannot serialise open write-mode local file")
        return d

    def commit(self):
        """Move the temporary file to ``path``; only valid without autocommit."""
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        shutil.move(self.temp, self.path)

    def discard(self):
        """Delete the temporary file; only valid without autocommit."""
        if self.autocommit:
            raise RuntimeError("Cannot discard if set to autocommit")
        os.remove(self.temp)

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        """Return whether the mode permits writing.

        Update modes such as ``"r+b"`` contain ``"r"`` but are writable too.
        """
        return "r" not in self.mode or "+" in self.mode

    def read(self, *args, **kwargs):
        return self.f.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.f.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.f.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.f.seek(*args, **kwargs)

    def seekable(self, *args, **kwargs):
        return self.f.seekable(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.f.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self.f.readlines(*args, **kwargs)

    def close(self):
        return self.f.close()

    def truncate(self, size=None) -> int:
        return self.f.truncate(size)

    @property
    def closed(self):
        return self.f.closed

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        # Ask the wrapped object directly instead of going through ``.raw``,
        # which text-mode files and compression wrappers may not provide.
        return self.f.fileno()

    def flush(self) -> None:
        self.f.flush()

    def __iter__(self):
        return self.f.__iter__()

    def __getattr__(self, item):
        """Delegate unknown attributes to the underlying file object."""
        # ``f`` itself must never be resolved through delegation: if it has
        # not been assigned yet, looking it up here would recurse without end.
        if item == "f":
            raise AttributeError(item)
        return getattr(self.f, item)

    def __enter__(self):
        self._incontext = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.f.__exit__(exc_type, exc_value, traceback)