Spaces:
Running
Running
import os | |
import sys | |
import uuid | |
import warnings | |
from ftplib import FTP, Error, error_perm | |
from typing import Any | |
from ..spec import AbstractBufferedFile, AbstractFileSystem | |
from ..utils import infer_storage_options, isfilelike | |
class FTPFileSystem(AbstractFileSystem):
    """A filesystem over classic FTP"""

    root_marker = "/"
    cachable = False
    protocol = "ftp"

    def __init__(
        self,
        host,
        port=21,
        username=None,
        password=None,
        acct=None,
        block_size=None,
        tempdir=None,
        timeout=30,
        encoding="utf-8",
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable FTP url.

        Authentication will be anonymous if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int
            Port to connect with
        username: str or None
            If authenticating, the user's identifier
        password: str or None
            User's password on the server, if using
        acct: str or None
            Some servers also need an "account" string for auth
        block_size: int or None
            If given, the read-ahead or write buffer size.
            Defaults to 2**16 bytes when not given.
        tempdir: str
            Directory on remote to put temporary files when in a transaction.
            Defaults to "/tmp" when not given (or empty).
        timeout: int
            Timeout of the ftp connection in seconds
        encoding: str
            Encoding to use for directories and filenames in FTP connection
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.tempdir = tempdir or "/tmp"
        self.cred = username, password, acct
        self.timeout = timeout
        self.encoding = encoding
        self.blocksize = block_size if block_size is not None else 2**16
        self._connect()

    def _connect(self):
        """(Re)create ``self.ftp``, connect to host/port and log in.

        Also called by ``FTPFile._fetch_range`` to recover after a failed
        transfer abort.
        """
        if sys.version_info >= (3, 9):
            self.ftp = FTP(timeout=self.timeout, encoding=self.encoding)
        elif self.encoding:
            warnings.warn("`encoding` not supported for python<3.9, ignoring")
            self.ftp = FTP(timeout=self.timeout)
        else:
            self.ftp = FTP(timeout=self.timeout)
        self.ftp.connect(self.host, self.port)
        self.ftp.login(*self.cred)

    @classmethod
    def _strip_protocol(cls, path):
        """Return the path component of ``path`` with exactly one leading
        "/" and no trailing "/" (the root becomes "/")."""
        return "/" + infer_storage_options(path)["path"].lstrip("/").rstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the storage options inferred from ``urlpath``, minus the
        "path" and "protocol" entries."""
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def ls(self, path, detail=True, **kwargs):
        """List the entries of a remote directory.

        Listings are stored in ``self.dircache`` keyed by the stripped path;
        a cached listing is returned without contacting the server.

        Parameters
        ----------
        path: str
            Directory (or file) to list
        detail: bool
            If True, return one info dict per entry; otherwise return the
            sorted entry names as they appear in the listing.

        Raises
        ------
        FileNotFoundError
            If the listing fails and the path cannot be resolved via ``info``.
        """
        path = self._strip_protocol(path)
        out = []
        if path not in self.dircache:
            try:
                try:
                    out = [
                        (fn, details)
                        for (fn, details) in self.ftp.mlsd(path)
                        if fn not in [".", ".."]
                        and details["type"] not in ["pdir", "cdir"]
                    ]
                except error_perm:
                    out = _mlsd2(self.ftp, path)  # Not platform independent
                # Use a separate prefix for forming names so that ``path``
                # keeps its value and the root is cached under "/" (the key
                # that the lookup above and invalidate_cache use).
                prefix = "" if path == "/" else path
                for fn, details in out:
                    details["name"] = "/".join([prefix, fn.lstrip("/")])
                    if details["type"] == "file":
                        details["size"] = int(details["size"])
                    else:
                        details["size"] = 0
                    if details["type"] == "dir":
                        details["type"] = "directory"
                self.dircache[path] = out
            except Error as exc:
                # Listing failed: the path may be a plain file, which is
                # looked up through its parent's listing instead.
                try:
                    info = self.info(path)
                    if info["type"] == "file":
                        out = [(path, info)]
                except (Error, IndexError) as inner:
                    raise FileNotFoundError(path) from inner
                del exc
        files = self.dircache.get(path, out)
        if not detail:
            return sorted([fn for fn, details in files])
        return [details for fn, details in files]

    def info(self, path, **kwargs):
        """Return the info dict for ``path`` by searching its parent listing.

        Raises
        ------
        FileNotFoundError
            If no entry of the parent listing has this path as its name.
        """
        # implement with direct method
        path = self._strip_protocol(path)
        if path == "/":
            # special case, since this dir has no real entry
            return {"name": "/", "size": 0, "type": "directory"}
        files = self.ls(self._parent(path).lstrip("/"), True)
        for entry in files:
            if entry["name"] == path:
                return entry
        raise FileNotFoundError(path)

    def get_file(self, rpath, lpath, **kwargs):
        """Download remote ``rpath`` into ``lpath``.

        If ``rpath`` is a directory, only the local directory is created
        (when missing). ``lpath`` may be a file-like object, which is written
        to and left open, or a local path, which is opened for binary
        writing and always closed again - even if the transfer fails.
        """
        if self.isdir(rpath):
            if not os.path.exists(lpath):
                os.mkdir(lpath)
            return
        opened_here = not isfilelike(lpath)
        outfile = open(lpath, "wb") if opened_here else lpath
        try:
            self.ftp.retrbinary(
                f"RETR {rpath}",
                blocksize=self.blocksize,
                callback=outfile.write,
            )
        finally:
            # only close handles this method opened itself
            if opened_here:
                outfile.close()

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Return the bytes of ``path``, optionally starting at ``start``.

        When ``end`` is given the call is delegated to the parent class
        implementation; otherwise the file is retrieved in one transfer,
        passing ``start`` as the REST offset.

        Raises
        ------
        FileNotFoundError
            If the server rejects the retrieval.
        """
        if end is not None:
            return super().cat_file(path, start, end, **kwargs)
        out = []
        try:
            self.ftp.retrbinary(
                f"RETR {path}",
                blocksize=self.blocksize,
                rest=start,
                callback=out.append,
            )
        except Error as orig_exc:  # error_perm is a subclass of Error
            raise FileNotFoundError(path) from orig_exc
        return b"".join(out)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        autocommit=True,
        **kwargs,
    ):
        """Create an ``FTPFile`` for ``path``; ``block_size`` falls back to
        the filesystem's ``blocksize`` when not given."""
        path = self._strip_protocol(path)
        block_size = block_size or self.blocksize
        return FTPFile(
            self,
            path,
            mode=mode,
            block_size=block_size,
            tempdir=self.tempdir,
            autocommit=autocommit,
            cache_options=cache_options,
        )

    def _rm(self, path):
        """Delete a single remote file and drop its parent's cached listing."""
        path = self._strip_protocol(path)
        self.ftp.delete(path)
        self.invalidate_cache(self._parent(path))

    def rm(self, path, recursive=False, maxdepth=None):
        """Remove files and directories matched by ``path``.

        The expanded paths are processed in reverse order - presumably so
        that directory contents are removed before the directory itself.
        """
        paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
        for p in reversed(paths):
            if self.isfile(p):
                self.rm_file(p)
            else:
                self.rmdir(p)

    def mkdir(self, path: str, create_parents: bool = True, **kwargs: Any) -> None:
        """Create a remote directory.

        Parameters
        ----------
        path: str
            Directory to create
        create_parents: bool
            If True, missing parent directories are created first.
        """
        path = self._strip_protocol(path)
        parent = self._parent(path)
        # Test the flag first so that no existence check is issued when
        # parents are not going to be created anyway.
        if create_parents and parent != self.root_marker and not self.exists(parent):
            self.mkdir(parent, create_parents=create_parents)

        self.ftp.mkd(path)
        self.invalidate_cache(self._parent(path))

    def makedirs(self, path: str, exist_ok: bool = False) -> None:
        """Create ``path`` including any missing parents.

        Raises
        ------
        FileExistsError
            If ``path`` already exists and ``exist_ok`` is False.
        """
        path = self._strip_protocol(path)
        if self.exists(path):
            # NB: "/" does not "exist" as it has no directory entry
            if not exist_ok:
                raise FileExistsError(f"{path} exists without `exist_ok`")
            # exist_ok=True -> no-op
        else:
            self.mkdir(path, create_parents=True)

    def rmdir(self, path):
        """Remove a remote directory and drop its parent's cached listing."""
        path = self._strip_protocol(path)
        self.ftp.rmd(path)
        self.invalidate_cache(self._parent(path))

    def mv(self, path1, path2, **kwargs):
        """Rename ``path1`` to ``path2`` on the server and drop the cached
        listings of both parent directories."""
        path1 = self._strip_protocol(path1)
        path2 = self._strip_protocol(path2)
        self.ftp.rename(path1, path2)
        self.invalidate_cache(self._parent(path1))
        self.invalidate_cache(self._parent(path2))

    def __del__(self):
        # ``ftp`` is missing if construction failed before _connect created
        # it; avoid raising AttributeError from the finalizer in that case.
        ftp = getattr(self, "ftp", None)
        if ftp is not None:
            ftp.close()

    def invalidate_cache(self, path=None):
        """Forget the cached listing for ``path``, or all listings if None."""
        if path is None:
            self.dircache.clear()
        else:
            self.dircache.pop(path, None)
        super().invalidate_cache(path)
class TransferDone(Exception):
    """Raised from a retrieve callback to stop a transfer once enough
    bytes have arrived; internal use only."""
class FTPFile(AbstractBufferedFile):
    """Buffered read/write access to a single file on an FTP server"""

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        **kwargs,
    ):
        """Set up the buffered file.

        When ``autocommit`` is False, the requested path is remembered as
        ``self.target`` and writes go to a uniquely named file inside
        ``kwargs["tempdir"]`` until ``commit`` is called.
        """
        super().__init__(
            fs,
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_type=cache_type,
            cache_options=cache_options,
            **kwargs,
        )
        if autocommit:
            return
        self.target = self.path
        staging_dir = kwargs["tempdir"]
        staging_name = str(uuid.uuid4())
        self.path = "/".join((staging_dir, staging_name))

    def commit(self):
        """Move the temporary file onto its final target path."""
        self.fs.mv(self.path, self.target)

    def discard(self):
        """Remove the file at the current (temporary) path."""
        self.fs.rm(self.path)

    def _fetch_range(self, start, end):
        """Get bytes between given byte limits

        The retrieve callback raises ``TransferDone`` as soon as the
        requested number of bytes has been received, which interrupts the
        transfer; the transfer is then aborted on the server.

        Will fail if the server does not respect the REST command on
        retrieve requests.
        """
        wanted = end - start
        chunks = []
        received = 0

        def on_data(data):
            nonlocal received
            received += len(data)
            surplus = received - wanted
            if surplus > 0:
                # drop the bytes that go past the requested range
                chunks.append(data[:-surplus])
            else:
                chunks.append(data)
            # only interrupt when the server still has more to send
            if surplus >= 0 and end < self.size:
                raise TransferDone

        try:
            self.fs.ftp.retrbinary(
                f"RETR {self.path}",
                blocksize=self.blocksize,
                rest=start,
                callback=on_data,
            )
        except TransferDone:
            try:
                # stop transfer, we got enough bytes for this block
                self.fs.ftp.abort()
                self.fs.ftp.getmultiline()
            except Error:
                # abort failed; fall back to a fresh connection
                self.fs._connect()

        return b"".join(chunks)

    def _upload_chunk(self, final=False):
        """Send the whole write buffer to the server, using ``self.offset``
        as the REST position; always returns True."""
        command = f"STOR {self.path}"
        self.buffer.seek(0)
        self.fs.ftp.storbinary(
            command, self.buffer, blocksize=self.blocksize, rest=self.offset
        )
        return True
def _mlsd2(ftp, path="."):
    """
    Fall back to using `dir` instead of `mlsd` if not supported.

    This parses a Linux style `ls -l` response to `dir`, but the response may
    be platform dependent.

    Parameters
    ----------
    ftp: ftplib.FTP
    path: str
        Expects to be given path, but defaults to ".".

    Returns
    -------
    list of (name, facts) tuples, where ``facts`` holds the string fields
    "modify", "unix.owner", "unix.group", "unix.mode" and "size", plus
    "type" set to "dir" or "file". Lines with fewer than nine
    whitespace-separated fields (e.g. a "total N" header) are skipped.
    """
    lines = []
    ftp.dir(path, lines.append)

    minfo = []
    for line in lines:
        # Split off only the first eight columns so that a file name
        # containing spaces stays intact in the ninth field.
        fields = line.rstrip().split(None, 8)
        if len(fields) < 9:
            continue
        mode, _links, owner, group, size, *stamp, name = fields
        if mode[0] == "l" and " -> " in name:
            # symlinks are shown as "name -> target"; keep the entry's name
            name = name.split(" -> ", 1)[0]
        facts = {
            "modify": " ".join(stamp),
            "unix.owner": owner,
            "unix.group": group,
            "unix.mode": mode,
            "size": size,
            "type": "dir" if mode[0] == "d" else "file",
        }
        minfo.append((name, facts))
    return minfo