import base64
import io
import re

import requests

import fsspec


class JupyterFileSystem(fsspec.AbstractFileSystem):
    """View of the files as seen by a Jupyter server (notebook or lab)

    All operations are HTTP requests against the server's ``/api/contents``
    endpoint, issued through a single ``requests.Session``.
    """

    protocol = ("jupyter", "jlab")

    def __init__(self, url, tok=None, **kwargs):
        """

        Parameters
        ----------
        url : str
            Base URL of the server, like "http://127.0.0.1:8888". May include
            token in the string, which is given by the process when starting up
        tok : str
            If the token is obtained separately, can be given here
        kwargs
            Passed on to the ``fsspec.AbstractFileSystem`` constructor.

        Raises
        ------
        ValueError
            If ``url`` has a query string, ``tok`` is not given, and no
            ``token=...`` value can be found in the URL.
        """
        if "?" in url:
            if tok is None:
                try:
                    tok = re.findall(r"token=([a-z0-9]+)", url)[0]
                except IndexError as e:
                    raise ValueError("Could not determine token") from e
            # The query string is dropped even when ``tok`` was passed
            # explicitly; only the base address is kept.
            url = url.split("?", 1)[0]
        self.url = url.rstrip("/") + "/api/contents"
        self.session = requests.Session()
        if tok:
            self.session.headers["Authorization"] = f"token {tok}"

        super().__init__(**kwargs)

    def ls(self, path, detail=True, **kwargs):
        """List the entries at ``path``.

        Parameters
        ----------
        path : str
            Location on the server; any protocol prefix is stripped first.
        detail : bool
            If True, return the list of info dicts; otherwise only the names.

        Returns
        -------
        list of dict or list of str
            One entry per child for a directory, or a single entry for any
            other type. In each dict the server's "path" value is moved to
            "name", "content" is removed, and type "notebook" is reported
            as "file".

        Raises
        ------
        FileNotFoundError
            If the server answers 404 for ``path``.
        """
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # Must be raised, not returned: callers expect a list back.
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()

        if out["type"] == "directory":
            out = out["content"]
        else:
            out = [out]
        for o in out:
            o["name"] = o.pop("path")
            o.pop("content")
            if o["type"] == "notebook":
                o["type"] = "file"
        if detail:
            return out
        return [o["name"] for o in out]

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Fetch the contents of the file at ``path`` as bytes.

        Parameters
        ----------
        path : str
            Location on the server; any protocol prefix is stripped first.
        start, end : int or None
            Slice bounds applied to the bytes after the complete content
            has been downloaded.

        Returns
        -------
        bytes

        Raises
        ------
        FileNotFoundError
            If the server answers 404 for ``path``.
        """
        path = self._strip_protocol(path)
        r = self.session.get(f"{self.url}/{path}")
        if r.status_code == 404:
            # Must be raised, not returned: callers expect bytes back.
            raise FileNotFoundError(path)
        r.raise_for_status()
        out = r.json()
        if out["format"] == "text":
            # data should be binary
            b = out["content"].encode()
        else:
            b = base64.b64decode(out["content"])
        return b[start:end]

    def pipe_file(self, path, value, **_):
        """Write the bytes ``value`` to ``path`` with a single PUT request.

        The payload is always sent base64-encoded with type "file".
        """
        path = self._strip_protocol(path)
        payload = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": len(value),
            "content": base64.b64encode(value).decode(),
            "format": "base64",
            "type": "file",
        }
        # NOTE(review): the response status is not inspected, so a rejected
        # write goes unnoticed here.
        self.session.put(f"{self.url}/{path}", json=payload)

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory at ``path`` with a PUT request.

        Parameters
        ----------
        path : str
            Directory to create; any protocol prefix is stripped first.
        create_parents : bool
            If True and ``path`` contains "/", the parent directory is
            requested first (recursively), one PUT per path level.
        """
        path = self._strip_protocol(path)
        if create_parents and "/" in path:
            self.mkdir(path.rsplit("/", 1)[0], True)
        payload = {
            "name": path.rsplit("/", 1)[-1],
            "path": path,
            "size": None,
            "content": None,
            "type": "directory",
        }
        # NOTE(review): the response status is not inspected.
        self.session.put(f"{self.url}/{path}", json=payload)

    def _rm(self, path):
        """Send a DELETE request for ``path``."""
        path = self._strip_protocol(path)
        # NOTE(review): the response status is not inspected.
        self.session.delete(f"{self.url}/{path}")

    def _open(self, path, mode="rb", **kwargs):
        """Return a file-like object for ``path``.

        For mode "rb" the whole file is downloaded and wrapped in an
        in-memory ``io.BytesIO``. Any other mode yields a
        ``SimpleFileWriter`` which is always opened as "wb".
        """
        path = self._strip_protocol(path)
        if mode == "rb":
            data = self.cat_file(path)
            return io.BytesIO(data)
        else:
            return SimpleFileWriter(self, path, mode="wb")


class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
    """Write buffer that sends its whole content in one request at the end."""

    def _upload_chunk(self, final=False):
        """Never uploads a chunk until file is done

        Not suitable for large files
        """
        if final is False:
            return False
        # Final call: hand the complete buffered content to the filesystem
        # as a single write.
        self.buffer.seek(0)
        data = self.buffer.read()
        self.fs.pipe_file(self.path, data)