File size: 46,977 Bytes

d1ceb73

"""A contents manager that uses the local file system for storage."""

# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
from __future__ import annotations

import asyncio
import errno
import math
import mimetypes
import os
import platform
import shutil
import stat
import subprocess
import sys
import typing as t
import warnings
from datetime import datetime
from pathlib import Path

import nbformat
from anyio.to_thread import run_sync
from jupyter_core.paths import exists, is_file_hidden, is_hidden
from send2trash import send2trash
from tornado import web
from traitlets import Bool, Int, TraitError, Unicode, default, validate

from jupyter_server import _tz as tz
from jupyter_server.base.handlers import AuthenticatedFileHandler
from jupyter_server.transutils import _i18n
from jupyter_server.utils import to_api_path

from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
from .fileio import AsyncFileManagerMixin, FileManagerMixin
from .manager import AsyncContentsManager, ContentsManager, copy_pat

try:
    from os.path import samefile
except ImportError:
    # windows
    from jupyter_server.utils import samefile_simple as samefile  # type:ignore[assignment]

_script_exporter = None


class FileContentsManager(FileManagerMixin, ContentsManager):
    """A file contents manager."""

    root_dir = Unicode(config=True)

    max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")

    @default("root_dir")
    def _default_root_dir(self):
        if not self.parent:
            return os.getcwd()
        return self.parent.root_dir

    @validate("root_dir")
    def _validate_root_dir(self, proposal):
        value = proposal["value"]
        if not os.path.isabs(value):
            # If we receive a non-absolute path, make it absolute.
            value = os.path.abspath(value)
        if not os.path.isdir(value):
            raise TraitError("%r is not a directory" % value)
        return value

    @default("preferred_dir")
    def _default_preferred_dir(self):
        if not self.parent:
            return ""
        try:
            value = self.parent.preferred_dir
            if value == self.parent.root_dir:
                value = None
        except AttributeError:
            pass
        else:
            if value is not None:
                warnings.warn(
                    "ServerApp.preferred_dir config is deprecated in jupyter-server 2.0. Use FileContentsManager.preferred_dir instead",
                    FutureWarning,
                    stacklevel=3,
                )
                try:
                    path = Path(value)
                    return path.relative_to(self.root_dir).as_posix()
                except ValueError:
                    raise TraitError("%s is outside root contents directory" % value) from None
        return ""

    @validate("preferred_dir")
    def _validate_preferred_dir(self, proposal):
        # It should be safe to pass an API path through this method:
        proposal["value"] = to_api_path(proposal["value"], self.root_dir)
        return super()._validate_preferred_dir(proposal)

    @default("checkpoints_class")
    def _checkpoints_class_default(self):
        return FileCheckpoints

    delete_to_trash = Bool(
        True,
        config=True,
        help="""If True (default), deleting files will send them to the
        platform's trash/recycle bin, where they can be recovered. If False,
        deleting files really deletes them.""",
    )

    always_delete_dir = Bool(
        False,
        config=True,
        help="""If True, deleting a non-empty directory will always be allowed.
        WARNING this may result in files being permanently removed; e.g. on Windows,
        if the data size is too big for the trash/recycle bin the directory will be permanently
        deleted. If False (default), the non-empty directory will be sent to the trash only
        if safe. And if ``delete_to_trash`` is True, the directory won't be deleted.""",
    )

    @default("files_handler_class")
    def _files_handler_class_default(self):
        return AuthenticatedFileHandler

    @default("files_handler_params")
    def _files_handler_params_default(self):
        return {"path": self.root_dir}

    def is_hidden(self, path):
        """Does the API style path correspond to a hidden directory or file?

        Parameters
        ----------
        path : str
            The path to check. This is an API path (`/` separated,
            relative to root_dir).

        Returns
        -------
        hidden : bool
            Whether the path exists and is hidden.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        return is_hidden(os_path, self.root_dir)

    def is_writable(self, path):
        """Does the API style path correspond to a writable directory or file?

        Parameters
        ----------
        path : str
            The path to check. This is an API path (`/` separated,
            relative to root_dir).

        Returns
        -------
        hidden : bool
            Whether the path exists and is writable.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        try:
            return os.access(os_path, os.W_OK)
        except OSError:
            self.log.error("Failed to check write permissions on %s", os_path)
            return False

    def file_exists(self, path):
        """Returns True if the file exists, else returns False.

        API-style wrapper for os.path.isfile

        Parameters
        ----------
        path : str
            The relative path to the file (with '/' as separator)

        Returns
        -------
        exists : bool
            Whether the file exists.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path)
        return os.path.isfile(os_path)

    def dir_exists(self, path):
        """Does the API-style path refer to an extant directory?

        API-style wrapper for os.path.isdir

        Parameters
        ----------
        path : str
            The path to check. This is an API path (`/` separated,
            relative to root_dir).

        Returns
        -------
        exists : bool
            Whether the path is indeed a directory.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        return os.path.isdir(os_path)

    def exists(self, path):
        """Returns True if the path exists, else returns False.

        API-style wrapper for os.path.exists

        Parameters
        ----------
        path : str
            The API path to the file (with '/' as separator)

        Returns
        -------
        exists : bool
            Whether the target exists.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        return exists(os_path)

    def _base_model(self, path):
        """Build the common base of a contents model"""
        os_path = self._get_os_path(path)
        info = os.lstat(os_path)

        four_o_four = "file or directory does not exist: %r" % path

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        try:
            # size of file
            size = info.st_size
        except (ValueError, OSError):
            self.log.warning("Unable to get size.")
            size = None

        try:
            last_modified = tz.utcfromtimestamp(info.st_mtime)
        except (ValueError, OSError):
            # Files can rarely have an invalid timestamp
            # https://github.com/jupyter/notebook/issues/2539
            # https://github.com/jupyter/notebook/issues/2757
            # Use the Unix epoch as a fallback so we don't crash.
            self.log.warning("Invalid mtime %s for %s", info.st_mtime, os_path)
            last_modified = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

        try:
            created = tz.utcfromtimestamp(info.st_ctime)
        except (ValueError, OSError):  # See above
            self.log.warning("Invalid ctime %s for %s", info.st_ctime, os_path)
            created = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

        # Create the base model.
        model = {}
        model["name"] = path.rsplit("/", 1)[-1]
        model["path"] = path
        model["last_modified"] = last_modified
        model["created"] = created
        model["content"] = None
        model["format"] = None
        model["mimetype"] = None
        model["size"] = size
        model["writable"] = self.is_writable(path)
        model["hash"] = None
        model["hash_algorithm"] = None

        return model

    def _dir_model(self, path, content=True):
        """Build a model for a directory

        if content is requested, will include a listing of the directory
        """
        os_path = self._get_os_path(path)

        four_o_four = "directory does not exist: %r" % path

        if not os.path.isdir(os_path):
            raise web.HTTPError(404, four_o_four)
        elif not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        model = self._base_model(path)
        model["type"] = "directory"
        model["size"] = None
        if content:
            model["content"] = contents = []
            os_dir = self._get_os_path(path)
            for name in os.listdir(os_dir):
                try:
                    os_path = os.path.join(os_dir, name)
                except UnicodeDecodeError as e:
                    self.log.warning("failed to decode filename '%s': %r", name, e)
                    continue

                try:
                    st = os.lstat(os_path)
                except OSError as e:
                    # skip over broken symlinks in listing
                    if e.errno == errno.ENOENT:
                        self.log.warning("%s doesn't exist", os_path)
                    elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                        self.log.warning("Error stat-ing %s: %r", os_path, e)
                    continue

                if (
                    not stat.S_ISLNK(st.st_mode)
                    and not stat.S_ISREG(st.st_mode)
                    and not stat.S_ISDIR(st.st_mode)
                ):
                    self.log.debug("%s not a regular file", os_path)
                    continue

                try:
                    if self.should_list(name) and (
                        self.allow_hidden or not is_file_hidden(os_path, stat_res=st)
                    ):
                        contents.append(self.get(path=f"{path}/{name}", content=False))
                except OSError as e:
                    # ELOOP: recursive symlink, also don't show failure due to permissions
                    if e.errno not in [errno.ELOOP, errno.EACCES]:
                        self.log.warning(
                            "Unknown error checking if file %r is hidden",
                            os_path,
                            exc_info=True,
                        )

            model["format"] = "json"

        return model

    def _file_model(self, path, content=True, format=None, require_hash=False):
        """Build a model for a file

        if content is requested, include the file contents.

        format:
          If 'text', the contents will be decoded as UTF-8.
          If 'base64', the raw bytes contents will be encoded as base64.
          If not specified, try to decode as UTF-8, and fall back to base64

        if require_hash is true, the model will include 'hash'
        """
        model = self._base_model(path)
        model["type"] = "file"

        os_path = self._get_os_path(path)
        model["mimetype"] = mimetypes.guess_type(os_path)[0]

        bytes_content = None
        if content:
            content, format, bytes_content = self._read_file(os_path, format, raw=True)  # type: ignore[misc]
            if model["mimetype"] is None:
                default_mime = {
                    "text": "text/plain",
                    "base64": "application/octet-stream",
                }[format]
                model["mimetype"] = default_mime

            model.update(
                content=content,
                format=format,
            )

        if require_hash:
            if bytes_content is None:
                bytes_content, _ = self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
            model.update(**self._get_hash(bytes_content))  # type: ignore[arg-type]

        return model

    def _notebook_model(self, path, content=True, require_hash=False):
        """Build a notebook model

        if content is requested, the notebook content will be populated
        as a JSON structure (not double-serialized)

        if require_hash is true, the model will include 'hash'
        """
        model = self._base_model(path)
        model["type"] = "notebook"
        os_path = self._get_os_path(path)

        bytes_content = None
        if content:
            validation_error: dict[str, t.Any] = {}
            nb, bytes_content = self._read_notebook(
                os_path, as_version=4, capture_validation_error=validation_error, raw=True
            )
            self.mark_trusted_cells(nb, path)
            model["content"] = nb
            model["format"] = "json"
            self.validate_notebook_model(model, validation_error)

        if require_hash:
            if bytes_content is None:
                bytes_content, _ = self._read_file(os_path, "byte")  # type: ignore[misc]
            model.update(**self._get_hash(bytes_content))  # type: ignore[arg-type]

        return model

    def get(self, path, content=True, type=None, format=None, require_hash=False):
        """Takes a path for an entity and returns its model

        Parameters
        ----------
        path : str
            the API path that describes the relative path for the target
        content : bool
            Whether to include the contents in the reply
        type : str, optional
            The requested type - 'file', 'notebook', or 'directory'.
            Will raise HTTPError 400 if the content doesn't match.
        format : str, optional
            The requested format for file contents. 'text' or 'base64'.
            Ignored if this returns a notebook or directory model.
        require_hash: bool, optional
            Whether to include the hash of the file contents.

        Returns
        -------
        model : dict
            the contents model. If content=True, returns the contents
            of the file or directory as well.
        """
        path = path.strip("/")
        os_path = self._get_os_path(path)
        four_o_four = "file or directory does not exist: %r" % path

        if not self.exists(path):
            raise web.HTTPError(404, four_o_four)

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        if os.path.isdir(os_path):
            if type not in (None, "directory"):
                raise web.HTTPError(
                    400,
                    f"{path} is a directory, not a {type}",
                    reason="bad type",
                )
            model = self._dir_model(path, content=content)
        elif type == "notebook" or (type is None and path.endswith(".ipynb")):
            model = self._notebook_model(path, content=content, require_hash=require_hash)
        else:
            if type == "directory":
                raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
            model = self._file_model(
                path, content=content, format=format, require_hash=require_hash
            )
        self.emit(data={"action": "get", "path": path})
        return model

    def _save_directory(self, os_path, model, path=""):
        """create a directory"""
        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, "Cannot create directory %r" % os_path)
        if not os.path.exists(os_path):
            with self.perm_to_403():
                os.mkdir(os_path)
        elif not os.path.isdir(os_path):
            raise web.HTTPError(400, "Not a directory: %s" % (os_path))
        else:
            self.log.debug("Directory %r already exists", os_path)

    def save(self, model, path=""):
        """Save the file model and return the model with no content."""
        path = path.strip("/")

        self.run_pre_save_hooks(model=model, path=path)

        if "type" not in model:
            raise web.HTTPError(400, "No file type provided")
        if "content" not in model and model["type"] != "directory":
            raise web.HTTPError(400, "No file content provided")
        os_path = self._get_os_path(path)

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}")

        self.log.debug("Saving %s", os_path)

        validation_error: dict[str, t.Any] = {}
        try:
            if model["type"] == "notebook":
                nb = nbformat.from_dict(model["content"])
                self.check_and_sign(nb, path)
                self._save_notebook(os_path, nb, capture_validation_error=validation_error)
                # One checkpoint should always exist for notebooks.
                if not self.checkpoints.list_checkpoints(path):
                    self.create_checkpoint(path)
            elif model["type"] == "file":
                # Missing format will be handled internally by _save_file.
                self._save_file(os_path, model["content"], model.get("format"))
            elif model["type"] == "directory":
                self._save_directory(os_path, model, path)
            else:
                raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"])
        except web.HTTPError:
            raise
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

        validation_message = None
        if model["type"] == "notebook":
            self.validate_notebook_model(model, validation_error=validation_error)
            validation_message = model.get("message", None)

        model = self.get(path, content=False)
        if validation_message:
            model["message"] = validation_message

        self.run_post_save_hooks(model=model, os_path=os_path)
        self.emit(data={"action": "save", "path": path})
        return model

    def delete_file(self, path):
        """Delete file at path."""
        path = path.strip("/")
        os_path = self._get_os_path(path)
        rm = os.unlink

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

        four_o_four = "file or directory does not exist: %r" % path
        if not self.exists(path):
            raise web.HTTPError(404, four_o_four)

        def is_non_empty_dir(os_path):
            if os.path.isdir(os_path):
                # A directory containing only leftover checkpoints is
                # considered empty.
                cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
                if set(os.listdir(os_path)) - {cp_dir}:
                    return True

            return False

        if self.delete_to_trash:
            if not self.always_delete_dir and sys.platform == "win32" and is_non_empty_dir(os_path):
                # send2trash can really delete files on Windows, so disallow
                # deleting non-empty files. See Github issue 3631.
                raise web.HTTPError(400, "Directory %s not empty" % os_path)
            # send2trash now supports deleting directories. see #1290
            if not self.is_writable(path):
                raise web.HTTPError(403, "Permission denied: %s" % path) from None
            self.log.debug("Sending %s to trash", os_path)
            try:
                send2trash(os_path)
            except OSError as e:
                raise web.HTTPError(400, "send2trash failed: %s" % e) from e
            return

        if os.path.isdir(os_path):
            # Don't permanently delete non-empty directories.
            if not self.always_delete_dir and is_non_empty_dir(os_path):
                raise web.HTTPError(400, "Directory %s not empty" % os_path)
            self.log.debug("Removing directory %s", os_path)
            with self.perm_to_403():
                shutil.rmtree(os_path)
        else:
            self.log.debug("Unlinking file %s", os_path)
            with self.perm_to_403():
                rm(os_path)

    def rename_file(self, old_path, new_path):
        """Rename a file."""
        old_path = old_path.strip("/")
        new_path = new_path.strip("/")
        if new_path == old_path:
            return

        new_os_path = self._get_os_path(new_path)
        old_os_path = self._get_os_path(old_path)

        if not self.allow_hidden and (
            is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir)
        ):
            raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

        # Should we proceed with the move?
        if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path):
            raise web.HTTPError(409, "File already exists: %s" % new_path)

        # Move the file
        try:
            with self.perm_to_403():
                shutil.move(old_os_path, new_os_path)
        except web.HTTPError:
            raise
        except Exception as e:
            raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e

    def info_string(self):
        """Get the information string for the manager."""
        return _i18n("Serving notebooks from local directory: %s") % self.root_dir

    def get_kernel_path(self, path, model=None):
        """Return the initial API path of  a kernel associated with a given notebook"""
        if self.dir_exists(path):
            return path
        parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
        return parent_dir

    def copy(self, from_path, to_path=None):
        """
        Copy an existing file or directory and return its new model.
        If to_path not specified, it will be the parent directory of from_path.
        If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
        Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
        For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
        from_path must be a full path to a file or directory.
        """
        to_path_original = str(to_path)
        path = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        if "/" in path:
            from_dir, from_name = path.rsplit("/", 1)
        else:
            from_dir = ""
            from_name = path

        model = self.get(path)
        # limit the size of folders being copied to prevent a timeout error
        if model["type"] == "directory":
            self.check_folder_size(path)
        else:
            # let the super class handle copying files
            return super().copy(from_path=from_path, to_path=to_path)

        is_destination_specified = to_path is not None
        to_name = copy_pat.sub(".", from_name)
        if not is_destination_specified:
            to_path = from_dir
        if self.dir_exists(to_path):
            name = copy_pat.sub(".", from_name)
            to_name = super().increment_filename(name, to_path, insert="-Copy")
        to_path = f"{to_path}/{to_name}"

        return self._copy_dir(
            from_path=from_path,
            to_path_original=to_path_original,
            to_name=to_name,
            to_path=to_path,
        )

    def _copy_dir(self, from_path, to_path_original, to_name, to_path):
        """
        handles copying directories
        returns the model for the copied directory
        """
        try:
            os_from_path = self._get_os_path(from_path.strip("/"))
            os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
            shutil.copytree(os_from_path, os_to_path)
            model = self.get(to_path, content=False)
        except OSError as err:
            self.log.error(f"OSError in _copy_dir: {err}")
            raise web.HTTPError(
                400,
                f"Can't copy '{from_path}' into Folder '{to_path}'",
            ) from err

        return model

    def check_folder_size(self, path):
        """
        limit the size of folders being copied to be no more than the
        trait max_copy_folder_size_mb to prevent a timeout error
        """
        limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
        size = int(self._get_dir_size(self._get_os_path(path)))
        # convert from KB to Bytes for macOS
        size = size * 1024 if platform.system() == "Darwin" else size

        if size > limit_bytes:
            raise web.HTTPError(
                400,
                f"""
                    Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
                    "{path}" is {self._human_readable_size(size)}
                """,
            )

    def _get_dir_size(self, path="."):
        """
        calls the command line program du to get the directory size
        """
        try:
            if platform.system() == "Darwin":
                # returns the size of the folder in KB
                result = subprocess.run(
                    ["du", "-sk", path],  # noqa: S607
                    capture_output=True,
                    check=True,
                ).stdout.split()
            else:
                result = subprocess.run(
                    ["du", "-s", "--block-size=1", path],  # noqa: S607
                    capture_output=True,
                    check=True,
                ).stdout.split()

            self.log.info(f"current status of du command {result}")
            size = result[0].decode("utf-8")
        except Exception:
            self.log.warning(
                "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
                path,
            )
            return "0"
        return size

    def _human_readable_size(self, size):
        """
        returns folder size in a human readable format
        """
        if size == 0:
            return "0 Bytes"

        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
        order = int(math.log2(size) / 10) if size else 0

        return f"{size / (1 << (order * 10)):.4g} {units[order]}"


class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
    """An async file contents manager."""

    @default("checkpoints_class")
    def _checkpoints_class_default(self):
        return AsyncFileCheckpoints

    async def _dir_model(self, path, content=True):
        """Build a model for a directory

        if content is requested, will include a listing of the directory
        """
        os_path = self._get_os_path(path)

        four_o_four = "directory does not exist: %r" % path

        if not os.path.isdir(os_path):
            raise web.HTTPError(404, four_o_four)
        elif not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        model = self._base_model(path)
        model["type"] = "directory"
        model["size"] = None
        if content:
            model["content"] = contents = []
            os_dir = self._get_os_path(path)
            dir_contents = await run_sync(os.listdir, os_dir)
            for name in dir_contents:
                try:
                    os_path = os.path.join(os_dir, name)
                except UnicodeDecodeError as e:
                    self.log.warning("failed to decode filename '%s': %r", name, e)
                    continue

                try:
                    st = await run_sync(os.lstat, os_path)
                except OSError as e:
                    # skip over broken symlinks in listing
                    if e.errno == errno.ENOENT:
                        self.log.warning("%s doesn't exist", os_path)
                    elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                        self.log.warning("Error stat-ing %s: %r", os_path, e)
                    continue

                if (
                    not stat.S_ISLNK(st.st_mode)
                    and not stat.S_ISREG(st.st_mode)
                    and not stat.S_ISDIR(st.st_mode)
                ):
                    self.log.debug("%s not a regular file", os_path)
                    continue

                try:
                    if self.should_list(name) and (
                        self.allow_hidden or not is_file_hidden(os_path, stat_res=st)
                    ):
                        contents.append(await self.get(path=f"{path}/{name}", content=False))
                except OSError as e:
                    # ELOOP: recursive symlink, also don't show failure due to permissions
                    if e.errno not in [errno.ELOOP, errno.EACCES]:
                        self.log.warning(
                            "Unknown error checking if file %r is hidden",
                            os_path,
                            exc_info=True,
                        )

            model["format"] = "json"

        return model

    async def _file_model(self, path, content=True, format=None, require_hash=False):
        """Build a model for a file

        if content is requested, include the file contents.

        format:
          If 'text', the contents will be decoded as UTF-8.
          If 'base64', the raw bytes contents will be encoded as base64.
          If not specified, try to decode as UTF-8, and fall back to base64

        if require_hash is true, the model will include 'hash'
        """
        model = self._base_model(path)
        model["type"] = "file"

        os_path = self._get_os_path(path)
        model["mimetype"] = mimetypes.guess_type(os_path)[0]

        bytes_content = None
        if content:
            content, format, bytes_content = await self._read_file(os_path, format, raw=True)  # type: ignore[misc]
            if model["mimetype"] is None:
                default_mime = {
                    "text": "text/plain",
                    "base64": "application/octet-stream",
                }[format]
                model["mimetype"] = default_mime

            model.update(
                content=content,
                format=format,
            )

        if require_hash:
            if bytes_content is None:
                bytes_content, _ = await self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
            model.update(**self._get_hash(bytes_content))  # type: ignore[arg-type]

        return model

    async def _notebook_model(self, path, content=True, require_hash=False):
        """Build a notebook model

        if content is requested, the notebook content will be populated
        as a JSON structure (not double-serialized)
        """
        model = self._base_model(path)
        model["type"] = "notebook"
        os_path = self._get_os_path(path)

        bytes_content = None
        if content:
            validation_error: dict[str, t.Any] = {}
            nb, bytes_content = await self._read_notebook(
                os_path, as_version=4, capture_validation_error=validation_error, raw=True
            )
            self.mark_trusted_cells(nb, path)
            model["content"] = nb
            model["format"] = "json"
            self.validate_notebook_model(model, validation_error)

        if require_hash:
            if bytes_content is None:
                bytes_content, _ = await self._read_file(os_path, "byte")  # type: ignore[misc]
            model.update(**(self._get_hash(bytes_content)))  # type: ignore[arg-type]

        return model

    async def get(self, path, content=True, type=None, format=None, require_hash=False):
        """Takes a path for an entity and returns its model

        Parameters
        ----------
        path : str
            the API path that describes the relative path for the target
        content : bool
            Whether to include the contents in the reply
        type : str, optional
            The requested type - 'file', 'notebook', or 'directory'.
            Will raise HTTPError 400 if the content doesn't match.
        format : str, optional
            The requested format for file contents. 'text' or 'base64'.
            Ignored if this returns a notebook or directory model.
        require_hash: bool, optional
            Whether to include the hash of the file contents.

        Returns
        -------
        model : dict
            the contents model. If content=True, returns the contents
            of the file or directory as well.
        """
        path = path.strip("/")

        if not self.exists(path):
            raise web.HTTPError(404, "No such file or directory: %s" % path)

        os_path = self._get_os_path(path)
        if os.path.isdir(os_path):
            if type not in (None, "directory"):
                raise web.HTTPError(
                    400,
                    f"{path} is a directory, not a {type}",
                    reason="bad type",
                )
            model = await self._dir_model(path, content=content)
        elif type == "notebook" or (type is None and path.endswith(".ipynb")):
            model = await self._notebook_model(path, content=content, require_hash=require_hash)
        else:
            if type == "directory":
                raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
            model = await self._file_model(
                path, content=content, format=format, require_hash=require_hash
            )
        self.emit(data={"action": "get", "path": path})
        return model

    async def _save_directory(self, os_path, model, path=""):
        """create a directory"""
        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, "Cannot create hidden directory %r" % os_path)
        if not os.path.exists(os_path):
            with self.perm_to_403():
                await run_sync(os.mkdir, os_path)
        elif not os.path.isdir(os_path):
            raise web.HTTPError(400, "Not a directory: %s" % (os_path))
        else:
            self.log.debug("Directory %r already exists", os_path)

    async def save(self, model, path=""):
        """Save the file model and return the model with no content."""
        path = path.strip("/")

        self.run_pre_save_hooks(model=model, path=path)

        if "type" not in model:
            raise web.HTTPError(400, "No file type provided")
        if "content" not in model and model["type"] != "directory":
            raise web.HTTPError(400, "No file content provided")

        os_path = self._get_os_path(path)
        self.log.debug("Saving %s", os_path)

        validation_error: dict[str, t.Any] = {}
        try:
            if model["type"] == "notebook":
                nb = nbformat.from_dict(model["content"])
                self.check_and_sign(nb, path)
                await self._save_notebook(os_path, nb, capture_validation_error=validation_error)
                # One checkpoint should always exist for notebooks.
                if not (await self.checkpoints.list_checkpoints(path)):
                    await self.create_checkpoint(path)
            elif model["type"] == "file":
                # Missing format will be handled internally by _save_file.
                await self._save_file(os_path, model["content"], model.get("format"))
            elif model["type"] == "directory":
                await self._save_directory(os_path, model, path)
            else:
                raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"])
        except web.HTTPError:
            raise
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

        validation_message = None
        if model["type"] == "notebook":
            self.validate_notebook_model(model, validation_error=validation_error)
            validation_message = model.get("message", None)

        model = await self.get(path, content=False)
        if validation_message:
            model["message"] = validation_message

        self.run_post_save_hooks(model=model, os_path=os_path)
        self.emit(data={"action": "save", "path": path})
        return model

    async def delete_file(self, path):
        """Delete file at path."""
        path = path.strip("/")
        os_path = self._get_os_path(path)
        rm = os.unlink

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

        if not os.path.exists(os_path):
            raise web.HTTPError(404, "File or directory does not exist: %s" % os_path)

        async def is_non_empty_dir(os_path):
            if os.path.isdir(os_path):
                # A directory containing only leftover checkpoints is
                # considered empty.
                cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
                dir_contents = set(await run_sync(os.listdir, os_path))
                if dir_contents - {cp_dir}:
                    return True

            return False

        if self.delete_to_trash:
            if (
                not self.always_delete_dir
                and sys.platform == "win32"
                and await is_non_empty_dir(os_path)
            ):
                # send2trash can really delete files on Windows, so disallow
                # deleting non-empty files. See Github issue 3631.
                raise web.HTTPError(400, "Directory %s not empty" % os_path)
            # send2trash now supports deleting directories. see #1290
            if not self.is_writable(path):
                raise web.HTTPError(403, "Permission denied: %s" % path) from None
            self.log.debug("Sending %s to trash", os_path)
            try:
                send2trash(os_path)
            except OSError as e:
                raise web.HTTPError(400, "send2trash failed: %s" % e) from e
            return

        if os.path.isdir(os_path):
            # Don't permanently delete non-empty directories.
            if not self.always_delete_dir and await is_non_empty_dir(os_path):
                raise web.HTTPError(400, "Directory %s not empty" % os_path)
            self.log.debug("Removing directory %s", os_path)
            with self.perm_to_403():
                await run_sync(shutil.rmtree, os_path)
        else:
            self.log.debug("Unlinking file %s", os_path)
            with self.perm_to_403():
                await run_sync(rm, os_path)

    async def rename_file(self, old_path, new_path):
        """Rename a file."""
        old_path = old_path.strip("/")
        new_path = new_path.strip("/")
        if new_path == old_path:
            return

        new_os_path = self._get_os_path(new_path)
        old_os_path = self._get_os_path(old_path)

        if not self.allow_hidden and (
            is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir)
        ):
            raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

        # Should we proceed with the move?
        if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path):
            raise web.HTTPError(409, "File already exists: %s" % new_path)

        # Move the file
        try:
            with self.perm_to_403():
                await run_sync(shutil.move, old_os_path, new_os_path)
        except web.HTTPError:
            raise
        except Exception as e:
            raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e

    async def dir_exists(self, path):
        """Does a directory exist at the given path"""
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        return os.path.isdir(os_path)

    async def file_exists(self, path):
        """Does a file exist at the given path"""
        path = path.strip("/")
        os_path = self._get_os_path(path)
        return os.path.isfile(os_path)

    async def is_hidden(self, path):
        """Is path a hidden directory or file"""
        path = path.strip("/")
        os_path = self._get_os_path(path=path)
        return is_hidden(os_path, self.root_dir)

    async def get_kernel_path(self, path, model=None):
        """Return the initial API path of a kernel associated with a given notebook"""
        if await self.dir_exists(path):
            return path
        parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
        return parent_dir

    async def copy(self, from_path, to_path=None):
        """
        Copy an existing file or directory and return its new model.
        If to_path not specified, it will be the parent directory of from_path.
        If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
        Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
        For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
        from_path must be a full path to a file or directory.
        """
        to_path_original = str(to_path)
        path = from_path.strip("/")
        if to_path is not None:
            to_path = to_path.strip("/")

        if "/" in path:
            from_dir, from_name = path.rsplit("/", 1)
        else:
            from_dir = ""
            from_name = path

        model = await self.get(path)
        # limit the size of folders being copied to prevent a timeout error
        if model["type"] == "directory":
            await self.check_folder_size(path)
        else:
            # let the super class handle copying files
            return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

        is_destination_specified = to_path is not None
        to_name = copy_pat.sub(".", from_name)
        if not is_destination_specified:
            to_path = from_dir
        if await self.dir_exists(to_path):
            name = copy_pat.sub(".", from_name)
            to_name = await super().increment_filename(name, to_path, insert="-Copy")
        to_path = f"{to_path}/{to_name}"

        return await self._copy_dir(
            from_path=from_path,
            to_path_original=to_path_original,
            to_name=to_name,
            to_path=to_path,
        )

    async def _copy_dir(
        self, from_path: str, to_path_original: str, to_name: str, to_path: str
    ) -> dict[str, t.Any]:
        """
        handles copying directories
        returns the model for the copied directory
        """
        try:
            os_from_path = self._get_os_path(from_path.strip("/"))
            os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
            shutil.copytree(os_from_path, os_to_path)
            model = await self.get(to_path, content=False)
        except OSError as err:
            self.log.error(f"OSError in _copy_dir: {err}")
            raise web.HTTPError(
                400,
                f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
            ) from err

        return model  # type:ignore[no-any-return]

    async def check_folder_size(self, path: str) -> None:
        """
        limit the size of folders being copied to be no more than the
        trait max_copy_folder_size_mb to prevent a timeout error
        """
        limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024

        size = int(await self._get_dir_size(self._get_os_path(path)))
        # convert from KB to Bytes for macOS
        size = size * 1024 if platform.system() == "Darwin" else size
        if size > limit_bytes:
            raise web.HTTPError(
                400,
                f"""
                    Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
                    "{path}" is {await self._human_readable_size(size)}
                """,
            )

    async def _get_dir_size(self, path: str = ".") -> str:
        """
        calls the command line program du to get the directory size
        """
        try:
            if platform.system() == "Darwin":
                # returns the size of the folder in KB
                args = ["-sk", path]
            else:
                args = ["-s", "--block-size=1", path]
            proc = await asyncio.create_subprocess_exec(
                "du", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )

            stdout, _ = await proc.communicate()
            result = await proc.wait()
            self.log.info(f"current status of du command {result}")
            assert result == 0
            size = stdout.decode("utf-8").split()[0]
        except Exception:
            self.log.warning(
                "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
                path,
            )
            return "0"
        return size

    async def _human_readable_size(self, size: int) -> str:
        """
        returns folder size in a human readable format
        """
        if size == 0:
            return "0 Bytes"

        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
        order = int(math.log2(size) / 10) if size else 0

        return f"{size / (1 << (order * 10)):.4g} {units[order]}"