Upload folder using huggingface_hub

d1ceb73 verified 11 months ago

47 kB

	"""A contents manager that uses the local file system for storage."""

	# Copyright (c) Jupyter Development Team.
	# Distributed under the terms of the Modified BSD License.
	from __future__ import annotations

	import asyncio
	import errno
	import math
	import mimetypes
	import os
	import platform
	import shutil
	import stat
	import subprocess
	import sys
	import typing as t
	import warnings
	from datetime import datetime
	from pathlib import Path

	import nbformat
	from anyio.to_thread import run_sync
	from jupyter_core.paths import exists, is_file_hidden, is_hidden
	from send2trash import send2trash
	from tornado import web
	from traitlets import Bool, Int, TraitError, Unicode, default, validate

	from jupyter_server import _tz as tz
	from jupyter_server.base.handlers import AuthenticatedFileHandler
	from jupyter_server.transutils import _i18n
	from jupyter_server.utils import to_api_path

	from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
	from .fileio import AsyncFileManagerMixin, FileManagerMixin
	from .manager import AsyncContentsManager, ContentsManager, copy_pat

	try:
	from os.path import samefile
	except ImportError:
	# windows
	from jupyter_server.utils import samefile_simple as samefile # type:ignore[assignment]

	# Module-level slot initialised to None. Nothing in the visible part of this
	# module reads or assigns it afterwards (NOTE(review): confirm it is still needed).
	_script_exporter = None


	class FileContentsManager(FileManagerMixin, ContentsManager):
	"""A file contents manager."""

	# Configurable root directory. Its default comes from ``_default_root_dir``
	# and every assigned value passes through ``_validate_root_dir``.
	root_dir = Unicode(config=True)

	# Size limit, in megabytes, that ``check_folder_size`` enforces before a
	# directory is copied.
	max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")

	@default("root_dir")
	def _default_root_dir(self):
	    """Default ``root_dir``: the parent's ``root_dir``, or the current working directory when there is no parent."""
	    parent = self.parent
	    return parent.root_dir if parent else os.getcwd()

	@validate("root_dir")
	def _validate_root_dir(self, proposal):
	    """Validate a proposed ``root_dir`` value.

	    Parameters
	    ----------
	    proposal : dict
	        Trait proposal; only ``proposal["value"]`` is read.

	    Returns
	    -------
	    str
	        The proposed path, made absolute if it was relative.

	    Raises
	    ------
	    TraitError
	        If the resulting path is not an existing directory.
	    """
	    candidate = proposal["value"]
	    # Relative paths are made absolute with os.path.abspath.
	    root = candidate if os.path.isabs(candidate) else os.path.abspath(candidate)
	    if os.path.isdir(root):
	        return root
	    raise TraitError("%r is not a directory" % root)

	@default("preferred_dir")
	def _default_preferred_dir(self):
	    """Derive the default ``preferred_dir`` from the parent, if there is one.

	    Returns ``""`` when there is no parent, when reading the parent's
	    ``preferred_dir`` / ``root_dir`` raises ``AttributeError``, or when the
	    parent's ``preferred_dir`` equals its ``root_dir``.  Otherwise a
	    ``FutureWarning`` is issued and the parent's value is returned as a POSIX
	    path relative to ``self.root_dir``.

	    Raises
	    ------
	    TraitError
	        If the parent's ``preferred_dir`` is not located under ``self.root_dir``.
	    """
	    parent = self.parent
	    if not parent:
	        return ""

	    try:
	        inherited = parent.preferred_dir
	        if inherited == parent.root_dir:
	            inherited = None
	    except AttributeError:
	        return ""

	    if inherited is None:
	        return ""

	    warnings.warn(
	        "ServerApp.preferred_dir config is deprecated in jupyter-server 2.0. Use FileContentsManager.preferred_dir instead",
	        FutureWarning,
	        stacklevel=3,
	    )
	    try:
	        return Path(inherited).relative_to(self.root_dir).as_posix()
	    except ValueError:
	        raise TraitError("%s is outside root contents directory" % inherited) from None

	@validate("preferred_dir")
	def _validate_preferred_dir(self, proposal):
	    """Convert the proposed value with ``to_api_path`` (relative to ``root_dir``), then defer to the parent class validator."""
	    # It should be safe to pass an API path through this method:
	    api_path = to_api_path(proposal["value"], self.root_dir)
	    proposal["value"] = api_path
	    return super()._validate_preferred_dir(proposal)

	@default("checkpoints_class")
	def _checkpoints_class_default(self):
	    """Use ``FileCheckpoints`` as the default ``checkpoints_class``."""
	    return FileCheckpoints

	# Trait read by ``delete_file``: when true (the default) paths are handed to
	# ``send2trash`` instead of being unlinked / removed with ``shutil.rmtree``.
	delete_to_trash = Bool(
	True,
	config=True,
	help="""If True (default), deleting files will send them to the
	platform's trash/recycle bin, where they can be recovered. If False,
	deleting files really deletes them.""",
	)

	# Trait read by ``delete_file``: when true, the "directory not empty" refusals
	# are skipped.
	# NOTE(review): the last sentence of the help text says "if delete_to_trash is
	# True"; in ``delete_file`` the platform-independent refusal of non-empty
	# directories sits on the ``delete_to_trash`` == False path -- confirm the wording.
	always_delete_dir = Bool(
	False,
	config=True,
	help="""If True, deleting a non-empty directory will always be allowed.
	WARNING this may result in files being permanently removed; e.g. on Windows,
	if the data size is too big for the trash/recycle bin the directory will be permanently
	deleted. If False (default), the non-empty directory will be sent to the trash only
	if safe. And if ``delete_to_trash`` is True, the directory won't be deleted.""",
	)

	@default("files_handler_class")
	def _files_handler_class_default(self):
	    """Use ``AuthenticatedFileHandler`` as the default ``files_handler_class``."""
	    return AuthenticatedFileHandler

	@default("files_handler_params")
	def _files_handler_params_default(self):
	    """Default ``files_handler_params``: a dict whose ``"path"`` entry is ``root_dir``."""
	    params = {"path": self.root_dir}
	    return params

	def is_hidden(self, path):
	    """Report whether an API-style path refers to a hidden file or directory.

	    Parameters
	    ----------
	    path : str
	        The path to check. This is an API path (`/` separated,
	        relative to root_dir).

	    Returns
	    -------
	    hidden : bool
	        Result of ``jupyter_core.paths.is_hidden`` for the corresponding
	        OS path, evaluated relative to ``root_dir``.
	    """
	    os_path = self._get_os_path(path=path.strip("/"))
	    return is_hidden(os_path, self.root_dir)

	def is_writable(self, path):
	    """Report whether an API-style path refers to a writable file or directory.

	    Parameters
	    ----------
	    path : str
	        The path to check. This is an API path (`/` separated,
	        relative to root_dir).

	    Returns
	    -------
	    writable : bool
	        Result of ``os.access(..., os.W_OK)`` on the corresponding OS path;
	        ``False`` (after logging an error) if that check raises ``OSError``.
	    """
	    os_path = self._get_os_path(path=path.strip("/"))
	    try:
	        writable = os.access(os_path, os.W_OK)
	    except OSError:
	        self.log.error("Failed to check write permissions on %s", os_path)
	        writable = False
	    return writable

	def file_exists(self, path):
	    """Tell whether a regular file exists at the given API path.

	    API-style wrapper for os.path.isfile

	    Parameters
	    ----------
	    path : str
	        The relative path to the file (with '/' as separator)

	    Returns
	    -------
	    exists : bool
	        Whether the file exists.
	    """
	    api_path = path.strip("/")
	    return os.path.isfile(self._get_os_path(api_path))

	def dir_exists(self, path):
	    """Tell whether a directory exists at the given API path.

	    API-style wrapper for os.path.isdir

	    Parameters
	    ----------
	    path : str
	        The path to check. This is an API path (`/` separated,
	        relative to root_dir).

	    Returns
	    -------
	    exists : bool
	        Whether the path is indeed a directory.
	    """
	    api_path = path.strip("/")
	    return os.path.isdir(self._get_os_path(path=api_path))

	def exists(self, path):
	    """Tell whether anything exists at the given API path.

	    Delegates to ``jupyter_core.paths.exists`` on the corresponding OS path.

	    Parameters
	    ----------
	    path : str
	        The API path to the file (with '/' as separator)

	    Returns
	    -------
	    exists : bool
	        Whether the target exists.
	    """
	    api_path = path.strip("/")
	    return exists(self._get_os_path(path=api_path))

	def _base_model(self, path):
	    """Assemble the fields shared by every contents model.

	    The target is inspected with ``os.lstat``.  Name, path, timestamps, size
	    and writability are filled in; ``content``, ``format``, ``mimetype``,
	    ``hash`` and ``hash_algorithm`` are initialised to ``None``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        404 when the path is hidden and ``allow_hidden`` is false.
	    """
	    os_path = self._get_os_path(path)
	    stat_result = os.lstat(os_path)

	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
	        raise web.HTTPError(404, "file or directory does not exist: %r" % path)

	    try:
	        size = stat_result.st_size
	    except (ValueError, OSError):
	        self.log.warning("Unable to get size.")
	        size = None

	    try:
	        last_modified = tz.utcfromtimestamp(stat_result.st_mtime)
	    except (ValueError, OSError):
	        # Files can rarely have an invalid timestamp
	        # https://github.com/jupyter/notebook/issues/2539
	        # https://github.com/jupyter/notebook/issues/2757
	        # Use the Unix epoch as a fallback so we don't crash.
	        self.log.warning("Invalid mtime %s for %s", stat_result.st_mtime, os_path)
	        last_modified = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

	    try:
	        created = tz.utcfromtimestamp(stat_result.st_ctime)
	    except (ValueError, OSError):  # Same fallback as for mtime
	        self.log.warning("Invalid ctime %s for %s", stat_result.st_ctime, os_path)
	        created = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

	    # Key order matches the order in which the fields were historically set.
	    return {
	        "name": path.rsplit("/", 1)[-1],
	        "path": path,
	        "last_modified": last_modified,
	        "created": created,
	        "content": None,
	        "format": None,
	        "mimetype": None,
	        "size": size,
	        "writable": self.is_writable(path),
	        "hash": None,
	        "hash_algorithm": None,
	    }

	def _dir_model(self, path, content=True):
	    """Build the model for a directory.

	    When ``content`` is true the model's ``content`` is a list of child
	    models (each fetched with ``content=False``) and ``format`` is ``"json"``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        404 if the path is not a directory, or is hidden while
	        ``allow_hidden`` is false.
	    """
	    dir_os_path = self._get_os_path(path)
	    not_found = "directory does not exist: %r" % path

	    if not os.path.isdir(dir_os_path):
	        raise web.HTTPError(404, not_found)
	    if not self.allow_hidden and is_hidden(dir_os_path, self.root_dir):
	        self.log.info("Refusing to serve hidden directory %r, via 404 Error", dir_os_path)
	        raise web.HTTPError(404, not_found)

	    model = self._base_model(path)
	    model["type"] = "directory"
	    model["size"] = None
	    if not content:
	        return model

	    listing = []
	    model["content"] = listing
	    os_dir = self._get_os_path(path)
	    for name in os.listdir(os_dir):
	        try:
	            entry_path = os.path.join(os_dir, name)
	        except UnicodeDecodeError as err:
	            self.log.warning("failed to decode filename '%s': %r", name, err)
	            continue

	        try:
	            entry_stat = os.lstat(entry_path)
	        except OSError as err:
	            # skip over broken symlinks in listing
	            if err.errno == errno.ENOENT:
	                self.log.warning("%s doesn't exist", entry_path)
	            elif err.errno != errno.EACCES:  # Don't provide clues about protected files
	                self.log.warning("Error stat-ing %s: %r", entry_path, err)
	            continue

	        # Only symlinks, regular files and directories are listed.
	        mode = entry_stat.st_mode
	        if not (stat.S_ISLNK(mode) or stat.S_ISREG(mode) or stat.S_ISDIR(mode)):
	            self.log.debug("%s not a regular file", entry_path)
	            continue

	        try:
	            if not self.should_list(name):
	                continue
	            if not self.allow_hidden and is_file_hidden(entry_path, stat_res=entry_stat):
	                continue
	            listing.append(self.get(path=f"{path}/{name}", content=False))
	        except OSError as err:
	            # ELOOP: recursive symlink, also don't show failure due to permissions
	            if err.errno not in (errno.ELOOP, errno.EACCES):
	                self.log.warning(
	                    "Unknown error checking if file %r is hidden",
	                    entry_path,
	                    exc_info=True,
	                )

	    model["format"] = "json"
	    return model

	def _file_model(self, path, content=True, format=None, require_hash=False):
	    """Build the model for a plain file.

	    If ``content`` is requested, the file contents are included.

	    format:
	        If 'text', the contents will be decoded as UTF-8.
	        If 'base64', the raw bytes contents will be encoded as base64.
	        If not specified, try to decode as UTF-8, and fall back to base64

	    If ``require_hash`` is true, the model is updated with the result of
	    ``_get_hash`` on the file's bytes.
	    """
	    model = self._base_model(path)
	    model["type"] = "file"

	    os_path = self._get_os_path(path)
	    guessed_type, _encoding = mimetypes.guess_type(os_path)
	    model["mimetype"] = guessed_type

	    raw_bytes = None
	    if content:
	        content, format, raw_bytes = self._read_file(os_path, format, raw=True)  # type: ignore[misc]
	        if model["mimetype"] is None:
	            # No mimetype could be guessed: choose one from the format that was read.
	            fallback_mimetypes = {
	                "text": "text/plain",
	                "base64": "application/octet-stream",
	            }
	            model["mimetype"] = fallback_mimetypes[format]
	        model["content"] = content
	        model["format"] = format

	    if require_hash:
	        if raw_bytes is None:
	            # Content was not loaded above, so read the bytes just for hashing.
	            raw_bytes, _ = self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
	        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

	    return model

	def _notebook_model(self, path, content=True, require_hash=False):
	    """Build the model for a notebook.

	    If ``content`` is requested, the notebook is read as version 4, its
	    trusted cells are marked, and it is stored in the model as a JSON
	    structure (not double-serialized).

	    If ``require_hash`` is true, the model is updated with the result of
	    ``_get_hash`` on the notebook file's bytes.
	    """
	    model = self._base_model(path)
	    model["type"] = "notebook"
	    os_path = self._get_os_path(path)

	    raw_bytes = None
	    if content:
	        validation_error: dict[str, t.Any] = {}
	        notebook, raw_bytes = self._read_notebook(
	            os_path, as_version=4, capture_validation_error=validation_error, raw=True
	        )
	        self.mark_trusted_cells(notebook, path)
	        model["content"] = notebook
	        model["format"] = "json"
	        self.validate_notebook_model(model, validation_error)

	    if require_hash:
	        if raw_bytes is None:
	            # Content was not loaded above, so read the bytes just for hashing.
	            raw_bytes, _ = self._read_file(os_path, "byte")  # type: ignore[misc]
	        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

	    return model

	def get(self, path, content=True, type=None, format=None, require_hash=False):
	    """Return the contents model for the entity at ``path``.

	    Parameters
	    ----------
	    path : str
	        the API path that describes the relative path for the target
	    content : bool
	        Whether to include the contents in the reply
	    type : str, optional
	        The requested type - 'file', 'notebook', or 'directory'.
	        Will raise HTTPError 400 if the content doesn't match.
	    format : str, optional
	        The requested format for file contents. 'text' or 'base64'.
	        Ignored if this returns a notebook or directory model.
	    require_hash: bool, optional
	        Whether to include the hash of the file contents.

	    Returns
	    -------
	    model : dict
	        the contents model. If content=True, returns the contents
	        of the file or directory as well.

	    Raises
	    ------
	    tornado.web.HTTPError
	        404 if the path does not exist or is hidden while ``allow_hidden``
	        is false; 400 if ``type`` does not match what is on disk.
	    """
	    path = path.strip("/")
	    os_path = self._get_os_path(path)
	    not_found = "file or directory does not exist: %r" % path

	    if not self.exists(path):
	        raise web.HTTPError(404, not_found)

	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
	        raise web.HTTPError(404, not_found)

	    if os.path.isdir(os_path):
	        if type not in (None, "directory"):
	            raise web.HTTPError(
	                400,
	                f"{path} is a directory, not a {type}",
	                reason="bad type",
	            )
	        model = self._dir_model(path, content=content)
	    elif type == "directory":
	        # A directory was requested but the target is not one.
	        raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
	    elif type == "notebook" or (type is None and path.endswith(".ipynb")):
	        model = self._notebook_model(path, content=content, require_hash=require_hash)
	    else:
	        model = self._file_model(
	            path, content=content, format=format, require_hash=require_hash
	        )

	    self.emit(data={"action": "get", "path": path})
	    return model

	def _save_directory(self, os_path, model, path=""):
	    """Create the directory ``os_path`` unless it already exists.

	    ``model`` and ``path`` are accepted but not used here.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 if the path is hidden while ``allow_hidden`` is false, or if
	        something that is not a directory already exists at ``os_path``.
	    """
	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, "Cannot create directory %r" % os_path)

	    if not os.path.exists(os_path):
	        with self.perm_to_403():
	            os.mkdir(os_path)
	        return

	    if not os.path.isdir(os_path):
	        raise web.HTTPError(400, "Not a directory: %s" % (os_path))

	    self.log.debug("Directory %r already exists", os_path)

	def save(self, model, path=""):
	    """Save ``model`` at ``path`` and return the resulting model without content.

	    Pre-save hooks run first, then the model is written according to its
	    ``type`` ("notebook", "file" or "directory"), and finally post-save hooks
	    run and a "save" event is emitted.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 for a missing type/content, a hidden target while
	        ``allow_hidden`` is false, or an unhandled type; 500 for any other
	        exception raised while writing.
	    """
	    path = path.strip("/")

	    self.run_pre_save_hooks(model=model, path=path)

	    if "type" not in model:
	        raise web.HTTPError(400, "No file type provided")
	    model_type = model["type"]
	    if model_type != "directory" and "content" not in model:
	        raise web.HTTPError(400, "No file content provided")

	    os_path = self._get_os_path(path)
	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}")

	    self.log.debug("Saving %s", os_path)

	    validation_error: dict[str, t.Any] = {}
	    try:
	        if model_type == "notebook":
	            notebook = nbformat.from_dict(model["content"])
	            self.check_and_sign(notebook, path)
	            self._save_notebook(os_path, notebook, capture_validation_error=validation_error)
	            # One checkpoint should always exist for notebooks.
	            if not self.checkpoints.list_checkpoints(path):
	                self.create_checkpoint(path)
	        elif model_type == "file":
	            # Missing format will be handled internally by _save_file.
	            self._save_file(os_path, model["content"], model.get("format"))
	        elif model_type == "directory":
	            self._save_directory(os_path, model, path)
	        else:
	            raise web.HTTPError(400, "Unhandled contents type: %s" % model_type)
	    except web.HTTPError:
	        raise
	    except Exception as e:
	        self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
	        raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

	    validation_message = None
	    if model_type == "notebook":
	        self.validate_notebook_model(model, validation_error=validation_error)
	        validation_message = model.get("message", None)

	    saved = self.get(path, content=False)
	    if validation_message:
	        saved["message"] = validation_message

	    self.run_post_save_hooks(model=saved, os_path=os_path)
	    self.emit(data={"action": "save", "path": path})
	    return saved

	def delete_file(self, path):
	    """Delete the file or directory at ``path``.

	    With ``delete_to_trash`` enabled the target is passed to ``send2trash``;
	    otherwise files are unlinked and directories removed with
	    ``shutil.rmtree``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 for hidden targets (when ``allow_hidden`` is false), refused
	        non-empty directories, or a ``send2trash`` failure; 404 if the
	        target does not exist; 403 if it is not writable when trashing.
	    """
	    path = path.strip("/")
	    os_path = self._get_os_path(path)

	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

	    if not self.exists(path):
	        raise web.HTTPError(404, "file or directory does not exist: %r" % path)

	    def is_non_empty_dir(os_path):
	        if not os.path.isdir(os_path):
	            return False
	        # A directory containing only leftover checkpoints is
	        # considered empty.
	        cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
	        return bool(set(os.listdir(os_path)) - {cp_dir})

	    if self.delete_to_trash:
	        windows_guard = not self.always_delete_dir and sys.platform == "win32"
	        if windows_guard and is_non_empty_dir(os_path):
	            # send2trash can really delete files on Windows, so disallow
	            # deleting non-empty files. See Github issue 3631.
	            raise web.HTTPError(400, "Directory %s not empty" % os_path)
	        # send2trash now supports deleting directories. see #1290
	        if not self.is_writable(path):
	            raise web.HTTPError(403, "Permission denied: %s" % path) from None
	        self.log.debug("Sending %s to trash", os_path)
	        try:
	            send2trash(os_path)
	        except OSError as e:
	            raise web.HTTPError(400, "send2trash failed: %s" % e) from e
	        return

	    if not os.path.isdir(os_path):
	        self.log.debug("Unlinking file %s", os_path)
	        with self.perm_to_403():
	            os.unlink(os_path)
	        return

	    # Don't permanently delete non-empty directories.
	    if not self.always_delete_dir and is_non_empty_dir(os_path):
	        raise web.HTTPError(400, "Directory %s not empty" % os_path)
	    self.log.debug("Removing directory %s", os_path)
	    with self.perm_to_403():
	        shutil.rmtree(os_path)

	def rename_file(self, old_path, new_path):
	    """Move ``old_path`` to ``new_path`` (both API paths).

	    Does nothing when both paths are equal after stripping slashes.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 if either path is hidden while ``allow_hidden`` is false; 409
	        if the destination exists and is not the same file as the source;
	        500 for any non-HTTP error raised by the move.
	    """
	    old_path = old_path.strip("/")
	    new_path = new_path.strip("/")
	    if new_path == old_path:
	        return

	    new_os_path = self._get_os_path(new_path)
	    old_os_path = self._get_os_path(old_path)

	    if not self.allow_hidden:
	        touches_hidden = is_hidden(old_os_path, self.root_dir) or is_hidden(
	            new_os_path, self.root_dir
	        )
	        if touches_hidden:
	            raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

	    # Refuse to clobber an existing, different destination.
	    destination_taken = os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path)
	    if destination_taken:
	        raise web.HTTPError(409, "File already exists: %s" % new_path)

	    try:
	        with self.perm_to_403():
	            shutil.move(old_os_path, new_os_path)
	    except web.HTTPError:
	        raise
	    except Exception as e:
	        raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e

	def info_string(self):
	    """Return the translated "serving from" message with ``root_dir`` filled in."""
	    template = _i18n("Serving notebooks from local directory: %s")
	    return template % self.root_dir

	def get_kernel_path(self, path, model=None):
	    """Return the initial API path of a kernel associated with a given notebook.

	    If ``path`` is an existing directory it is returned unchanged; otherwise
	    the part before the last ``/`` is returned (``""`` when there is none).
	    ``model`` is accepted but not used.
	    """
	    if self.dir_exists(path):
	        return path
	    # rpartition yields an empty head when the path contains no "/".
	    parent_dir, _sep, _name = path.rpartition("/")
	    return parent_dir

	def copy(self, from_path, to_path=None):
	    """
	    Copy an existing file or directory and return its new model.
	    If to_path not specified, it will be the parent directory of from_path.
	    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
	    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
	    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
	    from_path must be a full path to a file or directory.

	    Files are delegated to the parent class ``copy``.  Directories are first
	    checked with ``check_folder_size`` and then copied by ``_copy_dir``.
	    """
	    path = from_path.strip("/")
	    if to_path is not None:
	        to_path = to_path.strip("/")

	    if "/" in path:
	        from_dir, from_name = path.rsplit("/", 1)
	    else:
	        from_dir, from_name = "", path

	    # Only the type is needed here, so do not load the content.
	    model = self.get(path, content=False)
	    if model["type"] != "directory":
	        # let the super class handle copying files
	        return super().copy(from_path=from_path, to_path=to_path)

	    # limit the size of folders being copied to prevent a timeout error
	    self.check_folder_size(path)

	    # Without an explicit destination the copy lands next to the source.
	    # The resolved directory (never the string "None") is what _copy_dir
	    # receives as the parent of the new folder.
	    if to_path is None:
	        to_path = from_dir
	    to_name = copy_pat.sub(".", from_name)
	    if self.dir_exists(to_path):
	        to_name = super().increment_filename(to_name, to_path, insert="-Copy")

	    return self._copy_dir(
	        from_path=from_path,
	        to_path_original=to_path,
	        to_name=to_name,
	        to_path=f"{to_path}/{to_name}",
	    )

	def _copy_dir(self, from_path, to_path_original, to_name, to_path):
	    """
	    Copy the directory ``from_path`` with ``shutil.copytree`` and return the
	    model (without content) fetched from ``to_path``.

	    The destination on disk is ``to_name`` inside the OS path that
	    corresponds to ``to_path_original``.  Any ``OSError`` is logged and
	    re-raised as an HTTP 400 error.
	    """
	    try:
	        source = self._get_os_path(from_path.strip("/"))
	        target_parent = self._get_os_path(to_path_original.strip("/"))
	        target = f"{target_parent}/{to_name}"
	        shutil.copytree(source, target)
	        copied_model = self.get(to_path, content=False)
	    except OSError as err:
	        self.log.error(f"OSError in _copy_dir: {err}")
	        raise web.HTTPError(
	            400,
	            f"Can't copy '{from_path}' into Folder '{to_path}'",
	        ) from err

	    return copied_model

	def check_folder_size(self, path):
	    """
	    Limit the size of folders being copied to be no more than the
	    trait max_copy_folder_size_mb to prevent a timeout error.

	    Parameters
	    ----------
	    path : str
	        API path of the directory about to be copied.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 if the size reported by ``_get_dir_size`` exceeds the limit.
	    """
	    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
	    size = int(self._get_dir_size(self._get_os_path(path)))
	    # _get_dir_size reports kilobytes on macOS (``du -sk``) and bytes elsewhere.
	    if platform.system() == "Darwin":
	        size *= 1024

	    if size > limit_bytes:
	        # Single-line message: no embedded newlines or source indentation.
	        raise web.HTTPError(
	            400,
	            f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
	            f'"{path}" is {self._human_readable_size(size)}',
	        )

	def _get_dir_size(self, path="."):
	    """
	    Run the command line program ``du`` to get the directory size.

	    Returns the first whitespace-separated field of ``du``'s output as a
	    string: kilobytes on macOS (``du -sk``), bytes elsewhere
	    (``--block-size=1``).  If anything goes wrong a warning is logged and
	    ``"0"`` is returned.
	    """
	    try:
	        if platform.system() == "Darwin":
	            # returns the size of the folder in KB
	            command = ["du", "-sk", path]
	        else:
	            command = ["du", "-s", "--block-size=1", path]
	        completed = subprocess.run(  # noqa: S603
	            command,  # noqa: S607
	            capture_output=True,
	            check=True,
	        )
	        result = completed.stdout.split()

	        self.log.info(f"current status of du command {result}")
	        size = result[0].decode("utf-8")
	    except Exception:
	        self.log.warning(
	            "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
	            path,
	        )
	        return "0"
	    return size

	def _human_readable_size(self, size):
	"""
	returns folder size in a human readable format
	"""
	if size == 0:
	return "0 Bytes"

	units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
	order = int(math.log2(size) / 10) if size else 0

	return f"{size / (1 << (order * 10)):.4g} {units[order]}"


	class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
	"""An async file contents manager."""

	@default("checkpoints_class")
	def _checkpoints_class_default(self):
	    """Use ``AsyncFileCheckpoints`` as the default ``checkpoints_class``."""
	    return AsyncFileCheckpoints

	async def _dir_model(self, path, content=True):
	    """Build the model for a directory.

	    When ``content`` is true the model's ``content`` is a list of child
	    models (each fetched with ``content=False``) and ``format`` is ``"json"``.
	    The directory listing and the per-entry ``lstat`` calls go through
	    ``run_sync``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        404 if the path is not a directory, or is hidden while
	        ``allow_hidden`` is false.
	    """
	    dir_os_path = self._get_os_path(path)
	    not_found = "directory does not exist: %r" % path

	    if not os.path.isdir(dir_os_path):
	        raise web.HTTPError(404, not_found)
	    if not self.allow_hidden and is_hidden(dir_os_path, self.root_dir):
	        self.log.info("Refusing to serve hidden directory %r, via 404 Error", dir_os_path)
	        raise web.HTTPError(404, not_found)

	    model = self._base_model(path)
	    model["type"] = "directory"
	    model["size"] = None
	    if not content:
	        return model

	    listing = []
	    model["content"] = listing
	    os_dir = self._get_os_path(path)
	    entry_names = await run_sync(os.listdir, os_dir)
	    for name in entry_names:
	        try:
	            entry_path = os.path.join(os_dir, name)
	        except UnicodeDecodeError as err:
	            self.log.warning("failed to decode filename '%s': %r", name, err)
	            continue

	        try:
	            entry_stat = await run_sync(os.lstat, entry_path)
	        except OSError as err:
	            # skip over broken symlinks in listing
	            if err.errno == errno.ENOENT:
	                self.log.warning("%s doesn't exist", entry_path)
	            elif err.errno != errno.EACCES:  # Don't provide clues about protected files
	                self.log.warning("Error stat-ing %s: %r", entry_path, err)
	            continue

	        # Only symlinks, regular files and directories are listed.
	        mode = entry_stat.st_mode
	        if not (stat.S_ISLNK(mode) or stat.S_ISREG(mode) or stat.S_ISDIR(mode)):
	            self.log.debug("%s not a regular file", entry_path)
	            continue

	        try:
	            if not self.should_list(name):
	                continue
	            if not self.allow_hidden and is_file_hidden(entry_path, stat_res=entry_stat):
	                continue
	            listing.append(await self.get(path=f"{path}/{name}", content=False))
	        except OSError as err:
	            # ELOOP: recursive symlink, also don't show failure due to permissions
	            if err.errno not in (errno.ELOOP, errno.EACCES):
	                self.log.warning(
	                    "Unknown error checking if file %r is hidden",
	                    entry_path,
	                    exc_info=True,
	                )

	    model["format"] = "json"
	    return model

	async def _file_model(self, path, content=True, format=None, require_hash=False):
	    """Build the model for a plain file.

	    If ``content`` is requested, the file contents are included.

	    format:
	        If 'text', the contents will be decoded as UTF-8.
	        If 'base64', the raw bytes contents will be encoded as base64.
	        If not specified, try to decode as UTF-8, and fall back to base64

	    If ``require_hash`` is true, the model is updated with the result of
	    ``_get_hash`` on the file's bytes.
	    """
	    model = self._base_model(path)
	    model["type"] = "file"

	    os_path = self._get_os_path(path)
	    guessed_type, _encoding = mimetypes.guess_type(os_path)
	    model["mimetype"] = guessed_type

	    raw_bytes = None
	    if content:
	        content, format, raw_bytes = await self._read_file(os_path, format, raw=True)  # type: ignore[misc]
	        if model["mimetype"] is None:
	            # No mimetype could be guessed: choose one from the format that was read.
	            fallback_mimetypes = {
	                "text": "text/plain",
	                "base64": "application/octet-stream",
	            }
	            model["mimetype"] = fallback_mimetypes[format]
	        model["content"] = content
	        model["format"] = format

	    if require_hash:
	        if raw_bytes is None:
	            # Content was not loaded above, so read the bytes just for hashing.
	            raw_bytes, _ = await self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
	        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

	    return model

	async def _notebook_model(self, path, content=True, require_hash=False):
	    """Build the model for a notebook.

	    If ``content`` is requested, the notebook is read as version 4, its
	    trusted cells are marked, and it is stored in the model as a JSON
	    structure (not double-serialized).

	    If ``require_hash`` is true, the model is updated with the result of
	    ``_get_hash`` on the notebook file's bytes.
	    """
	    model = self._base_model(path)
	    model["type"] = "notebook"
	    os_path = self._get_os_path(path)

	    raw_bytes = None
	    if content:
	        validation_error: dict[str, t.Any] = {}
	        notebook, raw_bytes = await self._read_notebook(
	            os_path, as_version=4, capture_validation_error=validation_error, raw=True
	        )
	        self.mark_trusted_cells(notebook, path)
	        model["content"] = notebook
	        model["format"] = "json"
	        self.validate_notebook_model(model, validation_error)

	    if require_hash:
	        if raw_bytes is None:
	            # Content was not loaded above, so read the bytes just for hashing.
	            raw_bytes, _ = await self._read_file(os_path, "byte")  # type: ignore[misc]
	        model.update(**(self._get_hash(raw_bytes)))  # type: ignore[arg-type]

	    return model

	async def get(self, path, content=True, type=None, format=None, require_hash=False):
	    """Takes a path for an entity and returns its model

	    Parameters
	    ----------
	    path : str
	        the API path that describes the relative path for the target
	    content : bool
	        Whether to include the contents in the reply
	    type : str, optional
	        The requested type - 'file', 'notebook', or 'directory'.
	        Will raise HTTPError 400 if the content doesn't match.
	    format : str, optional
	        The requested format for file contents. 'text' or 'base64'.
	        Ignored if this returns a notebook or directory model.
	    require_hash: bool, optional
	        Whether to include the hash of the file contents.

	    Returns
	    -------
	    model : dict
	        the contents model. If content=True, returns the contents
	        of the file or directory as well.

	    Raises
	    ------
	    tornado.web.HTTPError
	        404 if the path does not exist or is hidden while ``allow_hidden``
	        is false; 400 if ``type`` does not match what is on disk.
	    """
	    path = path.strip("/")
	    not_found = "No such file or directory: %s" % path

	    if not self.exists(path):
	        raise web.HTTPError(404, not_found)

	    os_path = self._get_os_path(path)

	    # Reject hidden targets before the type checks, as the synchronous
	    # manager does, so that a "bad type" 400 cannot reveal that a hidden
	    # entry exists.
	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
	        raise web.HTTPError(404, not_found)

	    if os.path.isdir(os_path):
	        if type not in (None, "directory"):
	            raise web.HTTPError(
	                400,
	                f"{path} is a directory, not a {type}",
	                reason="bad type",
	            )
	        model = await self._dir_model(path, content=content)
	    elif type == "notebook" or (type is None and path.endswith(".ipynb")):
	        model = await self._notebook_model(path, content=content, require_hash=require_hash)
	    else:
	        if type == "directory":
	            raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
	        model = await self._file_model(
	            path, content=content, format=format, require_hash=require_hash
	        )
	    self.emit(data={"action": "get", "path": path})
	    return model

	async def _save_directory(self, os_path, model, path=""):
	    """Create the directory ``os_path`` unless it already exists.

	    ``model`` and ``path`` are accepted but not used here.  The ``mkdir``
	    call goes through ``run_sync``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 if the path is hidden while ``allow_hidden`` is false, or if
	        something that is not a directory already exists at ``os_path``.
	    """
	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, "Cannot create hidden directory %r" % os_path)

	    if not os.path.exists(os_path):
	        with self.perm_to_403():
	            await run_sync(os.mkdir, os_path)
	        return

	    if not os.path.isdir(os_path):
	        raise web.HTTPError(400, "Not a directory: %s" % (os_path))

	    self.log.debug("Directory %r already exists", os_path)

	async def save(self, model, path=""):
	    """Save the file model and return the model with no content.

	    Pre-save hooks run first, then the model is written according to its
	    ``type`` ("notebook", "file" or "directory"), and finally post-save hooks
	    run and a "save" event is emitted.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 for a missing type/content, a hidden target while
	        ``allow_hidden`` is false, or an unhandled type; 500 for any other
	        exception raised while writing.
	    """
	    path = path.strip("/")

	    self.run_pre_save_hooks(model=model, path=path)

	    if "type" not in model:
	        raise web.HTTPError(400, "No file type provided")
	    if "content" not in model and model["type"] != "directory":
	        raise web.HTTPError(400, "No file content provided")

	    os_path = self._get_os_path(path)

	    # Same guard as the synchronous manager: without it a file or notebook
	    # could be written to a hidden path even though allow_hidden is false.
	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}")

	    self.log.debug("Saving %s", os_path)

	    validation_error: dict[str, t.Any] = {}
	    try:
	        if model["type"] == "notebook":
	            nb = nbformat.from_dict(model["content"])
	            self.check_and_sign(nb, path)
	            await self._save_notebook(os_path, nb, capture_validation_error=validation_error)
	            # One checkpoint should always exist for notebooks.
	            if not (await self.checkpoints.list_checkpoints(path)):
	                await self.create_checkpoint(path)
	        elif model["type"] == "file":
	            # Missing format will be handled internally by _save_file.
	            await self._save_file(os_path, model["content"], model.get("format"))
	        elif model["type"] == "directory":
	            await self._save_directory(os_path, model, path)
	        else:
	            raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"])
	    except web.HTTPError:
	        raise
	    except Exception as e:
	        self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
	        raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

	    validation_message = None
	    if model["type"] == "notebook":
	        self.validate_notebook_model(model, validation_error=validation_error)
	        validation_message = model.get("message", None)

	    model = await self.get(path, content=False)
	    if validation_message:
	        model["message"] = validation_message

	    self.run_post_save_hooks(model=model, os_path=os_path)
	    self.emit(data={"action": "save", "path": path})
	    return model

	async def delete_file(self, path):
	    """Delete the file or directory at ``path``.

	    With ``delete_to_trash`` enabled the target is passed to ``send2trash``;
	    otherwise files are unlinked and directories removed with
	    ``shutil.rmtree``, both through ``run_sync``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 for hidden targets (when ``allow_hidden`` is false), refused
	        non-empty directories, or a ``send2trash`` failure; 404 if the
	        target does not exist; 403 if it is not writable when trashing.
	    """
	    path = path.strip("/")
	    os_path = self._get_os_path(path)

	    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
	        raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

	    if not os.path.exists(os_path):
	        raise web.HTTPError(404, "File or directory does not exist: %s" % os_path)

	    async def is_non_empty_dir(os_path):
	        if not os.path.isdir(os_path):
	            return False
	        # A directory containing only leftover checkpoints is
	        # considered empty.
	        cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
	        entries = set(await run_sync(os.listdir, os_path))
	        return bool(entries - {cp_dir})

	    if self.delete_to_trash:
	        windows_guard = not self.always_delete_dir and sys.platform == "win32"
	        if windows_guard and await is_non_empty_dir(os_path):
	            # send2trash can really delete files on Windows, so disallow
	            # deleting non-empty files. See Github issue 3631.
	            raise web.HTTPError(400, "Directory %s not empty" % os_path)
	        # send2trash now supports deleting directories. see #1290
	        if not self.is_writable(path):
	            raise web.HTTPError(403, "Permission denied: %s" % path) from None
	        self.log.debug("Sending %s to trash", os_path)
	        try:
	            send2trash(os_path)
	        except OSError as e:
	            raise web.HTTPError(400, "send2trash failed: %s" % e) from e
	        return

	    if not os.path.isdir(os_path):
	        self.log.debug("Unlinking file %s", os_path)
	        with self.perm_to_403():
	            await run_sync(os.unlink, os_path)
	        return

	    # Don't permanently delete non-empty directories.
	    if not self.always_delete_dir and await is_non_empty_dir(os_path):
	        raise web.HTTPError(400, "Directory %s not empty" % os_path)
	    self.log.debug("Removing directory %s", os_path)
	    with self.perm_to_403():
	        await run_sync(shutil.rmtree, os_path)

	async def rename_file(self, old_path, new_path):
	    """Move ``old_path`` to ``new_path`` (both API paths).

	    Does nothing when both paths are equal after stripping slashes.  The
	    move itself goes through ``run_sync``.

	    Raises
	    ------
	    tornado.web.HTTPError
	        400 if either path is hidden while ``allow_hidden`` is false; 409
	        if the destination exists and is not the same file as the source;
	        500 for any non-HTTP error raised by the move.
	    """
	    old_path = old_path.strip("/")
	    new_path = new_path.strip("/")
	    if new_path == old_path:
	        return

	    new_os_path = self._get_os_path(new_path)
	    old_os_path = self._get_os_path(old_path)

	    if not self.allow_hidden:
	        touches_hidden = is_hidden(old_os_path, self.root_dir) or is_hidden(
	            new_os_path, self.root_dir
	        )
	        if touches_hidden:
	            raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

	    # Refuse to clobber an existing, different destination.
	    destination_taken = os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path)
	    if destination_taken:
	        raise web.HTTPError(409, "File already exists: %s" % new_path)

	    try:
	        with self.perm_to_403():
	            await run_sync(shutil.move, old_os_path, new_os_path)
	    except web.HTTPError:
	        raise
	    except Exception as e:
	        raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e

	async def dir_exists(self, path):
	    """Return True when the API path ``path`` maps to an existing directory.

	    Leading and trailing slashes are stripped before the path is resolved
	    through ``self._get_os_path``.
	    """
	    return os.path.isdir(self._get_os_path(path=path.strip("/")))

	async def file_exists(self, path):
	    """Return True when the API path ``path`` maps to an existing regular file.

	    Leading and trailing slashes are stripped before the path is resolved
	    through ``self._get_os_path``.
	    """
	    return os.path.isfile(self._get_os_path(path.strip("/")))

	async def is_hidden(self, path):
	    """Return whether the API path ``path`` is a hidden file or directory.

	    The path is stripped of surrounding slashes, resolved to an OS path,
	    and checked relative to ``self.root_dir``.
	    """
	    api_path = path.strip("/")
	    return is_hidden(self._get_os_path(path=api_path), self.root_dir)

	async def get_kernel_path(self, path, model=None):
	    """Return the initial API path for a kernel associated with ``path``.

	    A directory is returned unchanged; for anything else the result is
	    the part of ``path`` before its last ``/`` (an empty string when
	    there is no slash). ``model`` is accepted but not used here.
	    """
	    if await self.dir_exists(path):
	        return path
	    # rpartition yields an empty head when no "/" is present.
	    containing_dir, _sep, _leaf = path.rpartition("/")
	    return containing_dir

	async def copy(self, from_path, to_path=None):
	    """
	    Copy an existing file or directory and return its new model.

	    If to_path is not specified, the copy is created in the parent directory of from_path.
	    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
	    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
	    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
	    from_path must be a full path to a file or directory.

	    Files are delegated to ``AsyncContentsManager.copy``; directories are
	    size-checked with ``check_folder_size`` and copied by ``_copy_dir``.
	    """
	    path = from_path.strip("/")
	    # Split into parent directory and leaf name; from_dir is "" at the root.
	    from_dir, _, from_name = path.rpartition("/")

	    is_destination_specified = to_path is not None
	    if is_destination_specified:
	        to_path_original = str(to_path)
	        to_path = to_path.strip("/")
	    else:
	        # str(None) would give the literal "None" and the directory would
	        # be copied under a folder of that name; fall back to the source's
	        # parent directory, which is where the copy is meant to go.
	        to_path_original = from_dir

	    model = await self.get(path)
	    if model["type"] != "directory":
	        # let the super class handle copying files
	        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

	    # limit the size of folders being copied to prevent a timeout error
	    await self.check_folder_size(path)

	    if not is_destination_specified:
	        to_path = from_dir
	    to_name = copy_pat.sub(".", from_name)
	    if await self.dir_exists(to_path):
	        # Destination directory exists: pick a free "-Copy" name inside it.
	        to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
	    to_path = f"{to_path}/{to_name}"

	    return await self._copy_dir(
	        from_path=from_path,
	        to_path_original=to_path_original,
	        to_name=to_name,
	        to_path=to_path,
	    )

	async def _copy_dir(
	    self, from_path: str, to_path_original: str, to_name: str, to_path: str
	) -> dict[str, t.Any]:
	    """
	    Copy a directory tree and return the model of the new directory.

	    ``from_path`` is the API path of the source directory,
	    ``to_path_original`` the API path of the destination parent,
	    ``to_name`` the name of the copy inside that parent and ``to_path``
	    the API path used to fetch the resulting model (without content).

	    Raises a 400 ``web.HTTPError`` when an ``OSError`` occurs.
	    """
	    try:
	        os_from_path = self._get_os_path(from_path.strip("/"))
	        os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
	        # copytree is blocking; run it in a worker thread so a large copy
	        # does not stall the event loop (same approach as rename_file).
	        await run_sync(shutil.copytree, os_from_path, os_to_path)
	        model = await self.get(to_path, content=False)
	    except OSError as err:
	        self.log.error("OSError in _copy_dir: %s", err)
	        raise web.HTTPError(
	            400,
	            f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
	        ) from err

	    return model  # type:ignore[no-any-return]

	async def check_folder_size(self, path: str) -> None:
	    """
	    Refuse to copy a folder that is larger than the
	    ``max_copy_folder_size_mb`` trait, to prevent a timeout error.

	    Raises a 400 ``web.HTTPError`` when the folder at ``path`` exceeds
	    the limit; returns ``None`` otherwise.
	    """
	    max_bytes = self.max_copy_folder_size_mb * 1024 * 1024

	    measured = int(await self._get_dir_size(self._get_os_path(path)))
	    if platform.system() == "Darwin":
	        # On macOS the size is reported in KB; convert to bytes.
	        measured = measured * 1024

	    if not measured > max_bytes:
	        return

	    raise web.HTTPError(
	        400,
	        f"""
	Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
	"{path}" is {await self._human_readable_size(measured)}
	""",
	    )

	async def _get_dir_size(self, path: str = ".") -> str:
	"""
	calls the command line program du to get the directory size
	"""
	try:
	if platform.system() == "Darwin":
	# returns the size of the folder in KB
	args = ["-sk", path]
	else:
	args = ["-s", "--block-size=1", path]
	proc = await asyncio.create_subprocess_exec(
	"du", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
	)

	stdout, _ = await proc.communicate()
	result = await proc.wait()
	self.log.info(f"current status of du command {result}")
	assert result == 0
	size = stdout.decode("utf-8").split()[0]
	except Exception:
	self.log.warning(
	"Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
	path,
	)
	return "0"
	return size

	async def _human_readable_size(self, size: int) -> str:
	"""
	returns folder size in a human readable format
	"""
	if size == 0:
	return "0 Bytes"

	units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
	order = int(math.log2(size) / 10) if size else 0

	return f"{size / (1 << (order * 10)):.4g} {units[order]}"