Llama-3.1-8B-DALv0.1
/
venv
/lib
/python3.12
/site-packages
/jupyter_server
/services
/contents
/filemanager.py
"""A contents manager that uses the local file system for storage.""" | |
# Copyright (c) Jupyter Development Team. | |
# Distributed under the terms of the Modified BSD License. | |
from __future__ import annotations | |
import asyncio | |
import errno | |
import math | |
import mimetypes | |
import os | |
import platform | |
import shutil | |
import stat | |
import subprocess | |
import sys | |
import typing as t | |
import warnings | |
from datetime import datetime | |
from pathlib import Path | |
import nbformat | |
from anyio.to_thread import run_sync | |
from jupyter_core.paths import exists, is_file_hidden, is_hidden | |
from send2trash import send2trash | |
from tornado import web | |
from traitlets import Bool, Int, TraitError, Unicode, default, validate | |
from jupyter_server import _tz as tz | |
from jupyter_server.base.handlers import AuthenticatedFileHandler | |
from jupyter_server.transutils import _i18n | |
from jupyter_server.utils import to_api_path | |
from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints | |
from .fileio import AsyncFileManagerMixin, FileManagerMixin | |
from .manager import AsyncContentsManager, ContentsManager, copy_pat | |
try: | |
from os.path import samefile | |
except ImportError: | |
# windows | |
from jupyter_server.utils import samefile_simple as samefile # type:ignore[assignment] | |
_script_exporter = None | |
class FileContentsManager(FileManagerMixin, ContentsManager): | |
"""A file contents manager.""" | |
root_dir = Unicode(config=True) | |
max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") | |
def _default_root_dir(self): | |
if not self.parent: | |
return os.getcwd() | |
return self.parent.root_dir | |
def _validate_root_dir(self, proposal): | |
value = proposal["value"] | |
if not os.path.isabs(value): | |
# If we receive a non-absolute path, make it absolute. | |
value = os.path.abspath(value) | |
if not os.path.isdir(value): | |
raise TraitError("%r is not a directory" % value) | |
return value | |
def _default_preferred_dir(self): | |
if not self.parent: | |
return "" | |
try: | |
value = self.parent.preferred_dir | |
if value == self.parent.root_dir: | |
value = None | |
except AttributeError: | |
pass | |
else: | |
if value is not None: | |
warnings.warn( | |
"ServerApp.preferred_dir config is deprecated in jupyter-server 2.0. Use FileContentsManager.preferred_dir instead", | |
FutureWarning, | |
stacklevel=3, | |
) | |
try: | |
path = Path(value) | |
return path.relative_to(self.root_dir).as_posix() | |
except ValueError: | |
raise TraitError("%s is outside root contents directory" % value) from None | |
return "" | |
def _validate_preferred_dir(self, proposal): | |
# It should be safe to pass an API path through this method: | |
proposal["value"] = to_api_path(proposal["value"], self.root_dir) | |
return super()._validate_preferred_dir(proposal) | |
def _checkpoints_class_default(self): | |
return FileCheckpoints | |
delete_to_trash = Bool( | |
True, | |
config=True, | |
help="""If True (default), deleting files will send them to the | |
platform's trash/recycle bin, where they can be recovered. If False, | |
deleting files really deletes them.""", | |
) | |
always_delete_dir = Bool( | |
False, | |
config=True, | |
help="""If True, deleting a non-empty directory will always be allowed. | |
WARNING this may result in files being permanently removed; e.g. on Windows, | |
if the data size is too big for the trash/recycle bin the directory will be permanently | |
deleted. If False (default), the non-empty directory will be sent to the trash only | |
if safe. And if ``delete_to_trash`` is True, the directory won't be deleted.""", | |
) | |
def _files_handler_class_default(self): | |
return AuthenticatedFileHandler | |
def _files_handler_params_default(self): | |
return {"path": self.root_dir} | |
def is_hidden(self, path): | |
"""Does the API style path correspond to a hidden directory or file? | |
Parameters | |
---------- | |
path : str | |
The path to check. This is an API path (`/` separated, | |
relative to root_dir). | |
Returns | |
------- | |
hidden : bool | |
Whether the path exists and is hidden. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
return is_hidden(os_path, self.root_dir) | |
def is_writable(self, path): | |
"""Does the API style path correspond to a writable directory or file? | |
Parameters | |
---------- | |
path : str | |
The path to check. This is an API path (`/` separated, | |
relative to root_dir). | |
Returns | |
------- | |
hidden : bool | |
Whether the path exists and is writable. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
try: | |
return os.access(os_path, os.W_OK) | |
except OSError: | |
self.log.error("Failed to check write permissions on %s", os_path) | |
return False | |
def file_exists(self, path): | |
"""Returns True if the file exists, else returns False. | |
API-style wrapper for os.path.isfile | |
Parameters | |
---------- | |
path : str | |
The relative path to the file (with '/' as separator) | |
Returns | |
------- | |
exists : bool | |
Whether the file exists. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path) | |
return os.path.isfile(os_path) | |
def dir_exists(self, path): | |
"""Does the API-style path refer to an extant directory? | |
API-style wrapper for os.path.isdir | |
Parameters | |
---------- | |
path : str | |
The path to check. This is an API path (`/` separated, | |
relative to root_dir). | |
Returns | |
------- | |
exists : bool | |
Whether the path is indeed a directory. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
return os.path.isdir(os_path) | |
def exists(self, path): | |
"""Returns True if the path exists, else returns False. | |
API-style wrapper for os.path.exists | |
Parameters | |
---------- | |
path : str | |
The API path to the file (with '/' as separator) | |
Returns | |
------- | |
exists : bool | |
Whether the target exists. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
return exists(os_path) | |
def _base_model(self, path): | |
"""Build the common base of a contents model""" | |
os_path = self._get_os_path(path) | |
info = os.lstat(os_path) | |
four_o_four = "file or directory does not exist: %r" % path | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path) | |
raise web.HTTPError(404, four_o_four) | |
try: | |
# size of file | |
size = info.st_size | |
except (ValueError, OSError): | |
self.log.warning("Unable to get size.") | |
size = None | |
try: | |
last_modified = tz.utcfromtimestamp(info.st_mtime) | |
except (ValueError, OSError): | |
# Files can rarely have an invalid timestamp | |
# https://github.com/jupyter/notebook/issues/2539 | |
# https://github.com/jupyter/notebook/issues/2757 | |
# Use the Unix epoch as a fallback so we don't crash. | |
self.log.warning("Invalid mtime %s for %s", info.st_mtime, os_path) | |
last_modified = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC) | |
try: | |
created = tz.utcfromtimestamp(info.st_ctime) | |
except (ValueError, OSError): # See above | |
self.log.warning("Invalid ctime %s for %s", info.st_ctime, os_path) | |
created = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC) | |
# Create the base model. | |
model = {} | |
model["name"] = path.rsplit("/", 1)[-1] | |
model["path"] = path | |
model["last_modified"] = last_modified | |
model["created"] = created | |
model["content"] = None | |
model["format"] = None | |
model["mimetype"] = None | |
model["size"] = size | |
model["writable"] = self.is_writable(path) | |
model["hash"] = None | |
model["hash_algorithm"] = None | |
return model | |
def _dir_model(self, path, content=True): | |
"""Build a model for a directory | |
if content is requested, will include a listing of the directory | |
""" | |
os_path = self._get_os_path(path) | |
four_o_four = "directory does not exist: %r" % path | |
if not os.path.isdir(os_path): | |
raise web.HTTPError(404, four_o_four) | |
elif not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path) | |
raise web.HTTPError(404, four_o_four) | |
model = self._base_model(path) | |
model["type"] = "directory" | |
model["size"] = None | |
if content: | |
model["content"] = contents = [] | |
os_dir = self._get_os_path(path) | |
for name in os.listdir(os_dir): | |
try: | |
os_path = os.path.join(os_dir, name) | |
except UnicodeDecodeError as e: | |
self.log.warning("failed to decode filename '%s': %r", name, e) | |
continue | |
try: | |
st = os.lstat(os_path) | |
except OSError as e: | |
# skip over broken symlinks in listing | |
if e.errno == errno.ENOENT: | |
self.log.warning("%s doesn't exist", os_path) | |
elif e.errno != errno.EACCES: # Don't provide clues about protected files | |
self.log.warning("Error stat-ing %s: %r", os_path, e) | |
continue | |
if ( | |
not stat.S_ISLNK(st.st_mode) | |
and not stat.S_ISREG(st.st_mode) | |
and not stat.S_ISDIR(st.st_mode) | |
): | |
self.log.debug("%s not a regular file", os_path) | |
continue | |
try: | |
if self.should_list(name) and ( | |
self.allow_hidden or not is_file_hidden(os_path, stat_res=st) | |
): | |
contents.append(self.get(path=f"{path}/{name}", content=False)) | |
except OSError as e: | |
# ELOOP: recursive symlink, also don't show failure due to permissions | |
if e.errno not in [errno.ELOOP, errno.EACCES]: | |
self.log.warning( | |
"Unknown error checking if file %r is hidden", | |
os_path, | |
exc_info=True, | |
) | |
model["format"] = "json" | |
return model | |
def _file_model(self, path, content=True, format=None, require_hash=False): | |
"""Build a model for a file | |
if content is requested, include the file contents. | |
format: | |
If 'text', the contents will be decoded as UTF-8. | |
If 'base64', the raw bytes contents will be encoded as base64. | |
If not specified, try to decode as UTF-8, and fall back to base64 | |
if require_hash is true, the model will include 'hash' | |
""" | |
model = self._base_model(path) | |
model["type"] = "file" | |
os_path = self._get_os_path(path) | |
model["mimetype"] = mimetypes.guess_type(os_path)[0] | |
bytes_content = None | |
if content: | |
content, format, bytes_content = self._read_file(os_path, format, raw=True) # type: ignore[misc] | |
if model["mimetype"] is None: | |
default_mime = { | |
"text": "text/plain", | |
"base64": "application/octet-stream", | |
}[format] | |
model["mimetype"] = default_mime | |
model.update( | |
content=content, | |
format=format, | |
) | |
if require_hash: | |
if bytes_content is None: | |
bytes_content, _ = self._read_file(os_path, "byte") # type: ignore[assignment,misc] | |
model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type] | |
return model | |
def _notebook_model(self, path, content=True, require_hash=False): | |
"""Build a notebook model | |
if content is requested, the notebook content will be populated | |
as a JSON structure (not double-serialized) | |
if require_hash is true, the model will include 'hash' | |
""" | |
model = self._base_model(path) | |
model["type"] = "notebook" | |
os_path = self._get_os_path(path) | |
bytes_content = None | |
if content: | |
validation_error: dict[str, t.Any] = {} | |
nb, bytes_content = self._read_notebook( | |
os_path, as_version=4, capture_validation_error=validation_error, raw=True | |
) | |
self.mark_trusted_cells(nb, path) | |
model["content"] = nb | |
model["format"] = "json" | |
self.validate_notebook_model(model, validation_error) | |
if require_hash: | |
if bytes_content is None: | |
bytes_content, _ = self._read_file(os_path, "byte") # type: ignore[misc] | |
model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type] | |
return model | |
def get(self, path, content=True, type=None, format=None, require_hash=False): | |
"""Takes a path for an entity and returns its model | |
Parameters | |
---------- | |
path : str | |
the API path that describes the relative path for the target | |
content : bool | |
Whether to include the contents in the reply | |
type : str, optional | |
The requested type - 'file', 'notebook', or 'directory'. | |
Will raise HTTPError 400 if the content doesn't match. | |
format : str, optional | |
The requested format for file contents. 'text' or 'base64'. | |
Ignored if this returns a notebook or directory model. | |
require_hash: bool, optional | |
Whether to include the hash of the file contents. | |
Returns | |
------- | |
model : dict | |
the contents model. If content=True, returns the contents | |
of the file or directory as well. | |
""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path) | |
four_o_four = "file or directory does not exist: %r" % path | |
if not self.exists(path): | |
raise web.HTTPError(404, four_o_four) | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path) | |
raise web.HTTPError(404, four_o_four) | |
if os.path.isdir(os_path): | |
if type not in (None, "directory"): | |
raise web.HTTPError( | |
400, | |
f"{path} is a directory, not a {type}", | |
reason="bad type", | |
) | |
model = self._dir_model(path, content=content) | |
elif type == "notebook" or (type is None and path.endswith(".ipynb")): | |
model = self._notebook_model(path, content=content, require_hash=require_hash) | |
else: | |
if type == "directory": | |
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") | |
model = self._file_model( | |
path, content=content, format=format, require_hash=require_hash | |
) | |
self.emit(data={"action": "get", "path": path}) | |
return model | |
def _save_directory(self, os_path, model, path=""): | |
"""create a directory""" | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
raise web.HTTPError(400, "Cannot create directory %r" % os_path) | |
if not os.path.exists(os_path): | |
with self.perm_to_403(): | |
os.mkdir(os_path) | |
elif not os.path.isdir(os_path): | |
raise web.HTTPError(400, "Not a directory: %s" % (os_path)) | |
else: | |
self.log.debug("Directory %r already exists", os_path) | |
def save(self, model, path=""): | |
"""Save the file model and return the model with no content.""" | |
path = path.strip("/") | |
self.run_pre_save_hooks(model=model, path=path) | |
if "type" not in model: | |
raise web.HTTPError(400, "No file type provided") | |
if "content" not in model and model["type"] != "directory": | |
raise web.HTTPError(400, "No file content provided") | |
os_path = self._get_os_path(path) | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}") | |
self.log.debug("Saving %s", os_path) | |
validation_error: dict[str, t.Any] = {} | |
try: | |
if model["type"] == "notebook": | |
nb = nbformat.from_dict(model["content"]) | |
self.check_and_sign(nb, path) | |
self._save_notebook(os_path, nb, capture_validation_error=validation_error) | |
# One checkpoint should always exist for notebooks. | |
if not self.checkpoints.list_checkpoints(path): | |
self.create_checkpoint(path) | |
elif model["type"] == "file": | |
# Missing format will be handled internally by _save_file. | |
self._save_file(os_path, model["content"], model.get("format")) | |
elif model["type"] == "directory": | |
self._save_directory(os_path, model, path) | |
else: | |
raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"]) | |
except web.HTTPError: | |
raise | |
except Exception as e: | |
self.log.error("Error while saving file: %s %s", path, e, exc_info=True) | |
raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e | |
validation_message = None | |
if model["type"] == "notebook": | |
self.validate_notebook_model(model, validation_error=validation_error) | |
validation_message = model.get("message", None) | |
model = self.get(path, content=False) | |
if validation_message: | |
model["message"] = validation_message | |
self.run_post_save_hooks(model=model, os_path=os_path) | |
self.emit(data={"action": "save", "path": path}) | |
return model | |
def delete_file(self, path): | |
"""Delete file at path.""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path) | |
rm = os.unlink | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}") | |
four_o_four = "file or directory does not exist: %r" % path | |
if not self.exists(path): | |
raise web.HTTPError(404, four_o_four) | |
def is_non_empty_dir(os_path): | |
if os.path.isdir(os_path): | |
# A directory containing only leftover checkpoints is | |
# considered empty. | |
cp_dir = getattr(self.checkpoints, "checkpoint_dir", None) | |
if set(os.listdir(os_path)) - {cp_dir}: | |
return True | |
return False | |
if self.delete_to_trash: | |
if not self.always_delete_dir and sys.platform == "win32" and is_non_empty_dir(os_path): | |
# send2trash can really delete files on Windows, so disallow | |
# deleting non-empty files. See Github issue 3631. | |
raise web.HTTPError(400, "Directory %s not empty" % os_path) | |
# send2trash now supports deleting directories. see #1290 | |
if not self.is_writable(path): | |
raise web.HTTPError(403, "Permission denied: %s" % path) from None | |
self.log.debug("Sending %s to trash", os_path) | |
try: | |
send2trash(os_path) | |
except OSError as e: | |
raise web.HTTPError(400, "send2trash failed: %s" % e) from e | |
return | |
if os.path.isdir(os_path): | |
# Don't permanently delete non-empty directories. | |
if not self.always_delete_dir and is_non_empty_dir(os_path): | |
raise web.HTTPError(400, "Directory %s not empty" % os_path) | |
self.log.debug("Removing directory %s", os_path) | |
with self.perm_to_403(): | |
shutil.rmtree(os_path) | |
else: | |
self.log.debug("Unlinking file %s", os_path) | |
with self.perm_to_403(): | |
rm(os_path) | |
def rename_file(self, old_path, new_path): | |
"""Rename a file.""" | |
old_path = old_path.strip("/") | |
new_path = new_path.strip("/") | |
if new_path == old_path: | |
return | |
new_os_path = self._get_os_path(new_path) | |
old_os_path = self._get_os_path(old_path) | |
if not self.allow_hidden and ( | |
is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir) | |
): | |
raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}") | |
# Should we proceed with the move? | |
if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path): | |
raise web.HTTPError(409, "File already exists: %s" % new_path) | |
# Move the file | |
try: | |
with self.perm_to_403(): | |
shutil.move(old_os_path, new_os_path) | |
except web.HTTPError: | |
raise | |
except Exception as e: | |
raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e | |
def info_string(self): | |
"""Get the information string for the manager.""" | |
return _i18n("Serving notebooks from local directory: %s") % self.root_dir | |
def get_kernel_path(self, path, model=None): | |
"""Return the initial API path of a kernel associated with a given notebook""" | |
if self.dir_exists(path): | |
return path | |
parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" | |
return parent_dir | |
def copy(self, from_path, to_path=None): | |
""" | |
Copy an existing file or directory and return its new model. | |
If to_path not specified, it will be the parent directory of from_path. | |
If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. | |
Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. | |
For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. | |
from_path must be a full path to a file or directory. | |
""" | |
to_path_original = str(to_path) | |
path = from_path.strip("/") | |
if to_path is not None: | |
to_path = to_path.strip("/") | |
if "/" in path: | |
from_dir, from_name = path.rsplit("/", 1) | |
else: | |
from_dir = "" | |
from_name = path | |
model = self.get(path) | |
# limit the size of folders being copied to prevent a timeout error | |
if model["type"] == "directory": | |
self.check_folder_size(path) | |
else: | |
# let the super class handle copying files | |
return super().copy(from_path=from_path, to_path=to_path) | |
is_destination_specified = to_path is not None | |
to_name = copy_pat.sub(".", from_name) | |
if not is_destination_specified: | |
to_path = from_dir | |
if self.dir_exists(to_path): | |
name = copy_pat.sub(".", from_name) | |
to_name = super().increment_filename(name, to_path, insert="-Copy") | |
to_path = f"{to_path}/{to_name}" | |
return self._copy_dir( | |
from_path=from_path, | |
to_path_original=to_path_original, | |
to_name=to_name, | |
to_path=to_path, | |
) | |
def _copy_dir(self, from_path, to_path_original, to_name, to_path): | |
""" | |
handles copying directories | |
returns the model for the copied directory | |
""" | |
try: | |
os_from_path = self._get_os_path(from_path.strip("/")) | |
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' | |
shutil.copytree(os_from_path, os_to_path) | |
model = self.get(to_path, content=False) | |
except OSError as err: | |
self.log.error(f"OSError in _copy_dir: {err}") | |
raise web.HTTPError( | |
400, | |
f"Can't copy '{from_path}' into Folder '{to_path}'", | |
) from err | |
return model | |
def check_folder_size(self, path): | |
""" | |
limit the size of folders being copied to be no more than the | |
trait max_copy_folder_size_mb to prevent a timeout error | |
""" | |
limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 | |
size = int(self._get_dir_size(self._get_os_path(path))) | |
# convert from KB to Bytes for macOS | |
size = size * 1024 if platform.system() == "Darwin" else size | |
if size > limit_bytes: | |
raise web.HTTPError( | |
400, | |
f""" | |
Can't copy folders larger than {self.max_copy_folder_size_mb}MB, | |
"{path}" is {self._human_readable_size(size)} | |
""", | |
) | |
def _get_dir_size(self, path="."): | |
""" | |
calls the command line program du to get the directory size | |
""" | |
try: | |
if platform.system() == "Darwin": | |
# returns the size of the folder in KB | |
result = subprocess.run( | |
["du", "-sk", path], # noqa: S607 | |
capture_output=True, | |
check=True, | |
).stdout.split() | |
else: | |
result = subprocess.run( | |
["du", "-s", "--block-size=1", path], # noqa: S607 | |
capture_output=True, | |
check=True, | |
).stdout.split() | |
self.log.info(f"current status of du command {result}") | |
size = result[0].decode("utf-8") | |
except Exception: | |
self.log.warning( | |
"Not able to get the size of the %s directory. Copying might be slow if the directory is large!", | |
path, | |
) | |
return "0" | |
return size | |
def _human_readable_size(self, size): | |
""" | |
returns folder size in a human readable format | |
""" | |
if size == 0: | |
return "0 Bytes" | |
units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] | |
order = int(math.log2(size) / 10) if size else 0 | |
return f"{size / (1 << (order * 10)):.4g} {units[order]}" | |
class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): | |
"""An async file contents manager.""" | |
def _checkpoints_class_default(self): | |
return AsyncFileCheckpoints | |
async def _dir_model(self, path, content=True): | |
"""Build a model for a directory | |
if content is requested, will include a listing of the directory | |
""" | |
os_path = self._get_os_path(path) | |
four_o_four = "directory does not exist: %r" % path | |
if not os.path.isdir(os_path): | |
raise web.HTTPError(404, four_o_four) | |
elif not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path) | |
raise web.HTTPError(404, four_o_four) | |
model = self._base_model(path) | |
model["type"] = "directory" | |
model["size"] = None | |
if content: | |
model["content"] = contents = [] | |
os_dir = self._get_os_path(path) | |
dir_contents = await run_sync(os.listdir, os_dir) | |
for name in dir_contents: | |
try: | |
os_path = os.path.join(os_dir, name) | |
except UnicodeDecodeError as e: | |
self.log.warning("failed to decode filename '%s': %r", name, e) | |
continue | |
try: | |
st = await run_sync(os.lstat, os_path) | |
except OSError as e: | |
# skip over broken symlinks in listing | |
if e.errno == errno.ENOENT: | |
self.log.warning("%s doesn't exist", os_path) | |
elif e.errno != errno.EACCES: # Don't provide clues about protected files | |
self.log.warning("Error stat-ing %s: %r", os_path, e) | |
continue | |
if ( | |
not stat.S_ISLNK(st.st_mode) | |
and not stat.S_ISREG(st.st_mode) | |
and not stat.S_ISDIR(st.st_mode) | |
): | |
self.log.debug("%s not a regular file", os_path) | |
continue | |
try: | |
if self.should_list(name) and ( | |
self.allow_hidden or not is_file_hidden(os_path, stat_res=st) | |
): | |
contents.append(await self.get(path=f"{path}/{name}", content=False)) | |
except OSError as e: | |
# ELOOP: recursive symlink, also don't show failure due to permissions | |
if e.errno not in [errno.ELOOP, errno.EACCES]: | |
self.log.warning( | |
"Unknown error checking if file %r is hidden", | |
os_path, | |
exc_info=True, | |
) | |
model["format"] = "json" | |
return model | |
async def _file_model(self, path, content=True, format=None, require_hash=False): | |
"""Build a model for a file | |
if content is requested, include the file contents. | |
format: | |
If 'text', the contents will be decoded as UTF-8. | |
If 'base64', the raw bytes contents will be encoded as base64. | |
If not specified, try to decode as UTF-8, and fall back to base64 | |
if require_hash is true, the model will include 'hash' | |
""" | |
model = self._base_model(path) | |
model["type"] = "file" | |
os_path = self._get_os_path(path) | |
model["mimetype"] = mimetypes.guess_type(os_path)[0] | |
bytes_content = None | |
if content: | |
content, format, bytes_content = await self._read_file(os_path, format, raw=True) # type: ignore[misc] | |
if model["mimetype"] is None: | |
default_mime = { | |
"text": "text/plain", | |
"base64": "application/octet-stream", | |
}[format] | |
model["mimetype"] = default_mime | |
model.update( | |
content=content, | |
format=format, | |
) | |
if require_hash: | |
if bytes_content is None: | |
bytes_content, _ = await self._read_file(os_path, "byte") # type: ignore[assignment,misc] | |
model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type] | |
return model | |
async def _notebook_model(self, path, content=True, require_hash=False): | |
"""Build a notebook model | |
if content is requested, the notebook content will be populated | |
as a JSON structure (not double-serialized) | |
""" | |
model = self._base_model(path) | |
model["type"] = "notebook" | |
os_path = self._get_os_path(path) | |
bytes_content = None | |
if content: | |
validation_error: dict[str, t.Any] = {} | |
nb, bytes_content = await self._read_notebook( | |
os_path, as_version=4, capture_validation_error=validation_error, raw=True | |
) | |
self.mark_trusted_cells(nb, path) | |
model["content"] = nb | |
model["format"] = "json" | |
self.validate_notebook_model(model, validation_error) | |
if require_hash: | |
if bytes_content is None: | |
bytes_content, _ = await self._read_file(os_path, "byte") # type: ignore[misc] | |
model.update(**(self._get_hash(bytes_content))) # type: ignore[arg-type] | |
return model | |
async def get(self, path, content=True, type=None, format=None, require_hash=False): | |
"""Takes a path for an entity and returns its model | |
Parameters | |
---------- | |
path : str | |
the API path that describes the relative path for the target | |
content : bool | |
Whether to include the contents in the reply | |
type : str, optional | |
The requested type - 'file', 'notebook', or 'directory'. | |
Will raise HTTPError 400 if the content doesn't match. | |
format : str, optional | |
The requested format for file contents. 'text' or 'base64'. | |
Ignored if this returns a notebook or directory model. | |
require_hash: bool, optional | |
Whether to include the hash of the file contents. | |
Returns | |
------- | |
model : dict | |
the contents model. If content=True, returns the contents | |
of the file or directory as well. | |
""" | |
path = path.strip("/") | |
if not self.exists(path): | |
raise web.HTTPError(404, "No such file or directory: %s" % path) | |
os_path = self._get_os_path(path) | |
if os.path.isdir(os_path): | |
if type not in (None, "directory"): | |
raise web.HTTPError( | |
400, | |
f"{path} is a directory, not a {type}", | |
reason="bad type", | |
) | |
model = await self._dir_model(path, content=content) | |
elif type == "notebook" or (type is None and path.endswith(".ipynb")): | |
model = await self._notebook_model(path, content=content, require_hash=require_hash) | |
else: | |
if type == "directory": | |
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") | |
model = await self._file_model( | |
path, content=content, format=format, require_hash=require_hash | |
) | |
self.emit(data={"action": "get", "path": path}) | |
return model | |
async def _save_directory(self, os_path, model, path=""): | |
"""create a directory""" | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
raise web.HTTPError(400, "Cannot create hidden directory %r" % os_path) | |
if not os.path.exists(os_path): | |
with self.perm_to_403(): | |
await run_sync(os.mkdir, os_path) | |
elif not os.path.isdir(os_path): | |
raise web.HTTPError(400, "Not a directory: %s" % (os_path)) | |
else: | |
self.log.debug("Directory %r already exists", os_path) | |
async def save(self, model, path=""): | |
"""Save the file model and return the model with no content.""" | |
path = path.strip("/") | |
self.run_pre_save_hooks(model=model, path=path) | |
if "type" not in model: | |
raise web.HTTPError(400, "No file type provided") | |
if "content" not in model and model["type"] != "directory": | |
raise web.HTTPError(400, "No file content provided") | |
os_path = self._get_os_path(path) | |
self.log.debug("Saving %s", os_path) | |
validation_error: dict[str, t.Any] = {} | |
try: | |
if model["type"] == "notebook": | |
nb = nbformat.from_dict(model["content"]) | |
self.check_and_sign(nb, path) | |
await self._save_notebook(os_path, nb, capture_validation_error=validation_error) | |
# One checkpoint should always exist for notebooks. | |
if not (await self.checkpoints.list_checkpoints(path)): | |
await self.create_checkpoint(path) | |
elif model["type"] == "file": | |
# Missing format will be handled internally by _save_file. | |
await self._save_file(os_path, model["content"], model.get("format")) | |
elif model["type"] == "directory": | |
await self._save_directory(os_path, model, path) | |
else: | |
raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"]) | |
except web.HTTPError: | |
raise | |
except Exception as e: | |
self.log.error("Error while saving file: %s %s", path, e, exc_info=True) | |
raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e | |
validation_message = None | |
if model["type"] == "notebook": | |
self.validate_notebook_model(model, validation_error=validation_error) | |
validation_message = model.get("message", None) | |
model = await self.get(path, content=False) | |
if validation_message: | |
model["message"] = validation_message | |
self.run_post_save_hooks(model=model, os_path=os_path) | |
self.emit(data={"action": "save", "path": path}) | |
return model | |
async def delete_file(self, path): | |
"""Delete file at path.""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path) | |
rm = os.unlink | |
if not self.allow_hidden and is_hidden(os_path, self.root_dir): | |
raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}") | |
if not os.path.exists(os_path): | |
raise web.HTTPError(404, "File or directory does not exist: %s" % os_path) | |
async def is_non_empty_dir(os_path): | |
if os.path.isdir(os_path): | |
# A directory containing only leftover checkpoints is | |
# considered empty. | |
cp_dir = getattr(self.checkpoints, "checkpoint_dir", None) | |
dir_contents = set(await run_sync(os.listdir, os_path)) | |
if dir_contents - {cp_dir}: | |
return True | |
return False | |
if self.delete_to_trash: | |
if ( | |
not self.always_delete_dir | |
and sys.platform == "win32" | |
and await is_non_empty_dir(os_path) | |
): | |
# send2trash can really delete files on Windows, so disallow | |
# deleting non-empty files. See Github issue 3631. | |
raise web.HTTPError(400, "Directory %s not empty" % os_path) | |
# send2trash now supports deleting directories. see #1290 | |
if not self.is_writable(path): | |
raise web.HTTPError(403, "Permission denied: %s" % path) from None | |
self.log.debug("Sending %s to trash", os_path) | |
try: | |
send2trash(os_path) | |
except OSError as e: | |
raise web.HTTPError(400, "send2trash failed: %s" % e) from e | |
return | |
if os.path.isdir(os_path): | |
# Don't permanently delete non-empty directories. | |
if not self.always_delete_dir and await is_non_empty_dir(os_path): | |
raise web.HTTPError(400, "Directory %s not empty" % os_path) | |
self.log.debug("Removing directory %s", os_path) | |
with self.perm_to_403(): | |
await run_sync(shutil.rmtree, os_path) | |
else: | |
self.log.debug("Unlinking file %s", os_path) | |
with self.perm_to_403(): | |
await run_sync(rm, os_path) | |
async def rename_file(self, old_path, new_path): | |
"""Rename a file.""" | |
old_path = old_path.strip("/") | |
new_path = new_path.strip("/") | |
if new_path == old_path: | |
return | |
new_os_path = self._get_os_path(new_path) | |
old_os_path = self._get_os_path(old_path) | |
if not self.allow_hidden and ( | |
is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir) | |
): | |
raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}") | |
# Should we proceed with the move? | |
if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path): | |
raise web.HTTPError(409, "File already exists: %s" % new_path) | |
# Move the file | |
try: | |
with self.perm_to_403(): | |
await run_sync(shutil.move, old_os_path, new_os_path) | |
except web.HTTPError: | |
raise | |
except Exception as e: | |
raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e | |
async def dir_exists(self, path): | |
"""Does a directory exist at the given path""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
return os.path.isdir(os_path) | |
async def file_exists(self, path): | |
"""Does a file exist at the given path""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path) | |
return os.path.isfile(os_path) | |
async def is_hidden(self, path): | |
"""Is path a hidden directory or file""" | |
path = path.strip("/") | |
os_path = self._get_os_path(path=path) | |
return is_hidden(os_path, self.root_dir) | |
async def get_kernel_path(self, path, model=None): | |
"""Return the initial API path of a kernel associated with a given notebook""" | |
if await self.dir_exists(path): | |
return path | |
parent_dir = path.rsplit("/", 1)[0] if "/" in path else "" | |
return parent_dir | |
async def copy(self, from_path, to_path=None): | |
""" | |
Copy an existing file or directory and return its new model. | |
If to_path not specified, it will be the parent directory of from_path. | |
If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. | |
Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. | |
For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. | |
from_path must be a full path to a file or directory. | |
""" | |
to_path_original = str(to_path) | |
path = from_path.strip("/") | |
if to_path is not None: | |
to_path = to_path.strip("/") | |
if "/" in path: | |
from_dir, from_name = path.rsplit("/", 1) | |
else: | |
from_dir = "" | |
from_name = path | |
model = await self.get(path) | |
# limit the size of folders being copied to prevent a timeout error | |
if model["type"] == "directory": | |
await self.check_folder_size(path) | |
else: | |
# let the super class handle copying files | |
return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path) | |
is_destination_specified = to_path is not None | |
to_name = copy_pat.sub(".", from_name) | |
if not is_destination_specified: | |
to_path = from_dir | |
if await self.dir_exists(to_path): | |
name = copy_pat.sub(".", from_name) | |
to_name = await super().increment_filename(name, to_path, insert="-Copy") | |
to_path = f"{to_path}/{to_name}" | |
return await self._copy_dir( | |
from_path=from_path, | |
to_path_original=to_path_original, | |
to_name=to_name, | |
to_path=to_path, | |
) | |
async def _copy_dir( | |
self, from_path: str, to_path_original: str, to_name: str, to_path: str | |
) -> dict[str, t.Any]: | |
""" | |
handles copying directories | |
returns the model for the copied directory | |
""" | |
try: | |
os_from_path = self._get_os_path(from_path.strip("/")) | |
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' | |
shutil.copytree(os_from_path, os_to_path) | |
model = await self.get(to_path, content=False) | |
except OSError as err: | |
self.log.error(f"OSError in _copy_dir: {err}") | |
raise web.HTTPError( | |
400, | |
f"Can't copy '{from_path}' into read-only Folder '{to_path}'", | |
) from err | |
return model # type:ignore[no-any-return] | |
async def check_folder_size(self, path: str) -> None: | |
""" | |
limit the size of folders being copied to be no more than the | |
trait max_copy_folder_size_mb to prevent a timeout error | |
""" | |
limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024 | |
size = int(await self._get_dir_size(self._get_os_path(path))) | |
# convert from KB to Bytes for macOS | |
size = size * 1024 if platform.system() == "Darwin" else size | |
if size > limit_bytes: | |
raise web.HTTPError( | |
400, | |
f""" | |
Can't copy folders larger than {self.max_copy_folder_size_mb}MB, | |
"{path}" is {await self._human_readable_size(size)} | |
""", | |
) | |
async def _get_dir_size(self, path: str = ".") -> str: | |
""" | |
calls the command line program du to get the directory size | |
""" | |
try: | |
if platform.system() == "Darwin": | |
# returns the size of the folder in KB | |
args = ["-sk", path] | |
else: | |
args = ["-s", "--block-size=1", path] | |
proc = await asyncio.create_subprocess_exec( | |
"du", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE | |
) | |
stdout, _ = await proc.communicate() | |
result = await proc.wait() | |
self.log.info(f"current status of du command {result}") | |
assert result == 0 | |
size = stdout.decode("utf-8").split()[0] | |
except Exception: | |
self.log.warning( | |
"Not able to get the size of the %s directory. Copying might be slow if the directory is large!", | |
path, | |
) | |
return "0" | |
return size | |
async def _human_readable_size(self, size: int) -> str: | |
""" | |
returns folder size in a human readable format | |
""" | |
if size == 0: | |
return "0 Bytes" | |
units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] | |
order = int(math.log2(size) / 10) if size else 0 | |
return f"{size / (1 << (order * 10)):.4g} {units[order]}" | |