Upload folder using huggingface_hub

d1ceb73 verified 11 months ago

24.8 kB

	#!/usr/bin/env python
	"""NbConvert is a utility for conversion of .ipynb files.

	Command-line interface for the NbConvert conversion utility.
	"""

	# Copyright (c) IPython Development Team.
	# Distributed under the terms of the Modified BSD License.
	from __future__ import annotations

	import asyncio
	import glob
	import logging
	import os
	import sys
	import typing as t
	from textwrap import dedent, fill

	from jupyter_core.application import JupyterApp, base_aliases, base_flags
	from traitlets import Bool, DottedObjectName, Instance, List, Type, Unicode, default, observe
	from traitlets.config import Configurable, catch_config_error
	from traitlets.utils.importstring import import_item

	from nbconvert import __version__, exporters, postprocessors, preprocessors, writers
	from nbconvert.utils.text import indent

	from .exporters.base import get_export_names, get_exporter
	from .utils.base import NbConvertBase
	from .utils.exceptions import ConversionException
	from .utils.io import unicode_stdin_stream

	# -----------------------------------------------------------------------------
	# Classes and functions
	# -----------------------------------------------------------------------------


	class DottedOrNone(DottedObjectName):
	    """Dotted object name trait that also tolerates "no value".

	    Behaves like ``DottedObjectName`` (e.g. ``A.b3._c``) except that ``None``
	    and zero-length values skip the dotted-name validation entirely.
	    """

	    default_value = ""

	    def validate(self, obj, value):
	        """Return *value* unchanged when it is ``None`` or empty, else validate it.

	        Non-empty values are delegated to ``DottedObjectName.validate``.
	        """
	        # "Unset" values are accepted as-is; only real names get checked.
	        if value is None or len(value) == 0:
	            return value
	        return super().validate(obj, value)


	# Command-line aliases: the aliases inherited from JupyterApp come first,
	# followed by the nbconvert-specific ones. Each alias maps ``--<alias>`` to
	# the "Class.trait" name it configures.
	nbconvert_aliases = {
	    **base_aliases,
	    "to": "NbConvertApp.export_format",
	    "template": "TemplateExporter.template_name",
	    "template-file": "TemplateExporter.template_file",
	    "theme": "HTMLExporter.theme",
	    "sanitize_html": "HTMLExporter.sanitize_html",
	    "writer": "NbConvertApp.writer_class",
	    "post": "NbConvertApp.postprocessor_class",
	    "output": "NbConvertApp.output_base",
	    "output-dir": "FilesWriter.build_directory",
	    "reveal-prefix": "SlidesExporter.reveal_url_prefix",
	    "nbformat": "NotebookExporter.nbformat_version",
	}

	# Command-line flags: the flags inherited from JupyterApp come first,
	# followed by the nbconvert-specific ones. Each flag maps ``--<flag>`` to a
	# ``(config overrides, help text)`` pair.
	nbconvert_flags = {
	    **base_flags,
	    # --- execution ---
	    "execute": (
	        {"ExecutePreprocessor": {"enabled": True}},
	        "Execute the notebook prior to export.",
	    ),
	    "allow-errors": (
	        {"ExecutePreprocessor": {"allow_errors": True}},
	        "Continue notebook execution even if one of the cells throws "
	        "an error and include the error message in the cell output "
	        "(the default behaviour is to abort conversion). This flag "
	        "is only relevant if '--execute' was specified, too.",
	    ),
	    # --- input / output selection ---
	    "stdin": (
	        {"NbConvertApp": {"from_stdin": True}},
	        "read a single notebook file from stdin. "
	        "Write the resulting notebook with default basename 'notebook.*'",
	    ),
	    "stdout": (
	        {"NbConvertApp": {"writer_class": "StdoutWriter"}},
	        "Write notebook output to stdout instead of files.",
	    ),
	    # --- in-place notebook rewrites ---
	    "inplace": (
	        {
	            "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
	            "FilesWriter": {"build_directory": ""},
	        },
	        """Run nbconvert in place, overwriting the existing notebook (only
	relevant when converting to notebook format)""",
	    ),
	    "clear-output": (
	        {
	            "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
	            "FilesWriter": {"build_directory": ""},
	            "ClearOutputPreprocessor": {"enabled": True},
	        },
	        """Clear output of current file and save in place,
	overwriting the existing notebook. """,
	    ),
	    "coalesce-streams": (
	        {
	            "NbConvertApp": {"use_output_suffix": False, "export_format": "notebook"},
	            "FilesWriter": {"build_directory": ""},
	            "CoalesceStreamsPreprocessor": {"enabled": True},
	        },
	        "Coalesce consecutive stdout and stderr outputs into one stream (within each cell).",
	    ),
	    # --- template visibility switches ---
	    "no-prompt": (
	        {
	            "TemplateExporter": {
	                "exclude_input_prompt": True,
	                "exclude_output_prompt": True,
	            }
	        },
	        "Exclude input and output prompts from converted document.",
	    ),
	    "no-input": (
	        {
	            "TemplateExporter": {
	                "exclude_output_prompt": True,
	                "exclude_input": True,
	                "exclude_input_prompt": True,
	            }
	        },
	        """Exclude input cells and output prompts from converted document.
	This mode is ideal for generating code-free reports.""",
	    ),
	    # --- WebPDF / chromium ---
	    "allow-chromium-download": (
	        {"WebPDFExporter": {"allow_chromium_download": True}},
	        "Whether to allow downloading chromium if no suitable version is found on the system.",
	    ),
	    "disable-chromium-sandbox": (
	        {"WebPDFExporter": {"disable_sandbox": True}},
	        "Disable chromium security sandbox when converting to PDF..",
	    ),
	    "show-input": (
	        {"TemplateExporter": {"exclude_input": False}},
	        "Shows code input. This flag is only useful for dejavu users.",
	    ),
	    # --- HTML specifics ---
	    "embed-images": (
	        {"HTMLExporter": {"embed_images": True}},
	        "Embed the images as base64 dataurls in the output. "
	        "This flag is only useful for the HTML/WebPDF/Slides exports.",
	    ),
	    "sanitize-html": (
	        {"HTMLExporter": {"sanitize_html": True}},
	        "Whether the HTML in Markdown cells and cell outputs should be sanitized..",
	    ),
	}


	class NbConvertApp(JupyterApp):
	    """Application used to convert from notebook file type (``*.ipynb``)

	    Each notebook is converted in four steps: initialize the per-notebook
	    resources, export it with the configured exporter, write the result with
	    the configured writer and, if one is configured, run the postprocessor.
	    """

	    version = __version__
	    name = "jupyter-nbconvert"
	    aliases = nbconvert_aliases
	    flags = nbconvert_flags

	    @default("log_level")
	    def _log_level_default(self):
	        """Default the log level to INFO."""
	        return logging.INFO

	    classes = List()  # type:ignore[assignment]

	    @default("classes")
	    def _classes_default(self):
	        """Collect ``NbConvertBase`` plus every ``Configurable`` subclass
	        exposed by the exporters, preprocessors, writers and postprocessors
	        packages.
	        """
	        classes: list[type[t.Any]] = [NbConvertBase]
	        for pkg in (exporters, preprocessors, writers, postprocessors):
	            for name in dir(pkg):
	                cls = getattr(pkg, name)
	                if isinstance(cls, type) and issubclass(cls, Configurable):
	                    classes.append(cls)

	        return classes

	    description = Unicode(  # type:ignore[assignment]
	        """This application is used to convert notebook files (*.ipynb)
	        to various other formats.

	        WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES."""
	    )

	    output_base = Unicode(
	        "{notebook_name}",
	        help="""Overwrite base name use for output files.
	        Supports pattern replacements '{notebook_name}'.
	        """,
	    ).tag(config=True)

	    use_output_suffix = Bool(
	        True,
	        help="""Whether to apply a suffix prior to the extension (only relevant
	        when converting to notebook format). The suffix is determined by
	        the exporter, and is usually '.nbconvert'.""",
	    ).tag(config=True)

	    output_files_dir = Unicode(
	        "{notebook_name}_files",
	        help="""Directory to copy extra files (figures) to.
	        '{notebook_name}' in the string will be converted to notebook
	        basename.""",
	    ).tag(config=True)

	    examples = Unicode(
	        f"""
	        The simplest way to use nbconvert is

	        > jupyter nbconvert mynotebook.ipynb --to html

	        Options include {get_export_names()}.

	        > jupyter nbconvert --to latex mynotebook.ipynb

	        Both HTML and LaTeX support multiple output templates. LaTeX includes
	        'base', 'article' and 'report'. HTML includes 'basic', 'lab' and
	        'classic'. You can specify the flavor of the format used.

	        > jupyter nbconvert --to html --template lab mynotebook.ipynb

	        You can also pipe the output to stdout, rather than a file

	        > jupyter nbconvert mynotebook.ipynb --stdout

	        PDF is generated via latex

	        > jupyter nbconvert mynotebook.ipynb --to pdf

	        You can get (and serve) a Reveal.js-powered slideshow

	        > jupyter nbconvert myslides.ipynb --to slides --post serve

	        Multiple notebooks can be given at the command line in a couple of
	        different ways:

	        > jupyter nbconvert notebook*.ipynb
	        > jupyter nbconvert notebook1.ipynb notebook2.ipynb

	        or you can specify the notebooks list in a config file, containing::

	            c.NbConvertApp.notebooks = ["my_notebook.ipynb"]

	        > jupyter nbconvert --config mycfg.py
	        """
	    )

	    # Writer specific variables
	    writer = Instance(
	        "nbconvert.writers.base.WriterBase",
	        help="""Instance of the writer class used to write the
	        results of the conversion.""",
	        allow_none=True,
	    )
	    writer_class = DottedObjectName(
	        "FilesWriter",
	        help="""Writer class used to write the
	        results of the conversion""",
	    ).tag(config=True)
	    # Short (lower-cased) writer names -> full import paths.
	    writer_aliases = {
	        "fileswriter": "nbconvert.writers.files.FilesWriter",
	        "debugwriter": "nbconvert.writers.debug.DebugWriter",
	        "stdoutwriter": "nbconvert.writers.stdout.StdoutWriter",
	    }
	    writer_factory = Type(allow_none=True)

	    @observe("writer_class")
	    def _writer_class_changed(self, change):
	        """Resolve ``writer_class`` (alias or dotted path) and import it.

	        ``change`` only needs a ``"new"`` entry; ``init_writer`` calls this
	        directly with such a dict.
	        """
	        new = change["new"]
	        # Aliases are matched case-insensitively; anything else is taken as
	        # an import path.
	        new = self.writer_aliases.get(new.lower(), new)
	        self.writer_factory = import_item(new)

	    # Post-processor specific variables
	    postprocessor = Instance(
	        "nbconvert.postprocessors.base.PostProcessorBase",
	        help="""Instance of the PostProcessor class used to write the
	        results of the conversion.""",
	        allow_none=True,
	    )

	    postprocessor_class = DottedOrNone(
	        help="""PostProcessor class used to write the
	        results of the conversion"""
	    ).tag(config=True)
	    # Short (lower-cased) postprocessor names -> full import paths.
	    postprocessor_aliases = {"serve": "nbconvert.postprocessors.serve.ServePostProcessor"}
	    postprocessor_factory = Type(None, allow_none=True)

	    @observe("postprocessor_class")
	    def _postprocessor_class_changed(self, change):
	        """Resolve ``postprocessor_class`` (alias or dotted path) and import it.

	        An empty or ``None`` value means "no postprocessor" and leaves
	        ``postprocessor_factory`` untouched.
	        """
	        new = change["new"]
	        # DottedOrNone lets None through validation, so guard before calling
	        # ``.lower()`` on it.
	        if not new:
	            return
	        new = self.postprocessor_aliases.get(new.lower(), new)
	        self.postprocessor_factory = import_item(new)

	    export_format = Unicode(  # type:ignore[call-overload]
	        allow_none=False,
	        help=f"""The export format to be used, either one of the built-in formats
	        {get_export_names()}
	        or a dotted object name that represents the import path for an
	        ``Exporter`` class""",
	    ).tag(config=True)

	    notebooks = List(
	        Unicode(),
	        help="""List of notebooks to convert.
	        Wildcards are supported.
	        Filenames passed positionally will be added to the list.
	        """,
	    ).tag(config=True)
	    from_stdin = Bool(False, help="read a single notebook from stdin.").tag(config=True)
	    recursive_glob = Bool(
	        False, help="set the 'recursive' option for glob for searching wildcards."
	    ).tag(config=True)

	    @catch_config_error
	    def initialize(self, argv=None):
	        """Initialize application, notebooks, writer, and postprocessor"""
	        # See https://bugs.python.org/issue37373 :(
	        if sys.version_info > (3, 8) and sys.platform.startswith("win"):
	            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

	        self.init_syspath()
	        super().initialize(argv)
	        # Only call the hook when the base application provides it.
	        if hasattr(self, "load_config_environ"):
	            self.load_config_environ()
	        self.init_notebooks()
	        self.init_writer()
	        self.init_postprocessor()

	    def init_syspath(self):
	        """Add the cwd to the sys.path ($PYTHONPATH)"""
	        sys.path.insert(0, os.getcwd())

	    def init_notebooks(self):
	        """Construct the list of notebooks.

	        If notebooks are passed on the command-line,
	        they override (rather than add) notebooks specified in config files.
	        Glob each notebook to replace notebook patterns with filenames.
	        Duplicate filenames are dropped, keeping first-seen order.
	        """
	        # Specifying notebooks on the command-line overrides (rather than
	        # adds) the notebook list
	        patterns = self.extra_args or self.notebooks

	        # Use glob to replace all the notebook patterns with filenames.
	        filenames = []
	        seen = set()  # O(1) duplicate check; ``filenames`` keeps the order
	        for pattern in patterns:
	            # Use glob to find matching filenames. Allow the user to convert
	            # notebooks without having to type the extension.
	            globbed_files = glob.glob(pattern, recursive=self.recursive_glob)
	            globbed_files.extend(glob.glob(pattern + ".ipynb", recursive=self.recursive_glob))
	            if not globbed_files:
	                self.log.warning("pattern %r matched no files", pattern)

	            for filename in globbed_files:
	                if filename not in seen:
	                    seen.add(filename)
	                    filenames.append(filename)
	        self.notebooks = filenames

	    def init_writer(self):
	        """Initialize the writer (which is stateless)"""
	        self._writer_class_changed({"new": self.writer_class})
	        if self.writer_factory:
	            self.writer = self.writer_factory(parent=self)
	            # A writer with a non-empty build directory switches off the
	            # output suffix.
	            if hasattr(self.writer, "build_directory") and self.writer.build_directory != "":
	                self.use_output_suffix = False

	    def init_postprocessor(self):
	        """Initialize the postprocessor (which is stateless)"""
	        self._postprocessor_class_changed({"new": self.postprocessor_class})
	        if self.postprocessor_factory:
	            self.postprocessor = self.postprocessor_factory(parent=self)

	    def start(self):
	        """Run start after initialization process has completed"""
	        super().start()
	        self.convert_notebooks()

	    def _notebook_filename_to_name(self, notebook_filename):
	        """
	        Returns the notebook name from the notebook filename by
	        applying `output_base` pattern and stripping extension

	        A filename without any "." is used unchanged.
	        """
	        basename = os.path.basename(notebook_filename)
	        # rfind() returns -1 when there is no dot; slicing with -1 would
	        # silently drop the last character of the name.
	        dot = basename.rfind(".")
	        notebook_name = basename if dot == -1 else basename[:dot]
	        return self.output_base.format(notebook_name=notebook_name)

	    def init_single_notebook_resources(self, notebook_filename):
	        """Step 1: Initialize resources

	        This initializes the resources dictionary for a single notebook.

	        Returns
	        -------
	        dict
	            resources dictionary for a single notebook that MUST include the following keys:
	                - config_dir: the location of the Jupyter config directory
	                - unique_key: the notebook name
	                - output_files_dir: a directory where output files (not
	                  including the notebook itself) should be saved
	        """
	        notebook_name = self._notebook_filename_to_name(notebook_filename)
	        self.log.debug("Notebook name is '%s'", notebook_name)

	        # first initialize the resources we want to use
	        return {
	            "config_dir": self.config_dir,
	            "unique_key": notebook_name,
	            "output_files_dir": self.output_files_dir.format(notebook_name=notebook_name),
	        }

	    def export_single_notebook(self, notebook_filename, resources, input_buffer=None):
	        """Step 2: Export the notebook

	        Exports the notebook to a particular format according to the specified
	        exporter. This function returns the output and (possibly modified)
	        resources from the exporter.

	        Parameters
	        ----------
	        notebook_filename : str
	            name of notebook file.
	        resources : dict
	        input_buffer :
	            readable file-like object returning unicode.
	            if not None, notebook_filename is ignored

	        Returns
	        -------
	        output
	        dict
	            resources (possibly modified)
	        """
	        try:
	            if input_buffer is not None:
	                output, resources = self.exporter.from_file(input_buffer, resources=resources)
	            else:
	                output, resources = self.exporter.from_filename(
	                    notebook_filename, resources=resources
	                )
	        except ConversionException:
	            # Log the traceback, then terminate with a non-zero status.
	            self.log.error("Error while converting '%s'", notebook_filename, exc_info=True)  # noqa: G201
	            self.exit(1)

	        return output, resources

	    def write_single_notebook(self, output, resources):
	        """Step 3: Write the notebook to file

	        This writes output from the exporter to file using the specified writer.
	        It returns the results from the writer.

	        Parameters
	        ----------
	        output :
	        resources : dict
	            resources for a single notebook including name, config directory
	            and directory to save output

	        Returns
	        -------
	        file
	            results from the specified writer output of exporter

	        Raises
	        ------
	        KeyError
	            if ``resources`` has no ``unique_key`` entry
	        ValueError
	            if no writer has been set up
	        """
	        if "unique_key" not in resources:
	            msg = "unique_key MUST be specified in the resources, but it is not"
	            raise KeyError(msg)

	        notebook_name = resources["unique_key"]
	        # The exporter's suffix is only appended when the user did not
	        # customise ``output_base``.
	        if self.use_output_suffix and self.output_base == "{notebook_name}":
	            notebook_name += resources.get("output_suffix", "")

	        if not self.writer:
	            msg = "No writer object defined!"
	            raise ValueError(msg)
	        return self.writer.write(output, resources, notebook_name=notebook_name)

	    def postprocess_single_notebook(self, write_results):
	        """Step 4: Post-process the written file

	        Only used if a postprocessor has been specified. After the
	        converted notebook is written to a file in Step 3, this post-processes
	        the notebook.
	        """
	        # Post-process if post processor has been defined.
	        if hasattr(self, "postprocessor") and self.postprocessor:
	            self.postprocessor(write_results)

	    def convert_single_notebook(self, notebook_filename, input_buffer=None):
	        """Convert a single notebook.

	        Performs the following steps:

	            1. Initialize notebook resources
	            2. Export the notebook to a particular format
	            3. Write the exported notebook to file
	            4. (Maybe) postprocess the written file

	        Parameters
	        ----------
	        notebook_filename : str
	        input_buffer :
	            If input_buffer is not None, conversion is done and the buffer is
	            used as source into a file basenamed by the notebook_filename
	            argument.
	        """
	        if input_buffer is None:
	            self.log.info("Converting notebook %s to %s", notebook_filename, self.export_format)
	        else:
	            self.log.info("Converting notebook into %s", self.export_format)

	        resources = self.init_single_notebook_resources(notebook_filename)
	        output, resources = self.export_single_notebook(
	            notebook_filename, resources, input_buffer=input_buffer
	        )
	        write_results = self.write_single_notebook(output, resources)
	        self.postprocess_single_notebook(write_results)

	    def convert_notebooks(self):
	        """Convert the notebooks in the self.notebooks traitlet

	        Prints the help and exits when there is nothing to convert; raises
	        ``ValueError`` when no export format has been specified.
	        """
	        # no notebooks to convert!
	        if not self.notebooks and not self.from_stdin:
	            self.print_help()
	            sys.exit(-1)

	        if not self.export_format:
	            msg = (
	                "Please specify an output format with '--to <format>'."
	                f"\nThe following formats are available: {get_export_names()}"
	            )
	            raise ValueError(msg)

	        # initialize the exporter
	        cls = get_exporter(self.export_format)
	        self.exporter = cls(config=self.config)

	        # strip duplicate extension from output_base, to avoid Basename.ext.ext
	        if getattr(self.exporter, "file_extension", False):
	            base, ext = os.path.splitext(self.output_base)
	            if ext == self.exporter.file_extension:
	                self.output_base = base

	        # convert each notebook
	        if not self.from_stdin:
	            for notebook_filename in self.notebooks:
	                self.convert_single_notebook(notebook_filename)
	        else:
	            input_buffer = unicode_stdin_stream()
	            try:
	                # default name when conversion from stdin
	                self.convert_single_notebook("notebook.ipynb", input_buffer=input_buffer)
	            finally:
	                # Close the stream even when the conversion fails.
	                input_buffer.close()

	    def document_flag_help(self):
	        """
	        Return a string containing descriptions of all the flags.
	        """
	        parts = ["The following flags are defined:\n\n"]
	        for flag, (cfg, fhelp) in self.flags.items():
	            parts.append(f"{flag}\n")
	            parts.append(indent(fill(fhelp, 80)) + "\n\n")
	            parts.append(indent(fill("Long Form: " + str(cfg), 80)) + "\n\n")
	        return "".join(parts)

	    def document_alias_help(self):
	        """Return a string containing all of the aliases"""
	        parts = ["The following aliases are defined:\n\n"]
	        parts.extend(f"\t{alias} ({longname})\n\n" for alias, longname in self.aliases.items())
	        return "".join(parts)

	    def document_config_options(self):
	        """
	        Provides a much improved version of the configuration documentation by
	        breaking the configuration options into app, exporter, writer,
	        preprocessor, postprocessor, and other sections.
	        """
	        # Materialise once instead of re-enumerating the classes per category.
	        all_classes = list(self._classes_inc_parents())
	        # A class lands in every category whose name occurs in its
	        # lower-cased class name.
	        categories = {
	            category: [c for c in all_classes if category in c.__name__.lower()]
	            for category in ["app", "exporter", "writer", "preprocessor", "postprocessor"]
	        }
	        accounted_for = {c for category in categories.values() for c in category}
	        categories["other"] = [c for c in all_classes if c not in accounted_for]

	        header = dedent(
	            """
	            {section} Options
	            -----------------------

	            """
	        )
	        parts = []
	        for category, members in categories.items():
	            parts.append(header.format(section=category.title()))
	            if category in ["exporter", "preprocessor", "writer"]:
	                parts.append(f".. image:: _static/{category}_inheritance.png\n\n")
	            parts.append("\n".join(c.class_config_rst_doc() for c in members))

	        # Escape " : " in the generated reST.
	        return "".join(parts).replace(" : ", r" \: ")


	class DejavuApp(NbConvertApp):
	    """A deja vu app.

	    An ``NbConvertApp`` that presets several config values before the
	    regular initialization runs and defaults the export format to HTML.
	    """

	    @default("export_format")
	    def _default_export_format(self):
	        """Default the export format to ``"html"``."""
	        return "html"

	    def initialize(self, argv=None):
	        """Initialize the app.

	        Presets the config (input and prompts excluded, execution enabled,
	        pagination off for the WebPDF and QtPDF exporters) and then runs the
	        parent initialization.
	        """
	        config = self.config

	        # Template output: exclude code input and both kinds of prompts.
	        template_cfg = config.TemplateExporter
	        template_cfg.exclude_input_prompt = True
	        template_cfg.exclude_output_prompt = True
	        template_cfg.exclude_input = True

	        # Enable the execute preprocessor.
	        config.ExecutePreprocessor.enabled = True

	        # Turn pagination off for both PDF exporters.
	        config.QtPDFExporter.paginate = False
	        config.WebPDFExporter.paginate = False

	        super().initialize(argv)
	        # Only call the hook when the base application provides it.
	        if hasattr(self, "load_config_environ"):
	            self.load_config_environ()


	# -----------------------------------------------------------------------------
	# Main entry point
	# -----------------------------------------------------------------------------

	# Console entry points. ``main`` and ``launch_new_instance`` are two names
	# for the same NbConvertApp launcher; ``dejavu_main`` launches DejavuApp.
	launch_new_instance = NbConvertApp.launch_instance
	main = launch_new_instance
	dejavu_main = DejavuApp.launch_instance