|
"""Read and write notebooks as regular .py files. |
|
|
|
Authors: |
|
|
|
* Brian Granger |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations |
|
|
|
import re |
|
|
|
from .nbbase import ( |
|
nbformat, |
|
nbformat_minor, |
|
new_code_cell, |
|
new_heading_cell, |
|
new_notebook, |
|
new_text_cell, |
|
new_worksheet, |
|
) |
|
from .rwbase import NotebookReader, NotebookWriter |
|
|
|
|
|
|
|
|
|
|
|
_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)") |
|
|
|
|
|
class PyReaderError(Exception): |
|
"""An error raised for a pyreader error.""" |
|
|
|
|
|
class PyReader(NotebookReader): |
|
"""A python notebook reader.""" |
|
|
|
def reads(self, s, **kwargs): |
|
"""Convert a string to a notebook""" |
|
return self.to_notebook(s, **kwargs) |
|
|
|
def to_notebook(self, s, **kwargs): |
|
"""Convert a string to a notebook""" |
|
lines = s.splitlines() |
|
cells = [] |
|
cell_lines: list[str] = [] |
|
kwargs = {} |
|
state = "codecell" |
|
for line in lines: |
|
if line.startswith("# <nbformat>") or _encoding_declaration_re.match(line): |
|
pass |
|
elif line.startswith("# <codecell>"): |
|
cell = self.new_cell(state, cell_lines, **kwargs) |
|
if cell is not None: |
|
cells.append(cell) |
|
state = "codecell" |
|
cell_lines = [] |
|
kwargs = {} |
|
elif line.startswith("# <htmlcell>"): |
|
cell = self.new_cell(state, cell_lines, **kwargs) |
|
if cell is not None: |
|
cells.append(cell) |
|
state = "htmlcell" |
|
cell_lines = [] |
|
kwargs = {} |
|
elif line.startswith("# <markdowncell>"): |
|
cell = self.new_cell(state, cell_lines, **kwargs) |
|
if cell is not None: |
|
cells.append(cell) |
|
state = "markdowncell" |
|
cell_lines = [] |
|
kwargs = {} |
|
|
|
elif line.startswith(("# <rawcell>", "# <plaintextcell>")): |
|
cell = self.new_cell(state, cell_lines, **kwargs) |
|
if cell is not None: |
|
cells.append(cell) |
|
state = "rawcell" |
|
cell_lines = [] |
|
kwargs = {} |
|
elif line.startswith("# <headingcell"): |
|
cell = self.new_cell(state, cell_lines, **kwargs) |
|
if cell is not None: |
|
cells.append(cell) |
|
cell_lines = [] |
|
m = re.match(r"# <headingcell level=(?P<level>\d)>", line) |
|
if m is not None: |
|
state = "headingcell" |
|
kwargs = {} |
|
kwargs["level"] = int(m.group("level")) |
|
else: |
|
state = "codecell" |
|
kwargs = {} |
|
cell_lines = [] |
|
else: |
|
cell_lines.append(line) |
|
if cell_lines and state == "codecell": |
|
cell = self.new_cell(state, cell_lines) |
|
if cell is not None: |
|
cells.append(cell) |
|
ws = new_worksheet(cells=cells) |
|
return new_notebook(worksheets=[ws]) |
|
|
|
def new_cell(self, state, lines, **kwargs): |
|
"""Create a new cell.""" |
|
if state == "codecell": |
|
input_ = "\n".join(lines) |
|
input_ = input_.strip("\n") |
|
if input_: |
|
return new_code_cell(input=input_) |
|
elif state == "htmlcell": |
|
text = self._remove_comments(lines) |
|
if text: |
|
return new_text_cell("html", source=text) |
|
elif state == "markdowncell": |
|
text = self._remove_comments(lines) |
|
if text: |
|
return new_text_cell("markdown", source=text) |
|
elif state == "rawcell": |
|
text = self._remove_comments(lines) |
|
if text: |
|
return new_text_cell("raw", source=text) |
|
elif state == "headingcell": |
|
text = self._remove_comments(lines) |
|
level = kwargs.get("level", 1) |
|
if text: |
|
return new_heading_cell(source=text, level=level) |
|
|
|
def _remove_comments(self, lines): |
|
new_lines = [] |
|
for line in lines: |
|
if line.startswith("#"): |
|
new_lines.append(line[2:]) |
|
else: |
|
new_lines.append(line) |
|
text = "\n".join(new_lines) |
|
text = text.strip("\n") |
|
return text |
|
|
|
def split_lines_into_blocks(self, lines): |
|
"""Split lines into code blocks.""" |
|
if len(lines) == 1: |
|
yield lines[0] |
|
raise StopIteration() |
|
import ast |
|
|
|
source = "\n".join(lines) |
|
code = ast.parse(source) |
|
starts = [x.lineno - 1 for x in code.body] |
|
for i in range(len(starts) - 1): |
|
yield "\n".join(lines[starts[i] : starts[i + 1]]).strip("\n") |
|
yield "\n".join(lines[starts[-1] :]).strip("\n") |
|
|
|
|
|
class PyWriter(NotebookWriter): |
|
"""A Python notebook writer.""" |
|
|
|
def writes(self, nb, **kwargs): |
|
"""Convert a notebook to a string.""" |
|
lines = ["# -*- coding: utf-8 -*-"] |
|
lines.extend( |
|
[ |
|
"# <nbformat>%i.%i</nbformat>" % (nbformat, nbformat_minor), |
|
"", |
|
] |
|
) |
|
for ws in nb.worksheets: |
|
for cell in ws.cells: |
|
if cell.cell_type == "code": |
|
input_ = cell.get("input") |
|
if input_ is not None: |
|
lines.extend(["# <codecell>", ""]) |
|
lines.extend(input_.splitlines()) |
|
lines.append("") |
|
elif cell.cell_type == "html": |
|
input_ = cell.get("source") |
|
if input_ is not None: |
|
lines.extend(["# <htmlcell>", ""]) |
|
lines.extend(["# " + line for line in input_.splitlines()]) |
|
lines.append("") |
|
elif cell.cell_type == "markdown": |
|
input_ = cell.get("source") |
|
if input_ is not None: |
|
lines.extend(["# <markdowncell>", ""]) |
|
lines.extend(["# " + line for line in input_.splitlines()]) |
|
lines.append("") |
|
elif cell.cell_type == "raw": |
|
input_ = cell.get("source") |
|
if input_ is not None: |
|
lines.extend(["# <rawcell>", ""]) |
|
lines.extend(["# " + line for line in input_.splitlines()]) |
|
lines.append("") |
|
elif cell.cell_type == "heading": |
|
input_ = cell.get("source") |
|
level = cell.get("level", 1) |
|
if input_ is not None: |
|
lines.extend(["# <headingcell level=%s>" % level, ""]) |
|
lines.extend(["# " + line for line in input_.splitlines()]) |
|
lines.append("") |
|
lines.append("") |
|
return "\n".join(lines) |
|
|
|
|
|
_reader = PyReader() |
|
_writer = PyWriter() |
|
|
|
reads = _reader.reads |
|
read = _reader.read |
|
to_notebook = _reader.to_notebook |
|
write = _writer.write |
|
writes = _writer.writes |
|
|