Spaces:
Sleeping
Sleeping
"""Pickler class to extend the standard pickle.Pickler functionality | |
The main objective is to make it natural to perform distributed computing on | |
clusters (such as PySpark, Dask, Ray...) with interactively defined code | |
(functions, classes, ...) written in notebooks or console. | |
In particular this pickler adds the following features: | |
- serialize interactively-defined or locally-defined functions, classes, | |
enums, typevars, lambdas and nested functions to compiled byte code; | |
- deal with some other non-serializable objects in an ad-hoc manner where | |
applicable. | |
This pickler is therefore meant to be used for the communication between short | |
lived Python processes running the same version of Python and libraries. In | |
particular, it is not meant to be used for long term storage of Python objects. | |
It does not include an unpickler, as standard Python unpickling suffices. | |
This module was extracted from the `cloud` package, developed by `PiCloud, Inc. | |
<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_. | |
Copyright (c) 2012-now, CloudPickle developers and contributors. | |
Copyright (c) 2012, Regents of the University of California. | |
Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_. | |
All rights reserved. | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions | |
are met: | |
* Redistributions of source code must retain the above copyright | |
notice, this list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright | |
notice, this list of conditions and the following disclaimer in the | |
documentation and/or other materials provided with the distribution. | |
* Neither the name of the University of California, Berkeley nor the | |
names of its contributors may be used to endorse or promote | |
products derived from this software without specific prior written | |
permission. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | |
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
""" | |
import _collections_abc | |
from collections import ChainMap, OrderedDict | |
import abc | |
import builtins | |
import copyreg | |
import dataclasses | |
import dis | |
from enum import Enum | |
import io | |
import itertools | |
import logging | |
import opcode | |
import pickle | |
from pickle import _getattribute | |
import platform | |
import struct | |
import sys | |
import threading | |
import types | |
import typing | |
import uuid | |
import warnings | |
import weakref | |
# The following import is required to be imported in the cloudpickle | |
# namespace to be able to load pickle files generated with older versions of | |
# cloudpickle. See: tests/test_backward_compat.py | |
from types import CellType # noqa: F401 | |
# cloudpickle targets communication between processes that all run the same
# Python version, so the fastest (highest) pickle protocol is the default.
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of the modules whose functions and classes are pickled by value.
_PICKLE_BY_VALUE_MODULES = set()

# Two-way registry between dynamic class definitions and their tracker ids.
# It lets reconstructed instances share a single class definition so that
# "isinstance" keeps its usual meaning. The lock guards both mappings.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

PYPY = platform.python_implementation() == "PyPy"

# builtin-code objects only exist in PyPy; the attribute lookup below is only
# evaluated there.
builtin_code_type = type(float.__new__.__code__) if PYPY else None

# Memoizes the global names found per code object (see _extract_code_globals).
_extract_code_globals_cache = weakref.WeakKeyDictionary()
def _get_or_create_tracker_id(class_def):
    """Return the tracker id of ``class_def``, creating one on first use.

    A new id is a random ``uuid4`` hex string; it is recorded in both tracker
    mappings while holding the tracker lock.
    """
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        known_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if known_id is not None:
            return known_id
        new_id = uuid.uuid4().hex
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = new_id
        _DYNAMIC_CLASS_TRACKER_BY_ID[new_id] = class_def
        return new_id
def _lookup_class_or_track(class_tracker_id, class_def):
    """Return the class registered under ``class_tracker_id``.

    When no class is registered under that id yet, ``class_def`` is registered
    and returned. When ``class_tracker_id`` is None, ``class_def`` is returned
    untouched and nothing is tracked.
    """
    if class_tracker_id is None:
        return class_def
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        tracked = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(class_tracker_id, class_def)
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[tracked] = class_tracker_id
    return tracked
def register_pickle_by_value(module):
    """Register a module so that its functions and classes pickle by value.

    Attributes of an importable module are normally pickled by reference:
    loading them re-imports the module and looks the attribute up. After
    ``register_pickle_by_value(module)``, the module's functions and classes
    are pickled by value instead, so they can be loaded in Python processes
    where the module is not importable.

    This is handy when developing a module in a distributed environment:
    restarting the client process with the new source is enough, the worker
    nodes need neither a re-install nor a restart.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.

    Raises
    ------
    ValueError
        If ``module`` is not a module object, or if its name is not present
        in ``sys.modules``.
    """
    is_module = isinstance(module, types.ModuleType)
    if not is_module:
        raise ValueError(f"Input should be a module object, got {str(module)} instead")

    # cloudpickle may later need to reach any registered module in order to
    # introspect relative imports inside functions pickled by value (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # Requiring the module to be in sys.modules at registration time (and
    # assuming it stays there) keeps that option open without a breaking
    # change later; a weakref to the module would be the alternative.
    module_name = module.__name__
    if module_name not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
    _PICKLE_BY_VALUE_MODULES.add(module_name)
def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value.

    Raises
    ------
    ValueError
        If ``module`` is not a module object or is not currently registered.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    module_name = module.__name__
    if module_name in _PICKLE_BY_VALUE_MODULES:
        _PICKLE_BY_VALUE_MODULES.remove(module_name)
        return
    raise ValueError(f"{module} is not registered for pickle by value")
def list_registry_pickle_by_value():
    """Return a copy of the set of module names registered for pickle by value."""
    snapshot = _PICKLE_BY_VALUE_MODULES.copy()
    return snapshot
def _is_registered_pickle_by_value(module):
    """Tell whether ``module`` or one of its parent packages is registered.

    The dotted module name is checked first, then each shorter dotted prefix
    (``a.b.c``, ``a.b``, ``a``) until one is found in the registry.
    """
    candidate = module.__name__
    while True:
        if candidate in _PICKLE_BY_VALUE_MODULES:
            return True
        parent, dot, _ = candidate.rpartition(".")
        if not dot:
            # No dot left: there is no further parent package to try.
            return False
        candidate = parent
def _whichmodule(obj, name):
    """Find the module an object belongs to.

    Unlike ``pickle.whichmodule``:
    - nothing is mangled when obj's module is __main__ and obj was not found
      in any module;
    - errors raised while introspecting modules are ignored, as they are
      considered unwanted side effects.

    Returns the module name, or None when obj has no ``__module__`` and could
    not be located in any imported module.
    """
    declared = getattr(obj, "__module__", None)
    if declared is not None:
        return declared

    # Iterate over a copy of sys.modules: dynamic modules may trigger imports
    # on getattr, and other threads may be importing at the same time.
    for candidate_name, candidate in sys.modules.copy().items():
        # Tools such as coverage can put non-module objects (or None) inside
        # sys.modules; "__main__" is deliberately never searched.
        if candidate_name != "__main__" and isinstance(candidate, types.ModuleType):
            try:
                found = _getattribute(candidate, name)[0]
            except Exception:
                continue
            if found is obj:
                return candidate_name
    return None
def _should_pickle_by_reference(obj, name=None):
    """Test whether a function, class or module should be pickled by reference.

    Pickling by reference assumes that the object is an attribute of a module
    importable in the target Python environment: loading imports the module
    and calls `getattr` on it. It is the only option for functions and classes
    of the standard library. The alternative offered by cloudpickle is to
    pickle by value, used for interactively or locally defined functions and
    classes and for attributes of modules explicitly registered to be pickled
    by value.

    Raises
    ------
    TypeError
        If ``obj`` is neither a function, a class nor a module.
    """
    if isinstance(obj, types.ModuleType):
        # sys.modules is primarily the cache of the import machinery: finding
        # a module there is a cheap heuristic to assume it can be imported by
        # name in another Python process.
        if _is_registered_pickle_by_value(obj):
            return False
        return obj.__name__ in sys.modules

    is_function_or_class = isinstance(obj, types.FunctionType) or issubclass(
        type(obj), type
    )
    if not is_function_or_class:
        raise TypeError(
            "cannot check importability of {} instances".format(type(obj).__name__)
        )

    module_and_name = _lookup_module_and_qualname(obj, name=name)
    if module_and_name is None:
        return False
    module, name = module_and_name
    return not _is_registered_pickle_by_value(module)
def _lookup_module_and_qualname(obj, name=None):
    """Return ``(module, name)`` when obj is reachable by name, else None.

    ``name`` defaults to obj's ``__qualname__`` and then to its ``__name__``.
    None is returned whenever obj must be treated as dynamic: no module could
    be found for it, the module is ``__main__``, the module is not in
    ``sys.modules``, or looking ``name`` up in the module does not give back
    obj itself.
    """
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # Formerly required for Python 2.7 and probably unnecessary now; the
        # __name__ fallback is kept in case users rely on the old behavior.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)
    # None: obj.__module__ is None AND obj was not found in any imported
    # module. "__main__": never importable by reference from another process.
    if module_name is None or module_name == "__main__":
        return None

    # Note: if module_name is in sys.modules, the corresponding module is
    # assumed importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # Most likely a module created dynamically (e.g. with
        # types.ModuleType). A module removed from sys.modules after obj was
        # created is not supported, as with the standard pickle.
        return None

    try:
        resolved, _parent = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    return (module, name) if resolved is obj else None
def _extract_code_globals(co):
    """Find all globals names read or written to by codeblock co.

    The result is a dict whose keys are the names (values are all None) and is
    memoized per code object in ``_extract_code_globals_cache``.
    """
    cached = _extract_code_globals_cache.get(co)
    if cached is not None:
        return cached

    # A dict with None values rather than a set: the order is deterministic,
    # which avoids non-deterministic pickle bytes.
    names = dict.fromkeys(_walk_global_ops(co))

    # A function declared inside another one with "def ..." shows up as a
    # code-object constant of the outer code. It may need globals of its own,
    # so nested code objects are inspected recursively and merged in.
    for const in co.co_consts or ():
        if isinstance(const, types.CodeType):
            names.update(_extract_code_globals(const))

    _extract_code_globals_cache[co] = names
    return names
def _find_imported_submodules(code, top_level_dependencies):
    """Find currently imported submodules used by a function.

    Submodules can be reached as attributes of their parent package
    (``package.submodule``), so they do not show up through GLOBAL-related
    opcodes. They still have to be referenced alongside the function for it to
    work once depickled.

    Example:
    ```
    import concurrent.futures
    import cloudpickle
    def func():
        x = concurrent.futures.ThreadPoolExecutor
    if __name__ == '__main__':
        cloudpickle.dumps(func)
    ```
    The globals extracted for ``func`` contain the ``concurrent`` package but
    not ``concurrent.futures``, which is what ``func`` really uses. This
    function detects it: a loaded submodule is selected when every component
    of its name relative to the package appears in ``code.co_names``.

    Returns the list of matching module objects taken from ``sys.modules``.
    """
    referenced_names = set(code.co_names)
    found = []
    for dependency in top_level_dependencies:
        # Only packages that are known dependencies are of interest.
        if not isinstance(dependency, types.ModuleType):
            continue
        if not (hasattr(dependency, "__package__") and dependency.__package__):
            continue

        prefix = dependency.__name__ + "."
        # A concurrent thread could mutate sys.modules: iterate over a copy
        # of its keys to avoid exceptions.
        for module_name in list(sys.modules):
            # Older versions of pytest will add a "None" module to
            # sys.modules.
            if module_name is None or not module_name.startswith(prefix):
                continue
            # The function can address the submodule only if it names every
            # component of the path below the package.
            components = set(module_name[len(prefix):].split("."))
            if components.issubset(referenced_names):
                found.append(sys.modules[module_name])
    return found
# Opcodes that store, delete or load a global name.
STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL = (
    opcode.opmap[op_name]
    for op_name in ("STORE_GLOBAL", "DELETE_GLOBAL", "LOAD_GLOBAL")
)
GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
EXTENDED_ARG = dis.EXTENDED_ARG

# Reverse mapping from the plain types exposed by the ``types`` module to the
# name they are exposed under.
_BUILTIN_TYPE_NAMES = {}
for k, v in vars(types).items():
    if type(v) is not type:
        continue
    _BUILTIN_TYPE_NAMES[v] = k
def _builtin_type(name):
    """Return the attribute of the ``types`` module called ``name``.

    "ClassType" is special-cased to ``type``: this is backward compat to load
    pickle files generated with cloudpickle < 1.3, even if loading pickle
    files from older versions is not officially supported.
    """
    is_legacy_class_type = name == "ClassType"  # pragma: no cover
    return type if is_legacy_class_type else getattr(types, name)
def _walk_global_ops(code):
    """Yield referenced name for global-referencing instructions in code."""
    yield from (
        instruction.argval
        for instruction in dis.get_instructions(code)
        if instruction.opcode in GLOBAL_OPS
    )
def _extract_class_dict(cls):
    """Retrieve a copy of the dict of a class without the inherited method.

    An entry of ``cls.__dict__`` is dropped when the direct bases expose the
    very same object (identity, not equality) under the same name. With
    several bases, the first base that defines a name wins.
    """
    bases = cls.__bases__
    if len(bases) == 1:
        inherited = bases[0].__dict__
    else:
        inherited = {}
        # Update in reverse so that earlier bases override later ones.
        for base in reversed(bases):
            inherited.update(base.__dict__)

    not_inherited = object()
    return {
        name: value
        for name, value in cls.__dict__.items()
        if inherited.get(name, not_inherited) is not value
    }
def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported. Deprecated: a DeprecationWarning is
    emitted on every call. False is returned when ``tornado.gen`` has not been
    imported or does not provide ``is_coroutine_function``.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
    )
    try:
        gen = sys.modules["tornado.gen"]
    except KeyError:
        return False
    if hasattr(gen, "is_coroutine_function"):
        return gen.is_coroutine_function(func)
    # Tornado version is too old
    return False
def subimport(name):
    """Import the module called ``name`` and return that exact module."""
    # ``__import__('os.path')`` returns the top-level ``os`` package rather
    # than the submodule, so its return value cannot be used directly: the
    # import is run for its side effect and the module is then fetched from
    # sys.modules.
    __import__(name)
    module = sys.modules[name]
    return module
def dynamic_subimport(name, vars):
    """Create a new module called ``name`` populated with the mapping ``vars``.

    ``__builtins__`` is always set to the dict of the ``builtins`` module,
    overriding any value present in ``vars``.
    """
    module = types.ModuleType(name)
    namespace = module.__dict__
    namespace.update(vars)
    namespace["__builtins__"] = builtins.__dict__
    return module
def _get_cell_contents(cell):
    """Return the contents of ``cell``, or ``_empty_cell_value`` if it is empty."""
    try:
        contents = cell.cell_contents
    except ValueError:
        # Reading an empty cell raises ValueError: report it explicitly with
        # a sentinel value.
        return _empty_cell_value
    return contents
def instance(cls):
    """Create a new instance of a class.

    Parameters
    ----------
    cls : type
        The class to create an instance of; it is called without arguments.

    Returns
    -------
    instance : cls
        A new instance of ``cls``.
    """
    new_object = cls()
    return new_object
class _empty_cell_value:
    """Sentinel for empty closures.

    The class object itself is what the rest of the module compares against
    and returns as the sentinel.
    """

    # __reduce__ takes ``cls`` and reads ``cls.__name__``, so it has to be a
    # classmethod. As a plain method, calling it on the class fails with a
    # missing-argument TypeError, and calling it on an instance fails with
    # AttributeError because instances have no ``__name__``.
    @classmethod
    def __reduce__(cls):
        """Reduce to the class name, which pickle treats as a global name."""
        return cls.__name__
def _make_function(code, globals, name, argdefs, closure):
    """Build a function object from its parts.

    ``globals`` is mutated: its ``__builtins__`` entry is overwritten.
    """
    # Setting __builtins__ in globals is needed for nogil CPython.
    globals["__builtins__"] = __builtins__
    function = types.FunctionType(code, globals, name, argdefs, closure)
    return function
def _make_empty_cell():
    """Return a new closure cell that holds no value."""

    def _read_cell():
        return cell

    if False:
        # Never executed: the assignment only exists to trick the compiler
        # into making ``cell`` a local of this function, so that the nested
        # function above closes over a cell that is never filled.
        cell = None
        raise AssertionError("this route should not be executed")

    (empty_cell,) = _read_cell.__closure__
    return empty_cell
def _make_cell(value=_empty_cell_value):
    """Return a new closure cell holding ``value``.

    When ``value`` is the ``_empty_cell_value`` sentinel (the default), the
    cell is left empty.
    """
    cell = _make_empty_cell()
    if value is _empty_cell_value:
        return cell
    cell.cell_contents = value
    return cell
def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build dynamic class with an empty __dict__ to be filled once memoized

    The class is created with ``type_constructor`` as metaclass and its
    namespace is seeded with ``type_kwargs``.

    If class_tracker_id is not None, try to lookup an existing class definition
    matching that id. If none is found, track a newly reconstructed class
    definition under that id so that other instances stemming from the same
    class id will also reuse this class definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility shall the need arise.
    """

    def _seed_namespace(ns):
        ns.update(type_kwargs)

    class_keywords = {"metaclass": type_constructor}
    skeleton_class = types.new_class(name, bases, class_keywords, _seed_namespace)
    return _lookup_class_or_track(class_tracker_id, skeleton_class)
def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build dynamic enum with an empty __dict__ to be filled once memoized

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    If class_tracker_id is not None, try to lookup an existing enum definition
    matching that id. If none is found, track a newly reconstructed enum
    definition under that id so that other instances stemming from the same
    class id will also reuse this enum definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility shall the need arise.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes, and the metaclass is taken from that base.
    metacls = bases[-1].__class__

    namespace = metacls.__prepare__(name, bases)
    # Kept as item assignment rather than update(): the namespace returned by
    # __prepare__ may hook __setitem__.
    for member_name in members:
        namespace[member_name] = members[member_name]

    enum_class = metacls.__new__(metacls, name, bases, namespace)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname
    return _lookup_class_or_track(class_tracker_id, enum_class)
def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    """Rebuild a ``typing.TypeVar`` and track it under ``class_tracker_id``.

    An already tracked object registered under the same id takes precedence
    over the freshly built TypeVar.
    """
    variance = {"covariant": covariant, "contravariant": contravariant}
    type_var = typing.TypeVar(name, *constraints, bound=bound, **variance)
    return _lookup_class_or_track(class_tracker_id, type_var)
def _decompose_typevar(obj):
    """Return the argument tuple expected by ``_make_typevar`` for ``obj``."""
    name = obj.__name__
    bound = obj.__bound__
    constraints = obj.__constraints__
    covariant = obj.__covariant__
    contravariant = obj.__contravariant__
    tracker_id = _get_or_create_tracker_id(obj)
    return (name, bound, constraints, covariant, contravariant, tracker_id)
def _typevar_reduce(obj):
    """Reduce a TypeVar by value, or by reference when it can be looked up."""
    # TypeVar instances require the module information, which is why
    # _should_pickle_by_reference is not used directly here.
    module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)
    by_value = module_and_name is None or _is_registered_pickle_by_value(
        module_and_name[0]
    )
    if by_value:
        return (_make_typevar, _decompose_typevar(obj))
    return (getattr, module_and_name)
def _get_bases(typ):
    """Return ``typ.__orig_bases__`` if defined on ``typ`` itself, else ``typ.__bases__``."""
    # For generic types (see PEP 560), __orig_bases__ is used. The test is
    # made against typ's own __dict__: `hasattr(typ, '__orig_bases__')` would
    # also be True for a subclass of a fully-parameterized generic class,
    # which only inherits the attribute from its base class.
    own_namespace = getattr(typ, "__dict__", {})
    bases_attr = "__orig_bases__" if "__orig_bases__" in own_namespace else "__bases__"
    return getattr(typ, bases_attr)
def _make_dict_keys(obj, is_ordered=False):
    """Return a keys view built from the iterable ``obj``.

    The view belongs to an ``OrderedDict`` when ``is_ordered`` is true and to
    a plain ``dict`` otherwise.
    """
    mapping_type = OrderedDict if is_ordered else dict
    return mapping_type.fromkeys(obj).keys()
def _make_dict_values(obj, is_ordered=False):
    """Return a values view built from the iterable ``obj``.

    The values are keyed by their position in ``obj``. The view belongs to an
    ``OrderedDict`` when ``is_ordered`` is true and to a plain ``dict``
    otherwise.
    """
    indexed = enumerate(obj)
    if is_ordered:
        return OrderedDict(indexed).values()
    return dict(indexed).values()
def _make_dict_items(obj, is_ordered=False):
    """Return an items view for ``obj``.

    When ``is_ordered`` is true the view comes from ``OrderedDict(obj)``;
    otherwise it is ``obj.items()`` itself.
    """
    return OrderedDict(obj).items() if is_ordered else obj.items()
# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS | |
# ------------------------------------------------- | |
def _class_getnewargs(obj):
    """Return the argument tuple used to rebuild the skeleton of class ``obj``.

    The tuple holds, in order: the metaclass, the class name, its bases, the
    initial namespace entries, its tracker id and a None placeholder.
    """
    namespace = obj.__dict__
    type_kwargs = {}
    if "__module__" in namespace:
        type_kwargs["__module__"] = obj.__module__

    # A ``__dict__`` entry is only carried over when it is a property.
    dict_entry = namespace.get("__dict__", None)
    if isinstance(dict_entry, property):
        type_kwargs["__dict__"] = dict_entry

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,
    )
def _enum_getnewargs(obj):
    """Return the argument tuple used to rebuild the skeleton of enum ``obj``.

    The tuple holds, in order: the bases, the name, the qualified name, a
    ``{member name: member value}`` dict, the module name, the tracker id and
    a None placeholder.
    """
    members = {}
    for member in obj:
        members[member.name] = member.value
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        members,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,
    )
# COLLECTION OF OBJECTS RECONSTRUCTORS | |
# ------------------------------------ | |
def _file_reconstructor(retval):
    """Return ``retval`` unchanged."""
    reconstructed = retval
    return reconstructed
# COLLECTION OF OBJECTS STATE GETTERS | |
# ----------------------------------- | |
def _function_getstate(func):
    """Return ``(state, slotstate)`` describing the function ``func``.

    - ``state`` is func's ``__dict__`` (its dynamic attributes), meant to be
      restored at unpickling time using ``f.__dict__.update(state)``.
    - ``slotstate`` holds func's members, meant to be restored at unpickling
      time by calling ``setattr(func, slotname, slotvalue)`` for each entry.
    """
    slot_names = (
        "__name__",
        "__qualname__",
        "__annotations__",
        "__kwdefaults__",
        "__defaults__",
        "__module__",
        "__doc__",
        "__closure__",
    )
    slotstate = {slot: getattr(func, slot) for slot in slot_names}

    code = func.__code__
    func_globals = func.__globals__
    # Only the globals that the code refers to and that currently exist.
    used_globals = {
        name: func_globals[name]
        for name in _extract_code_globals(code)
        if name in func_globals
    }

    cells = func.__closure__
    closure_values = () if cells is None else [_get_cell_contents(c) for c in cells]

    # Extract currently-imported submodules used by func. Storing them under
    # the _cloudpickle_submodules key of the state has the side effect of
    # importing these modules at unpickling time, which func needs in order to
    # work correctly once depickled.
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        code, itertools.chain(used_globals.values(), closure_values)
    )
    slotstate["__globals__"] = used_globals

    return func.__dict__, slotstate
def _class_getstate(obj):
    """Return ``(clsdict, {})`` holding the picklable state of class ``obj``.

    ``clsdict`` is obj's own namespace minus inherited entries, ``__weakref__``,
    ``__dict__``, the abc caches and the descriptors generated from
    ``__slots__``. For ABCMeta-based classes the registered subclasses are
    stored as a list under ``"_abc_impl"``.
    """
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        for cache_attr in (
            "_abc_cache",
            "_abc_negative_cache",
            "_abc_negative_cache_version",
        ):
            clsdict.pop(cache_attr, None)

        registry = clsdict.pop("_abc_registry", None)
        if registry is not None:
            # Here registry is a WeakSet, whereas in the other branch it is
            # a set of weakrefs.
            clsdict["_abc_impl"] = list(registry)
        else:
            # The abc caches and registered subclasses of a
            # class are bundled into the single _abc_impl attribute
            clsdict.pop("_abc_impl", None)
            (registry, _, _, _) = abc._get_dump(obj)
            clsdict["_abc_impl"] = [subclass_ref() for subclass_ref in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        slots = obj.__slots__
        if isinstance(slots, str):
            clsdict.pop(slots)
        else:
            for slot_name in slots:
                clsdict.pop(slot_name, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    return (clsdict, {})
def _enum_getstate(obj):
    """Return ``(clsdict, slotstate)`` for the Enum subclass ``obj``.

    This is ``_class_getstate`` with the enum-specific entries removed: the
    bookkeeping attributes and the members themselves.
    """
    clsdict, slotstate = _class_getstate(obj)
    member_names = tuple(member.name for member in obj)

    # Cleanup the clsdict that will be passed to _make_skeleton_enum:
    # Those attributes are already handled by the metaclass.
    metaclass_managed = (
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    )
    for attrname in metaclass_managed:
        clsdict.pop(attrname, None)

    # Every member is expected to be present: no default is given to pop().
    for member_name in member_names:
        clsdict.pop(member_name)

    return clsdict, slotstate
# COLLECTIONS OF OBJECTS REDUCERS | |
# ------------------------------- | |
# A reducer is a function taking a single argument (obj), and that returns a | |
# tuple with all the necessary data to re-construct obj. Apart from a few | |
# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to | |
# correctly pickle an object. | |
# While many built-in objects (Exceptions objects, instances of the "object" | |
# class, etc), are shipped with their own built-in reducer (invoked using | |
# obj.__reduce__), some do not. The following methods were created to "fill | |
# these holes". | |
def _code_reduce(obj):
    """code object reducer.

    Returns ``(types.CodeType, args)`` where ``args`` are the attributes of
    ``obj`` in the order expected by the ``types.CodeType`` constructor. The
    attribute list depends on which attributes ``obj`` exposes.
    """
    # If you are not sure about the order of arguments, take a look at help
    # of the specific type from types, for example:
    # >>> from types import CodeType
    # >>> help(CodeType)
    if hasattr(obj, "co_exceptiontable"):
        # Python 3.11 and later: there are some new attributes
        # related to the enhanced exceptions.
        fields = (
            "co_argcount",
            "co_posonlyargcount",
            "co_kwonlyargcount",
            "co_nlocals",
            "co_stacksize",
            "co_flags",
            "co_code",
            "co_consts",
            "co_names",
            "co_varnames",
            "co_filename",
            "co_name",
            "co_qualname",
            "co_firstlineno",
            "co_linetable",
            "co_exceptiontable",
            "co_freevars",
            "co_cellvars",
        )
    elif hasattr(obj, "co_linetable"):
        # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
        # expects obj.co_linetable instead.
        fields = (
            "co_argcount",
            "co_posonlyargcount",
            "co_kwonlyargcount",
            "co_nlocals",
            "co_stacksize",
            "co_flags",
            "co_code",
            "co_consts",
            "co_names",
            "co_varnames",
            "co_filename",
            "co_name",
            "co_firstlineno",
            "co_linetable",
            "co_freevars",
            "co_cellvars",
        )
    elif hasattr(obj, "co_nmeta"):  # pragma: no cover
        # "nogil" Python: modified attributes from 3.9
        fields = (
            "co_argcount",
            "co_posonlyargcount",
            "co_kwonlyargcount",
            "co_nlocals",
            "co_framesize",
            "co_ndefaultargs",
            "co_nmeta",
            "co_flags",
            "co_code",
            "co_consts",
            "co_varnames",
            "co_filename",
            "co_name",
            "co_firstlineno",
            "co_lnotab",
            "co_exc_handlers",
            "co_jump_table",
            "co_freevars",
            "co_cellvars",
            "co_free2reg",
            "co_cell2reg",
        )
    else:
        # Backward compat for 3.8 and 3.9
        fields = (
            "co_argcount",
            "co_posonlyargcount",
            "co_kwonlyargcount",
            "co_nlocals",
            "co_stacksize",
            "co_flags",
            "co_code",
            "co_consts",
            "co_names",
            "co_varnames",
            "co_filename",
            "co_name",
            "co_firstlineno",
            "co_lnotab",
            "co_freevars",
            "co_cellvars",
        )
    args = tuple(getattr(obj, field) for field in fields)
    return types.CodeType, args
def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        contents = obj.cell_contents
    except ValueError:  # cell is empty
        return _make_empty_cell, ()
    return _make_cell, (contents,)
def _classmethod_reduce(obj):
    """Reduce a classmethod/staticmethod-like wrapper to its type and wrapped function."""
    wrapper_type = type(obj)
    return wrapper_type, (obj.__func__,)
def _file_reduce(obj):
    """Save a file.

    ``sys.stdout`` and ``sys.stderr`` are reduced to an attribute lookup on
    ``sys``. Any other readable file is read entirely and reduced to an
    ``io.StringIO`` holding its contents, positioned at the file's current
    offset and carrying the file's name.

    Raises
    ------
    pickle.PicklingError
        If obj has no ``name`` or ``mode``, is standard input, is closed, is
        a tty, is not opened for reading, or cannot be read.
    """
    import io

    if not (hasattr(obj, "name") and hasattr(obj, "mode")):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    for stream_name in ("stdout", "stderr"):
        if obj is getattr(sys, stream_name):
            return getattr, (sys, stream_name)
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError("Cannot pickle files that map to tty objects")
    if not ("r" in obj.mode or "+" in obj.mode):
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s" % obj.mode
        )

    name = obj.name
    retval = io.StringIO()

    try:
        # Read the whole file, then put the cursor back where it was.
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except OSError as e:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        ) from e

    retval.write(contents)
    retval.seek(curloc)
    retval.name = name
    return _file_reconstructor, (retval,)
def _getset_descriptor_reduce(obj): | |
return getattr, (obj.__objclass__, obj.__name__) | |
def _mappingproxy_reduce(obj): | |
return types.MappingProxyType, (dict(obj),) | |
def _memoryview_reduce(obj): | |
return bytes, (obj.tobytes(),) | |
def _module_reduce(obj):
    """Reduce a module either by name or by a copy of its namespace.

    Modules accepted by ``_should_pickle_by_reference`` are reduced to a
    ``subimport`` call on their name. Other modules are reduced to a
    ``dynamic_subimport`` call carrying the module name and a copy of its
    ``__dict__`` without the ``"__builtins__"`` entry.
    """
    if _should_pickle_by_reference(obj):
        return subimport, (obj.__name__,)

    # Some external libraries can populate the "__builtins__" entry of a
    # module's `__dict__` with unpicklable objects (see #316), so that entry
    # is left out of the pickled state and a default value is restored for
    # it at unpickling time.
    state = {
        key: value for key, value in obj.__dict__.items() if key != "__builtins__"
    }
    return dynamic_subimport, (obj.__name__, state)
def _method_reduce(obj): | |
return (types.MethodType, (obj.__func__, obj.__self__)) | |
def _logger_reduce(obj): | |
return logging.getLogger, (obj.name,) | |
def _root_logger_reduce(obj): | |
return logging.getLogger, () | |
def _property_reduce(obj): | |
return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) | |
def _weakset_reduce(obj): | |
return weakref.WeakSet, (list(obj),) | |
def _dynamic_class_reduce(obj):
    """Reduce a class that cannot be pickled as a module attribute lookup.

    This covers classes defined inside functions, or classes that otherwise
    can't be serialized as attribute lookups from importable modules.

    ``Enum`` subclasses are rebuilt from ``_make_skeleton_enum`` and every
    other class from ``_make_skeleton_class``; in both cases the pickled
    state is applied with ``_class_setstate``.
    """
    is_enum = Enum is not None and issubclass(obj, Enum)
    if is_enum:
        skeleton_factory = _make_skeleton_enum
        newargs = _enum_getnewargs(obj)
        state = _enum_getstate(obj)
    else:
        skeleton_factory = _make_skeleton_class
        newargs = _class_getnewargs(obj)
        state = _class_getstate(obj)
    # No list items and no dict items: the two ``None`` placeholders.
    return skeleton_factory, newargs, state, None, None, _class_setstate
def _class_reduce(obj): | |
"""Select the reducer depending on the dynamic nature of the class obj.""" | |
if obj is type(None): # noqa | |
return type, (None,) | |
elif obj is type(Ellipsis): | |
return type, (Ellipsis,) | |
elif obj is type(NotImplemented): | |
return type, (NotImplemented,) | |
elif obj in _BUILTIN_TYPE_NAMES: | |
return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) | |
elif not _should_pickle_by_reference(obj): | |
return _dynamic_class_reduce(obj) | |
return NotImplemented | |
def _dict_keys_reduce(obj):
    """Reduce a ``dict_keys`` view to a rebuild from a list of its keys."""
    # Only the keys are shipped: sending the whole underlying dict might be
    # unintended and could leak sensitive information.
    keys = list(obj)
    return _make_dict_keys, (keys,)
def _dict_values_reduce(obj):
    """Reduce a ``dict_values`` view to a rebuild from a list of its values."""
    # Only the values are shipped: sending the whole underlying dict might
    # be unintended and could leak sensitive information.
    values = list(obj)
    return _make_dict_values, (values,)
def _dict_items_reduce(obj):
    """Reduce a ``dict_items`` view to a rebuild from a dict of its pairs."""
    pairs = dict(obj)
    return _make_dict_items, (pairs,)
def _odict_keys_reduce(obj):
    """Reduce an ordered-dict keys view to a rebuild from a list of its keys.

    The extra ``True`` argument is forwarded to ``_make_dict_keys``.
    """
    # Only the keys are shipped: sending the whole underlying dict might be
    # unintended and could leak sensitive information.
    keys = list(obj)
    return _make_dict_keys, (keys, True)
def _odict_values_reduce(obj):
    """Reduce an ordered-dict values view to a rebuild from a list of values.

    The extra ``True`` argument is forwarded to ``_make_dict_values``.
    """
    # Only the values are shipped: sending the whole underlying dict might
    # be unintended and could leak sensitive information.
    values = list(obj)
    return _make_dict_values, (values, True)
def _odict_items_reduce(obj):
    """Reduce an ordered-dict items view to a rebuild from a dict of its pairs.

    The extra ``True`` argument is forwarded to ``_make_dict_items``.
    """
    pairs = dict(obj)
    return _make_dict_items, (pairs, True)
def _dataclass_field_base_reduce(obj):
    """Reduce a dataclass field-type sentinel to a lookup by its ``name``."""
    sentinel_name = obj.name
    return _get_dataclass_field_type_sentinel, (sentinel_name,)
# COLLECTIONS OF OBJECTS STATE SETTERS | |
# ------------------------------------ | |
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at pickling time.
def _function_setstate(obj, state): | |
"""Update the state of a dynamic function. | |
As __closure__ and __globals__ are readonly attributes of a function, we | |
cannot rely on the native setstate routine of pickle.load_build, that calls | |
setattr on items of the slotstate. Instead, we have to modify them inplace. | |
""" | |
state, slotstate = state | |
obj.__dict__.update(state) | |
obj_globals = slotstate.pop("__globals__") | |
obj_closure = slotstate.pop("__closure__") | |
# _cloudpickle_subimports is a set of submodules that must be loaded for | |
# the pickled function to work correctly at unpickling time. Now that these | |
# submodules are depickled (hence imported), they can be removed from the | |
# object's state (the object state only served as a reference holder to | |
# these submodules) | |
slotstate.pop("_cloudpickle_submodules") | |
obj.__globals__.update(obj_globals) | |
obj.__globals__["__builtins__"] = __builtins__ | |
if obj_closure is not None: | |
for i, cell in enumerate(obj_closure): | |
try: | |
value = cell.cell_contents | |
except ValueError: # cell is empty | |
continue | |
obj.__closure__[i].cell_contents = value | |
for k, v in slotstate.items(): | |
setattr(obj, k, v) | |
def _class_setstate(obj, state): | |
state, slotstate = state | |
registry = None | |
for attrname, attr in state.items(): | |
if attrname == "_abc_impl": | |
registry = attr | |
else: | |
setattr(obj, attrname, attr) | |
if registry is not None: | |
for subclass in registry: | |
obj.register(subclass) | |
return obj | |
# COLLECTION OF DATACLASS UTILITIES | |
# --------------------------------- | |
# There are some internal sentinel values whose identity must be preserved when | |
# unpickling dataclass fields. Each sentinel value has a unique name that we can | |
# use to retrieve its identity at unpickling time. | |
_DATACLASSE_FIELD_TYPE_SENTINELS = { | |
dataclasses._FIELD.name: dataclasses._FIELD, | |
dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR, | |
dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR, | |
} | |
def _get_dataclass_field_type_sentinel(name):
    """Return the dataclass field-type sentinel registered under ``name``.

    Raises ``KeyError`` when ``name`` is not a known sentinel name.
    """
    sentinel = _DATACLASSE_FIELD_TYPE_SENTINELS[name]
    return sentinel
class Pickler(pickle.Pickler):
    """``pickle.Pickler`` extended with reducers for dynamic objects.

    Type-specific reducers are collected in ``_dispatch_table`` and chained
    in front of ``copyreg.dispatch_table``. Functions and classes are handled
    by ``reducer_override`` when ``PYPY`` is false, and by the ``dispatch``
    savers ``save_function`` / ``save_global`` otherwise.
    """

    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    # cloudpickle's reducers take precedence over the copyreg ones.
    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        """Return the arguments used to create the skeleton of ``func``.

        The result is ``(code, base_globals, None, None, closure)`` where
        ``closure`` is ``None`` or a tuple of empty cells, one per free
        variable of the function's code object.
        """
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allow functions sharing the same
        # globals at pickling time to also share them once unpickled, at one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and that a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if not base_globals:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure

    def dump(self, obj):
        """Pickle ``obj``, reporting recursion overflows as ``PicklingError``.

        Raises:
            pickle.PicklingError: when the underlying pickler raises a
                ``RuntimeError`` whose first argument is a string containing
                "recursion". Any other ``RuntimeError`` is re-raised as is.
        """
        try:
            return super().dump(obj)
        except RuntimeError as e:
            # The first argument of an exception is not guaranteed to be a
            # string. Only inspect it when it is one, so that an unrelated
            # RuntimeError propagates unchanged instead of being masked by a
            # TypeError raised from the ``in`` test.
            detail = e.args[0] if e.args else None
            if isinstance(detail, str) and "recursion" in detail:
                msg = "Could not pickle object as excessively deep recursion required."
                raise pickle.PicklingError(msg) from e
            raise

    def __init__(self, file, protocol=None, buffer_callback=None):
        """Create a pickler writing to ``file``.

        ``protocol`` falls back to ``DEFAULT_PROTOCOL`` when it is ``None``.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on reduce_override method to customize the pickler
        # behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice given because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation - would it make sense to backport it to pypy? - and
        # pickle's protocol 5 which is implementation agnostic. Currently, the
        # availability of both notions coincide on CPython's pickle, but it may
        # not be the case anymore when pypy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for function and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin-types (int, lists, dicts etc.), which differs from reducers
            in the Pickler's dispatch_table, each of them being invoked for
            objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override has the priority over dispatch_table-registered
            reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            """Save a reduce tuple, applying ``state`` through ``state_setter``.

            The object is first saved without state, then the opcodes for a
            ``state_setter(obj, state)`` call are written by hand.
            """
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry-out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closure or globals,
            there is no additional hack (compared the one already implemented
            in pickle) to protect ourselves from reference cycles. A simple
            (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note
            also that PyPy improved their support for __qualname__ in v3.6, so
            this routing should be removed when cloudpickle supports only PyPy
            3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function
# Shorthands similar to pickle.dump/pickle.dumps | |
def dump(obj, file, protocol=None, buffer_callback=None):
    """Pickle ``obj`` and stream the resulting bytes into ``file``.

    When ``protocol`` is left to ``None``, the pickler falls back to
    cloudpickle.DEFAULT_PROTOCOL, an alias to pickle.HIGHEST_PROTOCOL. That
    choice favors communication speed between processes running the same
    Python version.

    Pass protocol=pickle.DEFAULT_PROTOCOL instead to improve compatibility
    with older versions of Python. Even then compatibility is not always
    guaranteed, because cloudpickle relies on internal implementation
    details that can change from one Python version to the next.
    """
    pickler = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
    pickler.dump(obj)
def dumps(obj, protocol=None, buffer_callback=None):
    """Pickle ``obj`` and return the result as an in-memory bytes string.

    When ``protocol`` is left to ``None``, the pickler falls back to
    cloudpickle.DEFAULT_PROTOCOL, an alias to pickle.HIGHEST_PROTOCOL. That
    choice favors communication speed between processes running the same
    Python version.

    Pass protocol=pickle.DEFAULT_PROTOCOL instead to improve compatibility
    with older versions of Python. Even then compatibility is not always
    guaranteed, because cloudpickle relies on internal implementation
    details that can change from one Python version to the next.
    """
    with io.BytesIO() as buffer:
        Pickler(buffer, protocol=protocol, buffer_callback=buffer_callback).dump(obj)
        return buffer.getvalue()
# Re-export the standard library unpickling functions in this namespace for
# convenience.
load = pickle.load
loads = pickle.loads
# Backward compat alias: ``CloudPickler`` refers to the same class object as
# ``Pickler``.
CloudPickler = Pickler